# **Question 1: What is the overall trend of digital learning across different locales from January to December 2020?**

In [None]:
# Importing libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Increment in online engagement per locality over time
def engagementAnalysis(districts, startMonth, stopMonth) :
    # Start the data processing with district data
    [rrDisctrict, ccDisctrict] = np.shape(districts)
    count = 0
    sumEngagementJan = 0
    sumEngagementDec = 0
    for i in range(rrDisctrict) : # rrDisctrict       
        # Extract data
        tempVar = districts.loc[i, 'district_id'] # Scanning through all the districts for a given locale
        engagementFile = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data/'+ str(tempVar) +'.csv'
        # Read the specific Engagement data file
        engagement = pd.read_csv(engagementFile)
    
        # Pre-process the engagement file
        engagement['year'] = pd.DatetimeIndex(engagement['time']).year
        engagement['month'] = pd.DatetimeIndex(engagement['time']).month
    
        # Get data from Jan, 2020 to Dec 2020 for a given district
        engagementJan = engagement.loc[engagement['month'] == int(startMonth)]
        engagementDec = engagement.loc[engagement['month'] == int(stopMonth)]
        engagementJan = engagementJan.dropna()
        engagementDec = engagementDec.dropna()
        engagementJan = engagementJan.drop(['time'], axis = 1)
        engagementDec = engagementDec.drop(['time'], axis = 1)
        
        # Group by engagement index
        engJan = engagementJan['engagement_index'].mean()
        engDec = engagementDec['engagement_index'].mean()
        if np.isnan(engJan) == False and np.isnan(engDec) == False :
            #print(engJan, engDec)
            count = count + 1
            sumEngagementJan = sumEngagementJan + engJan
            sumEngagementDec = sumEngagementDec + engDec
    
    # Calculating the average
    sumEngagementJan = sumEngagementJan / float(count)
    sumEngagementDec = sumEngagementDec / float(count)
    
    return(((sumEngagementDec - sumEngagementJan) / float(sumEngagementJan))*100) # Increment per locality
    
# Reading data and pre-process them
districts = pd.read_csv('../input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv')
products = pd.read_csv('../input/learnplatform-covid19-impact-on-digital-learning/products_info.csv')
uniqueLocale = districts['locale'].unique()

# Divide data as per locale
suburbArea = districts.loc[districts['locale'] == 'Suburb'].reset_index(drop=True)
ruralArea = districts.loc[districts['locale'] == 'Rural'].reset_index(drop=True)
cityArea = districts.loc[districts['locale'] == 'City'].reset_index(drop=True)
townArea = districts.loc[districts['locale'] == 'Town'].reset_index(drop=True)

monthRange = [3, 6, 8, 12] # March, June, August, December
sChangeOverMonth = []
rChangeOverMonth = []
cChangeOverMonth = []
tChangeOverMonth = []

# Computing
for i in range(len(monthRange)) :
    sChange = engagementAnalysis(suburbArea, 1, monthRange[i])
    sChangeOverMonth.append(sChange)
    rChange = engagementAnalysis(ruralArea, 1, monthRange[i])
    rChangeOverMonth.append(rChange)
    cChange = engagementAnalysis(cityArea, 1, monthRange[i])
    cChangeOverMonth.append(cChange)
    tChange = engagementAnalysis(townArea, 1, monthRange[i])
    tChangeOverMonth.append(tChange)

# PLotting
import seaborn as sns
data = {'Suburb' : sChangeOverMonth,
       'Rural' : rChangeOverMonth,
       'City' : cChangeOverMonth,
       'Town' : tChangeOverMonth,
       'Time' : ['March', 'June', 'August', 'December']}
df = pd.DataFrame(data)
titleSNS = "[%] change in engagement over time with respect to Jan, 2020"
sns.set_style("darkgrid")
sns.lineplot(x="Time", y="value", hue='variable', data = pd.melt(df, ['Time'])).set_title(titleSNS)