In [None]:
from datetime import datetime
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
# NOTE(review): this also hides genuine chained-assignment mistakes; taking an
# explicit .copy() of filtered frames before adding columns avoids the need for it.
pd.options.mode.chained_assignment = None

In [None]:
# Locate the competition data under the Kaggle input mount.
inputpath = '/kaggle/input'
compdir = os.path.join(
    inputpath, 'learnplatform-covid19-impact-on-digital-learning'
)
engagedir = os.path.join(compdir, 'engagement_data')

# Collect the engagement file names (anything ending in 'csv') in sorted order
# and report how many were found.
engagecsv = sorted(
    filter(lambda fname: fname.endswith('csv'), os.listdir(engagedir))
)
print(len(engagecsv))

Districts Info

In [None]:
# Load the district metadata, then count non-null values per state and order
# the states by how many district ids each one has (largest first).
distdf = pd.read_csv(os.path.join(compdir, 'districts_info.csv'))
sorted_district = (
    distdf
    .groupby('state')
    .count()
    .sort_values(by='district_id', ascending=False)
)
sorted_district

Products Info

In [None]:
# Load the product table and show how many distinct 'LP ID' values it contains.
proddf = pd.read_csv(
    os.path.join(compdir, 'products_info.csv')
)
proddf.loc[:, 'LP ID'].nunique()

Engagement Info

In [None]:
# Read every engagement file and tag its rows with the district id taken from
# the file name (the part before the first '.', parsed as an integer).
# No date parsing is requested: 'time' is handled as a plain string below, and
# the old `parse_dates=True, infer_datetime_format=True` arguments were a no-op
# here (no index column) while triggering a deprecation warning on pandas >= 2.0.
engagelist = [
    pd.read_csv(os.path.join(engagedir, fname)).assign(
        district_id=int(fname.split('.')[0])
    )
    for fname in engagecsv
]
allengagedf = pd.concat(engagelist, ignore_index=True)

# Default (inner) merge: engagement rows whose district_id is missing from
# distdf are dropped, which is why both shapes are printed below.
allstatedf = allengagedf.merge(distdf, on='district_id')

# Month key: drop the last '-'-separated field of 'time'
# (presumably 'YYYY-MM-DD' -> 'YYYY-MM'; verify against the raw files).
allstatedf['YearMonth'] = allstatedf['time'].str.rsplit('-', n=1).str[0]
print(allengagedf.shape, allstatedf.shape)

### What is the picture of digital connectivity and engagement in 2020?

From the figure below, the mean engagement index in most states follows a bimodal trend, with peaks in spring and fall and a dip in the summer.

In [None]:
# One panel per state plus a leading "Overall" panel, stacked vertically and
# sharing the x-axis.
fig, ax = plt.subplots(sorted_district.shape[0] + 1, 1, figsize=(15, 46), sharex=True)

# Aggregate only 'engagement_index'. Averaging the whole frame fails on
# pandas >= 2.0 because it also contains string columns (e.g. 'time', 'state').
# The overall series is drawn first: it covers every month in the data, so the
# shared x-axis receives the month labels in sorted order.
overall_monthly = allstatedf.groupby('YearMonth')['engagement_index'].mean()
ax[0].plot(overall_monthly.index, overall_monthly.values)
ax[0].set_title('Overall')
ax[0].set_ylabel('Mean Index')

# Panels follow the order of sorted_district (states with most districts first).
for num, state_name in enumerate(sorted_district.index, start=1):
    state_rows = allstatedf[allstatedf['state'] == state_name]
    state_monthly = state_rows.groupby('YearMonth')['engagement_index'].mean()
    ax[num].plot(state_monthly.index, state_monthly.values)
    ax[num].set_title(state_name)
    ax[num].set_ylabel('Mean Index')

plt.show()

### What is the effect of the COVID-19 pandemic on online and distance learning, and how might this also evolve in the future?
The plot of KIDS Count data for the states in the LearnPlatform data indicates a large percentage shift in education from traditional settings to online resources in April and November 2020. The data also indicates a dip during the summer holidays.

In [None]:
# Load the education-change table and keep only state-level rows, then list
# the distinct 'COVIDImpactEduc' labels so the one of interest can be picked.
# The former `parse_dates=True, infer_datetime_format=True` arguments were
# dropped: without an index column they parsed nothing, and
# `infer_datetime_format` is deprecated since pandas 2.0.
educationdf = pd.read_csv('/kaggle/input/education/edu_change.csv')
state_edu = educationdf[educationdf['LocationType'] == 'State']
print(state_edu['COVIDImpactEduc'].unique())

In [None]:
def _timeframe_to_yearmonth(timeframe):
    """Turn a 'TimeFrame' label into a 'YYYY-MM' string.

    The year is the text after the last comma and the month is the first word
    before the first '-', parsed as an abbreviated month name.
    Assumes labels look like 'Apr 23 - May 5, 2020' -- TODO confirm.
    NOTE(review): a window that crosses a year boundary would pair its start
    month with the end year -- confirm none exist in the data.
    """
    year = timeframe.split(',')[-1].strip()
    start_month = timeframe.split('-')[0].split()[0]
    return datetime.strptime(year + '-' + start_month, '%Y-%b').strftime('%Y-%m')


# `state_edu` (state-level rows of edu_change.csv) is built in the previous
# cell, so the file is not read a second time here.
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of `state_edu`.
online_edu = state_edu[
    state_edu['COVIDImpactEduc'] == 'Classes moved to distance learning: using online resources'
].copy()
online_edu['Date1'] = online_edu['TimeFrame'].apply(_timeframe_to_yearmonth)
online_edu['Data'] = online_edu['Data'].astype(float)

# Restrict to the states that appear in the district table.
select_states = online_edu[online_edu['Location'].isin(sorted_district.index)]

# Mean of 'Data' per (month, state), pivoted to one column per state.
# Only 'Data' is aggregated: averaging the whole frame fails on pandas >= 2.0
# because of its string columns. The pivot's index is the sorted union of
# months, so every line is drawn against the same ordered x-axis; a state with
# no value for a month shows a gap there.
monthly_by_state = (
    select_states
    .groupby(['Date1', 'Location'])['Data']
    .mean()
    .unstack('Location')
)

fig, ax = plt.subplots(1, 1, figsize=(20, 10))
for state in monthly_by_state.columns:
    ax.plot(monthly_by_state.index, monthly_by_state[state], label=state)
ax.legend()
ax.set_ylabel('Mean Percent')
ax.set_xlabel('Year-Month')
ax.set_title('Mean Percent Classes Moved to Online Resources by State')
plt.show()