In [None]:
import glob
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import missingno as msno
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

# Pandas display: allow long/wide frames and untruncated cell text, and print
# floats with two decimals.
for option_name, option_value in {
    'display.max_rows': 150,
    'display.max_columns': 500,
    'display.max_colwidth': None,
    'display.float_format': lambda x: '%.2f' % x,
}.items():
    pd.set_option(option_name, option_value)

# Seaborn defaults (figure size + font scale) first; the matplotlib style sheet
# is applied afterwards, so its settings win wherever the two overlap.
sns.set(rc={'figure.figsize': (16, 8)}, font_scale=1.3)
plt.style.use('fivethirtyeight')

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/matplotlib.
import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Read the two reference tables: district characteristics and product metadata.
districts, products = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv',
        '../input/learnplatform-covid19-impact-on-digital-learning/products_info.csv',
    )
)

In [None]:
# Load every per-district engagement CSV and stack them into a single frame
# with an extra `district_id` column taken from the file name.
path = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data'

# Sorted so the row order of `engagement` is the same on every run
# (directory listing order is not guaranteed).
engagement_files = sorted(Path(path).glob('*.csv'))
if not engagement_files:
    # pd.concat on an empty sequence fails with an unhelpful message; say what is wrong.
    raise FileNotFoundError(f'No engagement CSV files found in {path!r}')

engagement = pd.concat(
    (
        # The file name without its extension is the district id. Path.stem
        # does not depend on how deep `path` is or on the path separator.
        # The id is kept as a string, exactly as the file name spells it.
        pd.read_csv(csv_file, index_col=None, header=0).assign(district_id=csv_file.stem)
        for csv_file in engagement_files
    ),
    ignore_index=True,  # fresh 0..n-1 index instead of repeated per-file indices
)

# EDA

## District Information Data
The file includes information about the characteristics of school districts, including data from NCES and FCC.

In [None]:
# Preview the first rows of the districts table.
districts.head()

In [None]:
# Column dtypes and non-null counts for the districts table.
districts.info()

In [None]:
# Number of districts per state, most frequent state first.
fig, ax = plt.subplots(figsize=(16, 12))

sns.countplot(y='state', data=districts, order=districts['state'].value_counts().index,
              edgecolor='black', linewidth=1.2, ax=ax)
# Text property names are case-sensitive on current matplotlib ('Size' is
# rejected), so the font dict uses the canonical lowercase keys.
ax.set_title('State Distribution',
             fontdict={'family': 'serif', 'size': 30, 'weight': 'bold', 'color': 'black'})
ax.grid(False)
plt.show()

In [None]:
# Share of districts per locale type, drawn as one horizontal bar per locale.
# Labels and percentages are derived from the data so they cannot drift out of
# sync with the bars they annotate.
districts_locale = pd.DataFrame(districts.groupby(['locale']).locale.count())

# Smallest first, so the largest locale ends up on the top row (y grows upwards).
locale_counts = districts_locale['locale'].sort_values()
# Percentages are relative to districts with a known locale (missing values
# are not counted by the groupby above).
locale_share = 100 * locale_counts / locale_counts.sum()

# Horizontal text positions are expressed relative to the longest bar so the
# layout still works if the counts change.
longest_bar = locale_counts.max()
label_x = -0.2 * longest_bar
value_pad = 0.03 * longest_bar

# Lowercase keys: current matplotlib rejects capitalised property names such as 'Size'.
title_font = {'family': 'serif', 'size': 25, 'weight': 'bold', 'color': 'black'}
label_font = {'family': 'serif', 'size': 16, 'weight': 'bold',
              'style': 'normal', 'color': '#444444'}

fig, ax = plt.subplots(figsize=(10, 6))
rows = zip(locale_counts.index, locale_counts.to_numpy(), locale_share.to_numpy())
for row, (locale_name, n_districts, share) in enumerate(rows):
    # One barh call per locale so each bar gets its own colour from the cycle.
    ax.barh([row], n_districts, height=0.7)
    ax.text(label_x, row, locale_name, fontdict=label_font, va='center')
    ax.text(n_districts + value_pad, row, f'{share:.1f}%', fontdict=label_font, va='center')

# Title sits one row above the topmost bar, aligned with the locale names.
ax.text(label_x, len(locale_counts), 'Percentage of Districts by Locale', fontdict=title_font)

# Bare look: no axes or frame except the left baseline the bars grow from.
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for side in ('bottom', 'right', 'top'):
    ax.spines[side].set_visible(False)
ax.spines['left'].set_visible(True)

plt.show()

### Products Information Data

The file includes information about the characteristics of the 372 products with the most users in 2020.

In [None]:
# Preview the first rows of the products table.
products.head()

In [None]:
# Column dtypes and non-null counts for the products table.
products.info()

In [None]:
# The ten most common "Primary Essential Function" values among the products.
top_functions = products['Primary Essential Function'].value_counts().head(10).index

fig, ax = plt.subplots(figsize=(16, 12), dpi=120)
sns.countplot(y='Primary Essential Function', data=products,
              order=top_functions,
              edgecolor='black', linewidth=1.2, ax=ax)
# Lowercase font keys: current matplotlib rejects capitalised property names such as 'Size'.
ax.set_title('Top 10 Primary Essential Function',
             fontdict={'family': 'serif', 'size': 30, 'weight': 'bold', 'color': 'black'})
ax.tick_params(axis='both', labelsize=20)
plt.show()

In [None]:
# Pie chart of all "Primary Essential Function" values; the counts are computed
# once and reused for both labels and slice sizes.
function_counts = products['Primary Essential Function'].value_counts()
labels = function_counts.index.tolist()
sizes = function_counts.to_numpy()

pie_trace = go.Pie(labels=labels, values=sizes)
fig = go.Figure(data=[pie_trace])
fig.show()


### Engagement 
The engagement data are aggregated at the school district level; each file in the `engagement_data` folder holds the data for one school district.

In [None]:
# Preview the first rows of the combined engagement table.
engagement.head()

In [None]:
# Column dtypes and memory footprint of the combined engagement table.
engagement.info()

In [None]:
# The ten districts with the most rows in the engagement table.
top_districts = engagement['district_id'].value_counts().head(10).index

fig, ax = plt.subplots(figsize=(10, 6), dpi=120)
sns.countplot(y='district_id', data=engagement,
              order=top_districts,
              palette="Greens", edgecolor='black', linewidth=1.2, ax=ax)
# The title now describes what is plotted (it previously repeated the products
# chart title). Lowercase font keys: current matplotlib rejects 'Size'.
ax.set_title('Top 10 Districts by Number of Engagement Records',
             fontdict={'family': 'serif', 'size': 18, 'weight': 'bold', 'color': 'black'})
plt.show()