In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
# Standard library
import os

# Data handling
import numpy as np
import pandas as pd

# Plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# `display` must come from the public IPython.display module: importing it
# from IPython.core.display has been deprecated since IPython 7.14 and fails
# on newer IPython releases.
from IPython.display import display, HTML

mpl.style.use(['ggplot'])

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the district metadata CSV; the bare name on the last line renders the
# frame as the cell output.
districts_info = pd.read_csv(
    "../input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv"
)
districts_info

In [None]:
# Summary statistics for every column (include='all' also covers non-numeric columns).
districts_info.describe(include='all')

In [None]:
# Horizontal count plot of districts per state, most frequent state first.
state_order = districts_info.state.value_counts().index

plt.figure(figsize=(16, 10))
sns.countplot(
    data=districts_info,
    y="state",
    order=state_order,
    palette="pastel",
    linewidth=3,
)
plt.title("State Distribution", size=18)

sns.despine()
plt.show()

In [None]:
# Donut chart showing the share of districts in each locale type.
fig, ax = plt.subplots(figsize=(16, 8))
fig.suptitle('Locale Type Distribution', size = 20)

# Count once and reuse; value_counts() skips NaN and sorts by descending frequency.
locale_counts = districts_info.locale.value_counts()
labels = list(locale_counts.index)
sizes = locale_counts.values

# Pull out only the last (least frequent) wedge. The tuple is sized from the
# data so the plot still works if the number of locale categories is not four.
explode = tuple(0.1 if i == len(sizes) - 1 else 0 for i in range(len(sizes)))

ax.pie(
    sizes,
    explode=explode,
    startangle=60,
    labels=labels,
    autopct='%1.0f%%',
    pctdistance=0.7,
    colors=["#FFFF33", "#ff9100", "#eaaa00", "#6d6875"],
)
# A white circle over the centre turns the pie into a donut.
ax.add_artist(plt.Circle((0, 0), 0.4, fc='white'))
plt.show()

In [None]:
# Grouped count plot: number of districts per state, split by locale type,
# with each bar annotated with its count.
plt.figure(figsize=(18,10))

ax=sns.countplot(data=districts_info,x='state',palette='pastel', hue='locale')

plt.xticks(rotation=45)
plt.title("State and its Locality")
plt.legend(loc='upper right')
for p in ax.patches:
    height = p.get_height()
    # Skip patches with no real bar (NaN or zero height): labelling them
    # would draw stray "nan"/"0" texts or place text at a non-finite position.
    if not np.isfinite(height) or height <= 0:
        continue
    # Counts are whole numbers, so print them as ints ("3", not "3.0") and
    # centre the label horizontally over its bar.
    ax.text(p.get_x() + p.get_width() / 2, height + 0.25, f"{int(height)}",
            ha='center', fontsize=11)

plt.show()

In [None]:
# Read the product metadata CSV; the bare name on the last line renders the
# frame as the cell output.
products_info = pd.read_csv(
    "../input/learnplatform-covid19-impact-on-digital-learning/products_info.csv"
)
products_info

In [None]:
# Count plot of the ten providers that appear most often in products_info.
provider_col = 'Provider/Company Name'
top_providers = products_info[provider_col].value_counts().index[:10]

plt.figure(figsize=(16, 8))
sns.countplot(
    data=products_info,
    y=provider_col,
    order=top_providers,
    palette='pastel',
)
plt.title('Top 10 Provider Platform with the Most Product in 2020', size=18)
sns.despine()
plt.show()

In [None]:
# Count plot of products per primary essential function, most frequent first.
function_col = 'Primary Essential Function'
function_order = products_info[function_col].value_counts().index

plt.figure(figsize=(16, 8))
sns.countplot(
    data=products_info,
    y=function_col,
    order=function_order,
    palette='pastel',
)
plt.title('Distribution of Primary Essential Function of Platform in 2020', size=18)
sns.despine()
plt.show()

In [None]:
# Number of products (non-null 'LP ID' entries) per sector combination,
# largest group first.
data = (
    products_info.groupby('Sector(s)').count()[['LP ID']]
    .reset_index()
    .sort_values(by="LP ID", ascending=False)
)

plt.figure(figsize = (16,8))

sns.barplot(data=data, x="Sector(s)", y="LP ID")

plt.title('Products Sector Distribution',size=18)
# The x axis holds sectors, not companies, so label it accordingly; the
# y axis holds a count rather than an ID.
plt.xlabel('Sector(s)',size=14)
plt.ylabel('Number of Products',size=14)
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
sns.despine()
plt.show()

#### Engagement Data

In [None]:
# Directory holding one engagement CSV per district, named "<district_id>.csv".
PATH = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data'

# Read every district's file and tag its rows with the district it came from.
temp = [
    pd.read_csv(f'{PATH}/{district}.csv', index_col=None, header=0)
      .assign(district_id=district)
    for district in districts_info.district_id.unique()
]

# Stack all districts into a single frame with a fresh 0..n-1 index.
engagement = pd.concat(temp, ignore_index=True)

In [None]:
# Display the combined engagement frame as the cell output.
engagement

In [None]:
# Number of missing values in each column of districts_info.
districts_info.isnull().sum()

In [None]:
# Number of missing values in each column of products_info.
products_info.isnull().sum()

In [None]:
# Number of missing values in each column of engagement.
engagement.isnull().sum()

In [None]:
# Treat a missing engagement index as zero engagement.
engagement['engagement_index'] = engagement['engagement_index'].fillna(0)

# Rename the product key so it matches the `lp_id` column used in engagement.
products_info = products_info.rename(columns={'LP ID': 'lp_id'})

In [None]:
# Lookup tables keyed by their identifiers.
districts_by_id = districts_info.set_index('district_id')
products_by_id = products_info.set_index('lp_id')

# df1: engagement rows enriched with district attributes.
df1 = engagement.join(districts_by_id, on='district_id')

# df2: df1 enriched with product attributes; rows without a 'Product Name'
# after the join are dropped.
df2 = df1.join(products_by_id, on='lp_id').dropna(subset=['Product Name'])
df2.shape

In [None]:
# Preview the first rows of the engagement + district join.
df1.head()

In [None]:
# Preview the first rows of the engagement + district + product join.
df2.head()

In [None]:
# Print a concise summary of df2 (columns, dtypes, memory usage).
df2.info()

In [None]:
# Number of missing values in each column of df2.
df2.isnull().sum()

In [None]:
# Parse the timestamp column (unparseable values become NaT), then derive the
# calendar month from it.
parsed_time = pd.to_datetime(df2['time'], errors='coerce')
df2['time'] = parsed_time
df2['month'] = parsed_time.dt.month

In [None]:
# Mean engagement index for each calendar month, ordered by month.
engagement_per_month = (
    df2.groupby(['month'], as_index=False)['engagement_index']
    .mean()
    .sort_values(by=['month'], ascending=True)
)

In [None]:
# Line chart of the monthly mean engagement index across all districts.
plt.figure(figsize=(16, 8))

sns.lineplot(
    x="month",
    y="engagement_index",
    data=engagement_per_month,
    color='b',
)
plt.title('Monthly Average Engagement in 2020 (All District)', size=18)
plt.xlabel('Month', size=14)

sns.despine()
plt.show()

In [None]:
# Mean engagement index per (product, primary essential function) pair,
# highest average first.
# NOTE: groupby drops NaN keys by default, so products without a
# 'Primary Essential Function' do not appear in this table.
top_product = (
    df2.groupby(['Product Name', 'Primary Essential Function'], as_index=False)['engagement_index']
    .mean()
    .sort_values(by=['engagement_index'], ascending=False)
)

In [None]:
# Display the per-product engagement ranking as the cell output.
top_product

In [None]:
# Bar chart of the ten products with the highest mean engagement index.
plt.figure(figsize = (16,8))

sns.barplot(data=top_product.head(10), x="Product Name", y= "engagement_index")

plt.title('Top 10 Product with the Most Average Daily Engagement in 2020 (All District)',size=18)
# The x axis holds product names, not company names, so label it accordingly.
plt.xlabel('Product Name',size=14)
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
sns.despine()
plt.show()

In [None]:
# Mean engagement index per primary essential function, highest average first.
top_category_platform = (
    df2.groupby(['Primary Essential Function'], as_index=False)['engagement_index']
    .mean()
    .sort_values(by=['engagement_index'], ascending=False)
)

In [None]:
# Preview the highest-ranked categories.
top_category_platform.head()

In [None]:
# Horizontal bar chart of the ten categories with the highest mean engagement index.
top_ten_categories = top_category_platform.head(10)

plt.figure(figsize=(16, 8))
sns.barplot(
    data=top_ten_categories,
    x="engagement_index",
    y="Primary Essential Function",
)
plt.title('Top 10 Category Platform with the Most Average Daily Engagement in 2020 (All District)', size=18)
sns.despine()
plt.show()

In [None]:
# Monthly mean engagement index restricted to Learning Management System
# products, ordered by month.
is_lms = df2['Primary Essential Function'] == 'SDO - Learning Management Systems (LMS)'
lms_engage = (
    df2[is_lms]
    .groupby(['month'], as_index=False)['engagement_index']
    .mean()
    .sort_values(by=['month'], ascending=True)
)

In [None]:
# Line chart of the monthly mean engagement index for LMS products.
plt.figure(figsize=(16, 8))

sns.lineplot(
    x="month",
    y="engagement_index",
    data=lms_engage,
    color='b',
)
plt.title('Monthly Average Engagement of LMS in 2020 (All District)', size=18)
plt.xlabel('Month', size=14)

sns.despine()
plt.show()

In [None]:
# Mean engagement index of LMS products per state, highest average first.
is_lms = df2['Primary Essential Function'] == 'SDO - Learning Management Systems (LMS)'
state_most_visit_lms = (
    df2[is_lms]
    .groupby(['state'], as_index=False)['engagement_index']
    .mean()
    .sort_values(by=['engagement_index'], ascending=False)
)

In [None]:
# Bar chart of the five states with the highest mean LMS engagement index.
top_five_states = state_most_visit_lms.head(5)

plt.figure(figsize=(12, 6))
sns.barplot(x="state", y="engagement_index", data=top_five_states)

plt.title('Top 5 State that Often Visited Learning Management Systems in 2020', size=18)
plt.xlabel('State', size=14)

# Tilt the state names on the x axis.
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
sns.despine()
plt.show()

In [None]:
# Bar chart of the five states with the lowest mean LMS engagement index.
bottom_five_states = state_most_visit_lms.tail(5)

plt.figure(figsize=(12, 6))
sns.barplot(x="state", y="engagement_index", data=bottom_five_states)

plt.title('Top 5 State that the Least Often Visited Learning Management Systems in 2020', size=18)
plt.xlabel('State', size=14)

# Tilt the state names on the x axis.
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
sns.despine()
# The table is sorted in descending order, so flip the axis to put the
# lowest-engagement state on the left.
plt.gca().invert_xaxis()
plt.show()