In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings, which can mask API breakage.
warnings.filterwarnings("ignore")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go
from plotly import tools

In [None]:
# Load the product information table from the LearnPlatform competition input folder.
products_info = pd.read_csv(
    "/kaggle/input/learnplatform-covid19-impact-on-digital-learning/products_info.csv"
)

In [None]:
# Preview the first rows of the product table.
products_info.head()

In [None]:
# Break the multi-valued text columns into one column per value.
# "Sector(s)" is ';'-separated; expand=True yields one column per piece, and the
# assignment expects exactly three of them.
products_info[['Sector_1', 'Sector_2', "Sector_3"]] = products_info['Sector(s)'].str.split(';', expand=True)
# "Primary Essential Function" is cut only at the first '-' (n=1), giving a main and a sub category.
# `n` is passed by keyword: the positional form is deprecated and raises TypeError in pandas >= 2.0.
products_info[['Main_Category', 'Sub_Category']] = products_info['Primary Essential Function'].str.split('-', n=1, expand=True)
# Drop the two source columns that were just split, together with the URL column.
products_info = products_info.drop(columns=["Sector(s)", "Primary Essential Function", "URL"])

In [None]:
# Store the product id as float; a later cell joins this column against engagement's "lp_id".
products_info = products_info.astype({"LP ID": float})

In [None]:
# Load the district information table from the LearnPlatform competition input folder.
districts = pd.read_csv(
    "/kaggle/input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv"
)

In [None]:
# Count the missing values in each column of the districts table.
districts.isnull().sum()

In [None]:
# Some districts have no state recorded; keep only the rows where it is present.
has_state = districts['state'].notna()
districts = districts[has_state]

In [None]:
# Preview the first rows of the districts table after dropping rows without a state.
districts.head()

In [None]:
# Frequency of each distinct county_connections_ratio value.
districts["county_connections_ratio"].value_counts()

In [None]:
# Frequency of each distinct pct_free/reduced value.
districts["pct_free/reduced"].value_counts()

In [None]:
# Remove the districts whose county_connections_ratio is exactly "[1, 2[";
# rows with any other value (including missing ones) are kept.
in_1_2_bucket = districts["county_connections_ratio"] == "[1, 2["
districts = districts[~in_1_2_bucket]

In [None]:
# Re-check the per-column missing-value counts after the row removals above.
districts.isnull().sum()

In [None]:
# Every column from the third one onward goes through the string accessor:
# leading/trailing '[' and ']' characters are removed from each value.
bracketed_columns = districts.columns[2:]
districts[bracketed_columns] = districts[bracketed_columns].apply(
    lambda column: column.str.strip('[]')
)
districts.head()

In [None]:
import glob
import os

# Collect the paths of the engagement CSV files.
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes the
# row order of the concatenated table the same on every run.
globbed_files = sorted(
    glob.glob("../input/learnplatform-covid19-impact-on-digital-learning/engagement_data/*.csv")
)
engagement = []

In [None]:
# Read every engagement file and tag its rows with the district id taken from the file name.
# The frames go into a list local to this cell, so re-running the cell works even after
# `engagement` has already been rebound to the concatenated DataFrame.
frames = []
for filename in globbed_files:
    frame = pd.read_csv(filename)
    # "<id>.csv" -> <id>. splitext removes exactly the extension, whereas str.strip('.csv')
    # strips any of the characters '.', 'c', 's', 'v' from both ends of the name.
    frame['district_id'] = int(os.path.splitext(os.path.basename(filename))[0])
    frames.append(frame)

# Concatenating all data into one DataFrame
engagement = pd.concat(frames, ignore_index=True)

In [None]:
# Parse the timestamp column; values that cannot be parsed become NaT (errors='coerce').
engagement['time'] = pd.to_datetime(engagement['time'], errors='coerce')
# Derive the calendar month and year of every record from the parsed timestamp.
timestamp_parts = engagement['time'].dt
engagement["Month"] = timestamp_parts.month
engagement["Year"] = timestamp_parts.year

In [None]:
# Count the missing values in each column of the engagement table.
engagement.isnull().sum()  # there are many null values in engagement_index.

In [None]:
# Keep only the records that have both an engagement_index and an lp_id.
has_index_and_product = engagement[['engagement_index', "lp_id"]].notna().all(axis=1)
engagement = engagement[has_index_and_product]

In [None]:
# Attach district attributes to every engagement record. The inner join keeps only
# records whose district_id is present in the districts table.
engage_districts = engagement.merge(districts, how="inner", on="district_id")

In [None]:
# Attach product attributes by matching engagement's "lp_id" to the product table's "LP ID".
# The inner join drops records without a matching product.
Final_dataframe = engage_districts.merge(
    products_info,
    how="inner",
    left_on="lp_id",
    right_on="LP ID",
)

In [None]:
# Work on a 50% random sample because the merged table has too many rows.
# random_state pins the sample so every aggregate and plot below is reproducible across runs.
engage_table = Final_dataframe.sample(frac=0.50, random_state=42)

In [None]:
# (rows, columns) of the sampled table.
engage_table.shape

In [None]:
# Preview the first rows of the sampled table.
engage_table.head()

In [None]:
# List the column names available for the analysis below.
engage_table.columns

**1. What is the effect of the COVID-19 pandemic on online and distance learning, and how might this also evolve in the future?**

In [None]:
# Make sure the engagement index is stored as float before it is averaged.
engage_table = engage_table.astype({"engagement_index": float})

In [None]:
# Mean engagement index for each calendar month of the sampled data.
Monthwise = engage_table.groupby("Month")["engagement_index"].mean()

# Point plot of the monthly means; labels are set through the Axes that seaborn returns.
plt.figure(figsize=(12, 6))
ax = sns.pointplot(x=Monthwise.index, y=Monthwise.values, color='magenta', alpha=0.8)
ax.set_xlabel('Months', fontsize=10, color='darkblue')
ax.set_ylabel('Mean of engagement index', fontsize=10, color='darkblue')
ax.set_title('Monthwise Engagement Index in 2020', fontsize=15, color='darkblue')

**2. How does student engagement with different types of education technology change over the course of the pandemic?**

In [None]:
# Mean engagement index per (month, product), pivoted so that each product is a column.
monthly_product_mean = engage_table.groupby(['Month', 'Product Name'])['engagement_index'].mean()
edtech = monthly_product_mean.unstack()

# There are too many products to compare at once: rank them by the sum of their
# monthly means and keep the ten highest.
product_totals = edtech.sum(axis=0)
top10edtech = product_totals.sort_values(ascending=False).iloc[:10]

In [None]:
# Reshape the monthly means of the top-10 products to long form:
# one row per (Month, product) with the product name in 'cols' and the mean in 'vals'.
dftop10edtech = edtech[top10edtech.index].copy()
dftop10edtech["Month"] = dftop10edtech.index
dftop10edtech = dftop10edtech.melt('Month', var_name='cols', value_name='vals')

# sns.factorplot is deprecated and removed from current seaborn; catplot(kind="point")
# is its replacement. catplot is figure-level and creates its own figure, so the size is
# set through height/aspect (8 x 1.5 -> 12x8 inches) rather than a separate plt.figure
# call, which would only leave an empty extra figure in the output.
g = sns.catplot(x="Month", y="vals", hue='cols', data=dftop10edtech,
                kind="point", height=8, aspect=1.5)
plt.xlabel('Months', fontsize=10, color='darkblue')
plt.ylabel('Engagement Index', fontsize=10, color='darkblue')
plt.title('Productwise Engagement Index of top 10 products in 2020', fontsize=12, color='darkblue')

#### **3. How does student engagement with online learning platforms relate to different geography? Demographic context (e.g., race/ethnicity, ESL, learning disability)? Learning context? Socioeconomic status?**

In [None]:
## Bar plot: mean engagement index per state, highest first ##
state_means = engage_table.groupby(by=["state"])['engagement_index'].mean()
state_ei = state_means.sort_values(ascending=False)

plt.figure(figsize=(8, 4), dpi=100)
ax = sns.barplot(x=state_ei.index, y=state_ei.values, palette="viridis", edgecolor='black')
ax.set_title(' Statewise mean engagement index')
ax.set_ylabel("Mean Engagement index", fontsize=10)
# Rotate the state names so they do not overlap.
plt.xticks(rotation=90)
plt.yticks(fontsize=10)
ax.set_xlabel("State", fontsize=10)

In [None]:
# Mean engagement index for each locale group.
locale_ei = engage_table.groupby(by=["locale"])['engagement_index'].mean()
labels = locale_ei.index  # wedge labels
sizes = locale_ei.values

## Create Pie chart Plot ##
plt.figure(figsize=(7, 7), dpi=100)
# Pull only the first wedge out of the pie. The list is sized from the data because
# plt.pie raises ValueError when `explode` and the values differ in length, which a
# fixed four-element list does whenever the number of locale groups is not 4.
explode = [0.1 if position == 0 else 0 for position in range(len(sizes))]
colors = ['#2D87BB', '#64C2A6', '#AADEA7', "#E6F69D"]
plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', shadow=False,
        textprops={'fontsize': 14}, pctdistance=0.85, startangle=90, colors=colors)
# Draw a white circle over the centre to turn the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
plt.title('Mean engagement index by Locale', color='Black', fontsize=12)

In [None]:
## Bar plot: mean engagement index for each pct_black/hispanic bucket ##
bh_ei = engage_table.groupby(by=["pct_black/hispanic"])['engagement_index'].mean()
plt.figure(figsize=(8, 4), dpi=100)
plt.title('Percentage of black/Hispanic to the mean engagement index')
sns.barplot(x=bh_ei.index, y=bh_ei.values, palette="viridis", edgecolor='black')
plt.ylabel("Mean Engagement index", fontsize=10)
# Rotate the bucket labels so they do not overlap.
plt.xticks(rotation=90)
plt.yticks(fontsize=10)
# Axis label spelling corrected ("Percenage" -> "Percentage").
plt.xlabel("Percentage of black/Hispanic", fontsize=10)

#### Work is still in progress!