In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Flatten the directory walk into a stream of full file paths, then echo each one.
input_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import glob
import sys
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from tqdm.notebook import tqdm_notebook as tq
import warnings
import plotly as py
import statistics as stat
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)
import matplotlib.pyplot as plt
%matplotlib inline
import geopandas

In [None]:
# Folder with the engagement CSV files; the file name (minus extension) is used
# as the district id of every row read from that file.
eng = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data'
eng_files = glob.glob(eng + "/*.csv")

files = []

for f in eng_files:
    df = pd.read_csv(f, index_col = None, header = 0)
    # Derive the id from the file name itself instead of a fixed position in
    # the path, so the cell keeps working if `eng` is moved, made absolute, or
    # uses a different path separator.
    district_id = os.path.splitext(os.path.basename(f))[0]
    df['district_id'] = district_id
    files.append(df)

# ignore_index = True yields a fresh 0..n-1 index without a separate reset_index pass
engagement = pd.concat(files, ignore_index = True)
engagement['time'] = pd.to_datetime(engagement['time'])

In [None]:
# Load the two lookup tables (district info, product info) in one statement.
district, product = map(
    pd.read_csv,
    (
        '/kaggle/input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv',
        '/kaggle/input/learnplatform-covid19-impact-on-digital-learning/products_info.csv',
    ),
)

In [None]:
# Count and percentage of null / missing values per column
def pcrt_null_values(data):
    """Summarise the missing values of every column in ``data``.

    Returns a DataFrame indexed by the column names of ``data`` with two
    columns: "Total Missing" (number of nulls in that column) and
    "Percentage Missing" (that number as a percentage of ``len(data)``).
    """
    null_counts = data.isna().sum()
    null_share = (null_counts / len(data)) * 100
    # Stack the two Series as rows, then flip so each input column becomes a row.
    summary = pd.DataFrame(data = [null_counts, null_share]).transpose()
    summary.columns = ["Total Missing", "Percentage Missing"]
    return summary

In [None]:
# Number of duplicated rows in a dataset
def dup_count(data):
    """Return how many rows of ``data`` are exact repeats of an earlier row."""
    return data.duplicated().sum()

In [None]:
# Delete the rows of the district data whose fields (other than the id) are all
# null, and return the remaining rows as a new frame.
def drop_district_null_rows(data):
    """Return ``data`` without the rows that carry no information besides the id.

    Parameters
    ----------
    data : pandas.DataFrame
        District table; must contain a ``district_id`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the rows of ``data`` in which at least one column
        other than ``district_id`` is non-null, with the columns arranged as
        district_id, state, locale, pct_black/hispanic, pct_free/reduced,
        county_connections_ratio, pp_total_raw. ``data`` is not modified.
    """
    # Work on the argument that was passed in (the previous version silently
    # read the global `district` instead of `data`).
    non_id_fields = data.drop(columns = 'district_id')
    has_any_value = non_id_fields.notna().any(axis = 1)
    kept = data.loc[has_any_value]
    new = kept.reindex(columns = ['district_id', 'state', 'locale', 'pct_black/hispanic',
                                  'pct_free/reduced', 'county_connections_ratio', 'pp_total_raw'])
    return new

In [None]:
# District rows that survive the all-null filter defined above.
new_district = drop_district_null_rows(district)
# head(0) returns zero rows, so this displays only the column headers.
new_district.head(0)

In [None]:
# Impute the remaining gaps with the most frequent value of each column
from sklearn.impute import SimpleImputer

district_columns = ['district_id', 'state', 'locale', 'pct_black/hispanic',
                    'pct_free/reduced', 'county_connections_ratio', 'pp_total_raw']

impute = SimpleImputer(strategy = 'most_frequent', missing_values = np.nan)
new_district1 = impute.fit_transform(new_district)
# Wrap the imputer output back into a DataFrame carrying the district column labels.
new_district_1 = pd.DataFrame(data = new_district1, columns = district_columns)


In [None]:
# pct_black/hispanic, pct_free/reduced, county_connections_ratio and pp_total_raw
# are stored as interval strings "[a, b[", meaning a <= x < b.
# Each kept column is replaced by its lower bound `a` plus a fixed offset
# (1000 for pp_total_raw, 0.1 for the two percentage columns).
def _interval_start(interval):
    """Return the text between the opening bracket and the first comma."""
    return interval.split(',')[0][1:]

new_district_1['pp_total_raw'] = new_district_1['pp_total_raw'].apply(
    lambda interval: int(_interval_start(interval)) + 1000)
for i in ['pct_black/hispanic', 'pct_free/reduced']:
    new_district_1[i] = new_district_1[i].apply(
        lambda interval: float(_interval_start(interval)) + 0.1)
# county_connections_ratio is not converted; it is removed from the table.
new_district_1.drop(columns = 'county_connections_ratio', inplace = True)

In [None]:

#######
## US state name -> two-letter postal abbreviation (50 states plus District of Columbia).
## Keys must match the spelling used in the `state` column, otherwise .map() yields NaN.
states = {'Alabama' : 'AL', 'Alaska' : 'AK', 'Arizona' : 'AZ', 'Arkansas' : 'AR', 'California' : 'CA', 'Colorado' : 'CO',
          'Connecticut' : 'CT', 'District of Columbia' : 'DC', 'Delaware' : 'DE', 'Florida' : 'FL', 'Georgia' : 'GA',
          'Hawaii' : 'HI', 'Idaho' : 'ID', 'Illinois' : 'IL', 'Indiana' : 'IN', 'Iowa' : 'IA', 'Kansas' : 'KS',
          'Kentucky' : 'KY', 'Louisiana' : 'LA', 'Maine' : 'ME', 'Maryland' : 'MD', 'Massachusetts' : 'MA',
          # 'Mississippi' was previously misspelled 'Mississipi' and could never match
          'Michigan' : 'MI', 'Minnesota' : 'MN', 'Mississippi' : 'MS', 'Missouri' : 'MO', 'Montana' : 'MT', 'Nebraska' : 'NE',
          'Nevada' : 'NV', 'New Hampshire' : 'NH', 'New Jersey' : 'NJ', 'New Mexico' : 'NM', 'New York' : 'NY',
          'North Carolina' : 'NC', 'North Dakota' : 'ND', 'Ohio' : 'OH', 'Oklahoma' : 'OK', 'Oregon' : 'OR', 'Pennsylvania' : 'PA',
          'Rhode Island' : 'RI', 'South Carolina' : 'SC', 'South Dakota' : 'SD', 'Tennessee' : 'TN', 'Texas' : 'TX', 'Utah' : 'UT',
          'Vermont' : 'VT', 'Virginia' : 'VA', 'Washington' : 'WA', 'West Virginia' : 'WV', 'Wisconsin' : 'WI', 'Wyoming' : 'WY'}
### Geo-map
def district_geomap(title):
    """Show the number of school districts per state as a US map and a bar chart.

    Parameters
    ----------
    title : str
        Title placed on the choropleth map.

    Side effects
    ------------
    Adds (or overwrites) a ``states`` column on the global ``new_district_1``
    holding the two-letter abbreviation of each row's ``state`` (looked up in
    the global ``states`` dict), then displays a plotly figure and a
    matplotlib figure. Nothing is returned.
    """
    new_district_1['states'] = new_district_1['state'].map(states)

    # Name the count columns explicitly: the labels produced by a bare
    # value_counts().reset_index() differ between pandas versions
    # ('index'/<column> before 2.0, <column>/'count' from 2.0 on).
    abbr_counts = (new_district_1['states'].value_counts()
                   .rename_axis('abbr').reset_index(name = 'n_districts'))
    state_counts = (new_district_1['state'].value_counts()
                    .rename_axis('state_name').reset_index(name = 'n_districts'))

    fig = go.Figure()
    layout = dict(title_text = title,
                  title_font = dict(family = "monospace", size = 25, color = "black"), geo_scope = 'usa')
    # NOTE(review): zmax = 1 caps the colour scale at 1 although z holds district
    # counts - confirm this is the intended colouring.
    fig.add_trace(go.Choropleth(locations = abbr_counts['abbr'],
                                zmax = 1, z = abbr_counts['n_districts'],
                                locationmode = 'USA-states', marker_line_color = 'white', geo = 'geo', colorscale = "RdYlBu"))
    fig.update_layout(layout)
    fig.show()

    plt.figure(figsize = (15, 8))
    sns.set_style("white")
    a = sns.barplot(data = state_counts, x = 'n_districts', y = 'state_name', color = '#90afc5')
    plt.xticks([])
    plt.yticks(fontname = 'monospace', fontsize = 14, color = '#283655')
    plt.ylabel('')
    plt.xlabel('')

    a.spines['left'].set_linewidth(1.5)
    for w in ['right', 'top', 'bottom']:
        a.spines[w].set_visible(False)

    # Write each bar's count just past the end of the bar.
    for p in a.patches:
        width = p.get_width()
        plt.text(0.5 + width, p.get_y() + 0.55 * p.get_height(), f'{int(width)}',
             ha = 'center', va = 'center', fontname = 'monospace', fontsize = 15, color = '#283655')

    plt.show()

# PREPROCESSING

### District Data:
The district data consists of 233 entries. The state, locale and percentage of students who identified themselves as Black or Hispanic (pct_black/hispanic) columns each had 24.5% missing entries. The percentage of students eligible for free or reduced-price meals (pct_free/reduced), the ratio based on county-level data (county_connections_ratio) and the per-pupil total expenditure (pp_total_raw) had 36.5%, 30.47% and 49.36% missing entries respectively. Entries with a missing state and locale were dropped, and the remaining missing entries were imputed with the most frequent value in each column.

In all, 23 states are represented in the data. The entries for pct_black/hispanic, pct_free/reduced, county_connections_ratio and pp_total_raw were originally recorded as intervals. The intervals for pct_black/hispanic, pct_free/reduced and pp_total_raw are narrow, with widths of 0.2, 0.2 and 2000 respectively, so it is prudent to represent each interval by its midpoint (lower bound + 0.1 for the two percentages, lower bound + 1000 for pp_total_raw). The intervals for county_connections_ratio, however, are wide, so it is inappropriate to represent each entry by its mean or median value; that column is dropped instead.

The top six states, each with more than ten school districts and with the highest number of suburban districts, are Connecticut, Utah, Massachusetts, Illinois, California and Ohio, as shown in figure 1 and figure 2 below.


In [None]:
# Draws the map and bar chart of districts per state; the call also adds the
# 'states' abbreviation column to new_district_1.
district_geomap('Number of School Districts in each State')

#### Figure 1

In [None]:
# Horizontal count plot of one column (optionally split by `hue`) with the
# count written next to every bar.
def district_count_plot(y, palette, title, hue = None, data = None):
    """Draw a labelled horizontal count plot.

    Parameters
    ----------
    y : str
        Column of ``data`` whose values are counted (one group per value).
    palette : str
        Seaborn / matplotlib palette name used for the bars.
    title : str
        Title of the figure.
    hue : str, optional
        Column of ``data`` used to split every group into coloured sub-bars.
    data : pandas.DataFrame, optional
        Table holding the ``y`` and ``hue`` columns.
    """
    sns.set_theme(style = 'darkgrid')
    plt.figure(figsize = (15, 22))
    plt.xticks(rotation = 90)
    plt.title(title)
    b = sns.countplot(y = y, hue = hue, palette = palette, data = data)
    b.spines['left'].set_linewidth(2)
    for c in ['right', 'top', 'bottom']:
        b.spines[c].set_visible(True)
    for p in b.patches:
        width = p.get_width()
        # Combinations of y / hue with no rows give bars of NaN or zero width;
        # labelling them would print 'nan' / '0' at meaningless positions.
        if not np.isfinite(width) or width == 0:
            continue
        # Counts are whole numbers, so print '12' rather than '12.0'.
        plt.text(0.5 + width, p.get_y() + 0.55 * p.get_height(), f'{int(width)}',
                 ha = 'center', va = 'center', fontname = 'monospace', fontsize = 10, color = '#283655')

In [None]:
# One group of bars per state (y), split by locale (hue), using the 'CMRmap' palette.
district_count_plot('state', 'CMRmap', 'Counts of locale in each State Presented', 'locale', new_district_1)

#### Figure 2

In [None]:
### Pie graph
def pie_graph(data, i, title):
    """Show a donut chart with the share of every distinct value in one column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table that contains column ``i``.
    i : str
        Name of the column whose value frequencies are plotted.
    title : str
        Text written in the hole of the donut (may contain ``<br>``).
    """
    # Explicit column names keep the chart independent of how the running
    # pandas version labels the output of value_counts().reset_index().
    shares = data[i].value_counts().rename_axis('category').reset_index(name = 'count')
    fig = px.pie(shares, values = 'count', names = 'category',
                 width = 800, height = 800)
    fig.update_traces(textposition = 'inside', textinfo = 'percent + label', hole = 0.65,
                       marker = dict(colors = ['#90afc5','#336b87','#2a3132','#763626'],
                                     line = dict(color = 'white', width = 2)))
    fig.update_layout(annotations = [dict(text = title, x = 0.5, y = 0.5, font_size = 25,
                                          showarrow = False, font_family = 'monospace',
                                          font_color = '#283655')],showlegend = False)

    fig.show()

In [None]:
# Donut chart of the 'state' column of the district table.
pie_graph(new_district_1, 'state', 'Percentage of School<br> District in Each State')

#### Figure 3

In [None]:
# Donut chart of the 'locale' column of the district table.
pie_graph(new_district_1, 'locale', 'Percentage of School <br> District in Each Locale')

#### Figure 4

It is observed that the distribution of the percentage of students who identified themselves as Black or Hispanic is right skewed, with less than 24% having the largest count of school districts in the city. Moreover, the percentage of students eligible for free or reduced-price lunch and the local and federal expenditure both have an almost symmetrical, slightly right-skewed distribution, with less than 34% having the largest count of districts in the city for the former. On average, the total expenditure on each student is $11,329.55, and the highest spending mostly occurs in the rural area, as illustrated in figure 5 to figure 7 and chart 1 below.

In [None]:
colors = ["#90afc5", "#336b87", "#763626"]

# Density of the pct_black/hispanic column with a hand-written conclusion box.
fig = plt.figure(figsize = (15, 11))
sns.set_style("white")
plt.title('Percentage of Black/Hispanic', size = 20, fontname = "monospace", color = "#763626")
# `fill` is the current name of the deprecated `shade` argument of kdeplot.
a = sns.kdeplot(new_district_1['pct_black/hispanic'], color = "#763626",
                fill = True, alpha = 0.9, linewidth = 1.5, edgecolor = "black")
plt.ylabel("")
plt.xlabel("")
plt.xticks(fontname = "monospace")
plt.yticks([])
for j in ["right", "left", "top"]:
    a.spines[j].set_visible(True)
# Does not depend on the loop variable, so it is set once instead of on every pass.
a.spines["bottom"].set_linewidth(1.5)
fig.tight_layout(h_pad = 3)
plt.figtext(0.07, 1.05, "Distribution of Characteristics of School Districts",
            fontsize = 30, fontname = "monospace", color = "#283655")
plt.figtext(0.70, 0.47, "Conclusion", fontsize = 30, fontname = "monospace",
               color = "#283655")
# NOTE(review): the figures quoted below are typed in by hand, not computed from the data.
plt.figtext(0.54, 0.40, """The average number of students who identified themselves
as black or Hispanic is 24%. The most common value is 10%.""", fontsize = 13)

plt.show()

#### Figure 5

In [None]:
colors = ["#90afc5", "#336b87", "#763626"]

# Density of the pct_free/reduced column with a hand-written conclusion box.
fig = plt.figure(figsize = (11, 11))
sns.set_style("white")
plt.title('Percentage of Students Eligible for Free or Reduced', size = 20, fontname = "monospace", color = "#90afc5")
# `fill` is the current name of the deprecated `shade` argument of kdeplot.
a = sns.kdeplot(new_district_1['pct_free/reduced'], color = "red",
                fill = True, alpha = 0.9, linewidth = 2, edgecolor = "blue")
plt.ylabel("")
plt.xlabel("")
plt.xticks(fontname = "monospace")
plt.yticks([])
for j in ["right", "left", "top"]:
    a.spines[j].set_visible(True)
# Does not depend on the loop variable, so it is set once instead of on every pass.
a.spines["bottom"].set_linewidth(1.5)
fig.tight_layout(h_pad = 1)
plt.figtext(0.07, 1.05, "Distribution of Characteristics of School Districts",
            fontsize = 30, fontname = "monospace", color = "#283655")
plt.figtext(0.70, 0.47, "Conclusion", fontsize = 30, fontname = "monospace",
               color = "#283655")
# NOTE(review): the figures quoted below are typed in by hand, not computed from the data.
plt.figtext(0.64, 0.40, """The average number of students eligible
for free or reduced - price lunch is 33.3%. 
The most common value is 30%.""", fontsize = 13)

plt.show()

#### Figure 6

In [None]:
colors = ["#90afc5", "#336b87", "#763626"]

# Density of the pp_total_raw column with a hand-written conclusion box.
fig = plt.figure(figsize = (11, 11))
sns.set_style("white")
plt.title('Local and Federal Expenditure', size = 20, fontname = "monospace", color = "#336b87")
# `fill` is the current name of the deprecated `shade` argument of kdeplot.
a = sns.kdeplot(new_district_1['pp_total_raw'], color = "blue",
                fill = True, alpha = 0.9, linewidth = 2, edgecolor = "red")
plt.ylabel("")
plt.xlabel("")
plt.xticks(fontname = "monospace")
plt.yticks([])
for j in ["right", "left", "top"]:
    a.spines[j].set_visible(True)
# Does not depend on the loop variable, so it is set once instead of on every pass.
a.spines["bottom"].set_linewidth(1.5)
fig.tight_layout(h_pad = 1)
plt.figtext(0.07, 1.05, "Distribution of Characteristics of School Districts",
            fontsize = 30, fontname = "monospace", color = "#283655")
plt.figtext(0.70, 0.47, "Conclusion", fontsize = 30, fontname = "monospace",
               color = "#283655")
# NOTE(review): the figures quoted below are typed in by hand, not computed from the data.
plt.figtext(0.92, 0.40, """Per-pupil total expenditure
 (sum of local and federal expenditure) is $11,329.55 
 and the most common value is $9,000.""", fontsize = 13, fontname = "monospace", color = "#283655", ha = "right")


plt.show()

#### Figure 7

In [None]:
# Mean of each district characteristic per locale, one bar-chart panel per characteristic.
dist_area_group = new_district_1.groupby("locale").agg({"pct_black/hispanic":"mean","pct_free/reduced":"mean", "pp_total_raw":"mean"}).reset_index()

colors = ["#90afc5", "#336b87", "#763626"]
# Every column after "locale" is a metric to plot.
metric_columns = dist_area_group.columns.tolist()[1:]

fig = plt.figure(figsize = (13, 12))
for i, metric in enumerate(metric_columns):
    plt.subplot(2, 2, i+1)
    sns.set_style("white")
    plt.title(metric, size = 20, fontname = "monospace",
             y = 1.09, color = colors[i])
    plt.grid(color = "gray", linestyle = ":", axis = "y", zorder = 0, dashes = (1, 7))
    a = sns.barplot(data = dist_area_group, x = "locale", y = metric,
                   color = colors[i])
    plt.xlabel("")
    plt.ylabel("")
    plt.xticks(fontname = "monospace", size = 14)
    plt.yticks([])

    for j in ["right", "top", "left"]:
        a.spines[j].set_visible(False)
    for j in ["bottom"]:
        a.spines[j].set_linewidth(1.4)

    # The first two metrics are fractions and are labelled as percentages; the
    # third (pp_total_raw) is an amount and is labelled in dollars. The earlier
    # test `i < 3` was always true, and its `else` was attached to the `for`
    # loop, so every panel received both a '%' and a '$' label.
    if i < 2:
        for p in a.patches:
            height = p.get_height()
            a.annotate(f'{int(height*100)}%', (p.get_x() + p.get_width()/2, p.get_height()- 0.03),
                      ha = "center", va = "center", size = 18, xytext = (2, 5),
                      textcoords = "offset points", color = "white", fontname = "monospace")
    else:
        for p in a.patches:
            height = p.get_height()
            a.annotate(f'{int(height)}$', (p.get_x() + p.get_width()/2, p.get_height() - 1000),
                      ha = "center", va = "center", size = 18, xytext = (0, 5), textcoords = "offset points",
                      color = "white", fontname = "monospace")

plt.figtext(0.07, 1.05, "Characteristics of School Districts by locale", fontsize = 22, fontname = "monospace", color = "#283655")
plt.figtext(0.83, 0.34, "Conclusion", fontsize = 22, fontname = "monospace", color = "#283655")
plt.figtext(0.99, 0.15, """The largest count of students who identified themselves as
Black or Hispanic are in large cities. 
The smallest count is in the suburbs and the rural areas.

In cities and towns half of the students are eligible
for free or reduced-priced lunch.

The Highest total expenses per student is in the rural area.""", fontsize = 11, fontname = "monospace", color = "#283655", ha = "right")

fig.tight_layout(pad = 2)

plt.show()

#### Chart 1

In [None]:
# Basic category = the text before the first '-' of "Primary Essential Function",
# minus its last character (a value shaped like "LC - ..." becomes "LC").
# Products without a primary function keep the placeholder 'x'.
# Built as one vectorised expression: the previous row-by-row chained
# assignment (product[col][i] = ...) is slow and is not guaranteed to write
# through to `product`.
product['Basic_category'] = (
    product['Primary Essential Function']
    .str.split('-').str[0]   # text before the first hyphen
    .str[:-1]                # drop the character right before the hyphen
    .fillna('x')             # rows with no primary function
)

In [None]:
# Delete the rows of the product data that have a null value in any column
def drop_product_null_rows(data = None):
    """Return a copy of the product table without rows that contain any null.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Table to clean. When omitted, the global ``product`` frame is used,
        which is what a call without arguments has always done.

    Returns
    -------
    pandas.DataFrame
        The rows of the table in which every column is non-null. The input
        frame is not modified.
    """
    if data is None:
        data = product
    return data.dropna(axis = 0, how = 'any')

In [None]:
# Product rows with no missing field (rows with any null are removed).
new_product = drop_product_null_rows()

#### Product Data:
The product data contains 372 entries, of which the company name had 1 missing entry and Sector(s) and Primary Essential Function had 20 missing entries each. Rows with missing entries were dropped. There are five unique Sector(s) and four major categories. The PreK-12 sector of education had the most product usage, constituting about 48% of the dataset, and the top learning provider was Google LLC.

Most products fall in the LC category, which constitutes about 77% of the products, as shown in figure 8, chart 2 and chart 3 below.


#### Engagement Data:
The engagement data consist of data based on 22324189 entries of which about 24% are missing. These missing entries were dropped or deleted. The data was merged with the product data and district data for further analysis.



In [None]:
# Top 10 learning providers as a bar chart, followed by a donut chart of sectors

# Explicit column names keep both charts independent of how the running pandas
# version labels the output of value_counts().reset_index().
top_providers = (new_product['Provider/Company Name'].value_counts().head(10)
                 .rename_axis('provider').reset_index(name = 'n_products'))
sector_counts = (product['Sector(s)'].value_counts().head(15)
                 .rename_axis('sector').reset_index(name = 'n_products'))

plt.figure(figsize = (15, 8))
sns.set_style("white")

plt.title("TOP - 10 learning providers", size = 35, x = 0.4, y = 1.06, fontname = 'monospace', color = '#283655')
r = sns.barplot(data = top_providers,
                x = 'n_products', y = 'provider', color = '#90afc5')
plt.xticks([])
plt.yticks(fontname = 'monospace', fontsize = 14, color = '#283655')
plt.xlabel("")
plt.ylabel("")

r.spines['left'].set_linewidth(1.5)
for w in ['right', 'bottom', 'top']:
    r.spines[w].set_visible(False)

# Write each bar's count just past the end of the bar.
for p in r.patches:
    width = p.get_width()
    plt.text(0.5 + width, p.get_y() + 0.55 * p.get_height(), f'{int(width)}',
            ha = "center", va = "center", fontname = 'monospace', fontsize = 15,
            color = "#283655")

# NOTE(review): the sector chart reads the unfiltered `product` table while the
# bar chart reads `new_product` - confirm that this difference is intended.
fig = px.pie(sector_counts,
             values = 'n_products', names = 'sector', width = 700, height = 700)

# The last colour was missing its leading '#', which made it an invalid colour string.
fig.update_traces(textposition = 'inside', textinfo = 'percent + value + label', hole = 0.7, marker = dict(colors = ['#90afc5', '#336b87', '#2a3132','#763626', '#a43820'],
                                                                                                  line = dict(color = 'white', width = 2)))
fig.update_layout(annotations = [dict(text = 'Sector of education <br> where the product is used', x = 0.5, y = 0.5, font_size = 26, showarrow = False,
                                     font_family = 'monospace', font_color = '#283655')], showlegend = False)

fig.show()


#### Figure 8

In [None]:
# Donut chart: number of products in each basic category.
# Explicit column names avoid relying on the 'index' label that only older
# pandas versions give to value_counts().reset_index().
category_counts = (new_product['Basic_category'].value_counts()
                   .rename_axis('category').reset_index(name = 'count'))
fig = px.pie(category_counts,
             values = 'count', names = 'category', width = 600, height = 600)

# The last colour was missing its leading '#', which made it an invalid colour string.
fig.update_traces(textposition = 'inside', textinfo = 'value + percent + label', hole = 0.7, marker = dict(colors = ['#90afc5', '#336b87', '#2a3132','#763626', '#a43820'],
                                                                                                  line = dict(color = 'white', width = 2)))
fig.update_layout(annotations = [dict(text = 'Count of Products <br> by category', x = 0.5, y = 0.5, font_size = 26, showarrow = False,
                                     font_family = 'monospace', font_color = '#283655')], showlegend = False)

fig.show()

#### Chart 2

In [None]:
# Bar chart: number of products per "Primary Essential Function" value.
# Explicit column names avoid relying on the 'index' label that only older
# pandas versions give to value_counts().reset_index().
function_counts = (new_product["Primary Essential Function"].value_counts()
                   .rename_axis('function').reset_index(name = 'n_products'))

plt.figure(figsize = (12, 20))
sns.set_style("white")
# fontname was misspelled 'monosapce', so the title did not use the monospace font
plt.title('Count of products by subcategory', size = 35, x = 0.2, y = 1.06, fontname = 'monospace', color = "#283655")
a = sns.barplot(data = function_counts, x = 'n_products',
               y = 'function', color = "#90afc5")
plt.xticks([])
plt.yticks(fontname = 'monospace', fontsize = 10, color = '#283655')
plt.xlabel("")
plt.ylabel("")

a.spines['left'].set_linewidth(1.5)
for w in ['right', 'top', 'bottom']:
    a.spines[w].set_visible(False)

# Write each bar's count just past the end of the bar.
for p in a.patches:
    width = p.get_width()
    plt.text(1 + width, p.get_y() + 0.55 * p.get_height(), f'{int(width)}', ha = 'center', va = 'center', fontname = 'monospace',
            fontsize = 11, color = '#283655')
plt.show()

###########################

#### Chart 3

In [None]:
# Check whether the engagement data has rows that are null in every measurement column
def drop_eng_null_rows():
    """Report on null values in the global ``engagement`` frame.

    Prints a message when no row is null in all columns other than
    ``district_id`` and ``time``, then prints the per-column null counts.
    Nothing is returned and ``engagement`` is not modified.

    NOTE(review): a later cell defines another function with this same name,
    which replaces this one; consider giving this check its own name.
    """
    measurements = engagement.drop(columns = ['district_id', 'time'])
    non_empty = measurements.dropna(axis = 0, how = 'all')
    # Equal lengths mean dropna removed nothing, i.e. no row is entirely empty.
    # (The reassembled frame the earlier version built here was never used.)
    if len(non_empty) == len(measurements):
        print("There is no Null Values in rows \n" 'count of null values in each column \n')
    print(engagement.isnull().sum())

In [None]:
# Drop every row of the engagement data that has a null value in any column
def drop_eng_null_rows(data = None):
    """Return a copy of the engagement table without rows that contain any null.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Table to clean. When omitted, the global ``engagement`` frame is used,
        which is what a call without arguments has always done.

    Returns
    -------
    pandas.DataFrame
        The rows of the table in which every column is non-null. The input
        frame is not modified.
    """
    if data is None:
        data = engagement
    return data.dropna(axis = 0, how = 'any')

In [None]:
# drop_eng_null_rows is defined twice above; the later definition (drop rows
# with any null value) is the one bound when this line runs.
new_eng = drop_eng_null_rows()

In [None]:
# Join products to engagement rows on the product id, with district_id cast to int64
merged_data_1 = (
    new_product
    .merge(new_eng, left_on = 'LP ID', right_on = 'lp_id')
    .assign(district_id = lambda joined: joined['district_id'].astype('int64'))
)
# 'lp_id' duplicates 'LP ID' after the join, so only one of the two is kept.
merged_data = merged_data_1.drop(columns = 'lp_id')

# ANALYSIS
The Covid-19 pandemic, also known as the coronavirus pandemic, is an ongoing global pandemic caused by Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2). The pandemic has disrupted in-person learning for more than 56 million students in the USA. The World Health Organization (WHO) declared Covid-19 a pandemic on 11th March 2020. Most state and local governments temporarily closed educational institutions in order to avoid the rapid spread of the virus.

>
#### Before 11th March
From figure 9 and figure 10, prior to the declaration of Covid-19 as a pandemic, the percentage of students in districts that have at least one page-load event of a given product on a given day (pct_access) in most states is fairly less than 500%, with the least access being from Texas, California and Tennessee and the highest access being from Arizona and Connecticut. During this same period the total page-load events per one thousand students on a given day (engagement_index) is less than 1300, with the least being from Tennessee, Texas and California and the highest engagement_index being from Arizona and Connecticut.

Moreover, in general, the rural had the highest rate of pct_access and the highest engagement_index followed by the suburbs while the cities and towns had the least pct_access and engagement_index.

#### After 11th to Start of Summer Holidays
After WHO had declared Covid-19 a pandemic, pct_access of all products by states fairly declined and engagement_index fairly rose in states. This is due to some school districts being temporarily shut down.

However, in general, the rural had the highest rate of pct_access and the highest engagement_index followed by the suburbs and towns.

#### During Summer Holidays
Summer holidays are defined by school districts, so there is no single start or end date across the states. However, in general, late May to early September can be assumed. From the graphs in figure 9 and figure 10, school districts that began their holidays in early June saw pct_access of all products and engagement_index decline until July 31st, where they start to rise and continue rising throughout the month of August. This rise is due to school districts in some states, such as Arizona and Indiana, resuming early.

Although, the rural area had the highest rate of pct_access and the highest engagement_index followed by the suburbs and cities but in the month of August the pct_access and engagement_index for towns peaked.


#### After Summer Holidays
After the summer holidays there had been on average a consistent rise in pct_access of all products by states and on average a consistent rise in engagement_index by states.

Nonetheless, on average, the rural area had the highest rate of pct_access and the highest engagement_index, followed by the suburbs and cities. This is attributed to school districts engaging in digital learning.

#### The Immediate Impact of Covid-19
From tables 1 and 2, the immediate impact of Covid-19 after 11th March was a decline in pct_access and a rise in engagement_index. Nevertheless, from the third week of March 2020 to the fourth week of April 2020 there was, on average, an increase in pct_access and engagement_index of all products by both state and locale, but throughout May 2020 there was a decline in pct_access and engagement_index by both state and locale.

In addition, pct_access of primary functions of products fell immediately after 11th March 2020, yet increased from the third week of March 2020 to the first week of April 2020, whereas engagement_index rose right after 11th March 2020 until the first week of April 2020. Throughout the month of May 2020, on average, pct_access and engagement_index of primary functions of products fell; this is evident in table 3.

In [None]:
# Attach the district attributes to every product/engagement row
merged_data_1 = pd.merge(merged_data, new_district_1, on = 'district_id')
# 'states' exists on new_district_1 only once district_geomap() has been called,
# so a missing column is tolerated instead of raising a KeyError.
merged_data_1 = merged_data_1.drop(columns = ['URL', 'states'], errors = 'ignore')

In [None]:
def _daily_mean(group_col, metric):
    """Mean of ``metric`` per (``group_col``, time) pair, plus the day of week of each date."""
    summary = merged_data_1.groupby([group_col, 'time']).agg({metric: 'mean'}).reset_index()
    summary['day_of_week'] = summary['time'].dt.dayofweek
    return summary

# One table per (grouping column, metric) combination used by the time-series plots.
state_access = _daily_mean('state', 'pct_access')
state_eng = _daily_mean('state', 'engagement_index')
locale_access = _daily_mean('locale', 'pct_access')
locale_eng = _daily_mean('locale', 'engagement_index')
cat_access = _daily_mean('Basic_category', 'pct_access')
cat_eng = _daily_mean('Basic_category', 'engagement_index')

In [None]:
#state_access.query("time >= '2020-04-27' & time<= '2020-06-01' & state == 'Minnesota'").mean()

In [None]:
# Line chart of the mean engagement_index per state over time, with the WHO pandemic
# declaration (11 March 2020) and the summer-holiday window (June-August 2020) marked.
axis_style = dict(showline=True, linecolor='#f5f2f2', showgrid=True, gridwidth=1, gridcolor='#f5f2f2',
                  tickfont_family='monospace', tickfont_color='#221f1f', tickfont_size=12)
annotation_font = dict(family='monospace', size=11, color='black')

fig = px.line(state_eng, x='time', y='engagement_index', color='state', line_group='state')
fig.update_layout(
    plot_bgcolor='white',
    title='Dynamics of engagement_index of all products by states',
    title_font_family='monospace',
    title_font_color='#221f1f',
    title_font_size=12,
)
fig.update_yaxes(linewidth=2, **axis_style)
fig.update_xaxes(linewidth=1.5, **axis_style)

# Pandemic declaration marker and its label.
fig.add_vline(x='2020-03-11', line_width=3, line_color='red')
fig.add_annotation(x='2020-03-11', y=2500, text='WHO has declared Covid-19 a pandemic',
                   showarrow=True, font=annotation_font, arrowhead=2, arrowsize=1,
                   arrowwidth=2, arrowcolor='#636363', ax=130, ay=1)

# Shaded summer-holiday band and its label.
fig.add_vrect(x0="2020-06-01", x1="2020-08-31", fillcolor="yellow", opacity=0.25, line_width=0)
fig.add_annotation(x="2020-07-15", y=2000, text="Summer holidays", showarrow=False,
                   font=annotation_font)

fig.update_traces(line_width=2)
fig.show()

#### Figure 9

In [None]:
# Line chart of the mean pct_access per state over time, with the WHO pandemic
# declaration (11 March 2020) and the summer-holiday window (June-August 2020) marked.
axis_style = dict(showline=True, linecolor='#f5f2f2', showgrid=True, gridwidth=1, gridcolor='#f5f2f2',
                  linewidth=2, tickfont_family='monospace', tickfont_color='#221f1f', tickfont_size=12)
annotation_font = dict(family='monospace', size=11, color='black')

fig = px.line(state_access, x='time', y='pct_access', color='state', line_group='state')
fig.update_layout(
    plot_bgcolor='white',
    title='Dynamics of pct_access of all products by states',
    title_font_family='monospace',
    title_font_color='#221f1f',
    title_font_size=12,
)
fig.update_yaxes(**axis_style)
fig.update_xaxes(**axis_style)

# Pandemic declaration marker and its label.
fig.add_vline(x='2020-03-11', line_width=3, line_color='red')
fig.add_annotation(x='2020-03-11', y=7.5, text='WHO has declared Covid-19 a pandemic',
                   showarrow=True, font=annotation_font, arrowhead=2, arrowsize=1,
                   arrowwidth=2, arrowcolor='#636363', ax=130, ay=1)

# Shaded summer-holiday band and its label.
fig.add_vrect(x0="2020-06-01", x1="2020-08-31", fillcolor="yellow", opacity=0.25, line_width=0)
fig.add_annotation(x="2020-07-15", y=6, text="Summer holidays", showarrow=False,
                   font=annotation_font)

fig.update_traces(line_width=1)
fig.show()

#### Figure 10

In [None]:
# Animated US choropleths of the monthly mean pct_access and engagement_index per state.
months_map = {1:"January", 2:"February", 3:"March", 4:"April",
             5:"May", 6:"June", 7:"July", 8:"August",
             9:"September", 10:"October", 11:"November", 12:"December"}
# Calendar order for the animation frames; groupby sorts the month names alphabetically.
month_order = list(months_map.values())

for i in [state_access, state_eng]:
    # These two columns are added to the shared frames in place, as before.
    i["states"] = i["state"].map(states)
    i["month"] = i.time.dt.month.map(months_map)

    # Third column of each frame is its metric (pct_access or engagement_index).
    value_col = i.columns[2]
    # Aggregate once and reuse the result for both the data frame and the colour column.
    monthly = i.groupby(['state', 'states', 'month']).agg({value_col: 'mean'}).reset_index()

    fig = px.choropleth(data_frame = monthly,
                       locations = "states", locationmode = 'USA-states',
                       color = value_col,
                       scope = 'usa', color_continuous_scale = "cividis", animation_frame = 'month',
                       category_orders = {'month': month_order}, hover_name = "state")
    fig.update_layout(title_text = f'Monthly Dynamics of {value_col}', title_font = dict(family = "monospace", size = 25, color = 'black'))
    fig.show()
    # Changes in the average student activity indicators on school days 1 and 2 weeks after the announcement of the pandemic in every state.
    #there are no information about Texas during the start of the pandemic, hence, the state does not participate in the analysis

#### Figure 11

In [None]:
# Table 1: per state, the mean pct_access / engagement_index in the baseline school week
# (9-13 March 2020) and the week-over-week change in percent for the 12 school weeks after it.
cov_imp = pd.DataFrame({'state': state_access["state"].unique().tolist()})
# There is no information about North Dakota during the start of the pandemic
# There is no information about Texas during the start and in most period of the pandemic
cov_imp = cov_imp.query("state != 'North Dakota' & state != 'Texas'").reset_index(drop = True)
statesss = cov_imp['state'].unique().tolist()

# Mondays of the baseline week (position 0) and of the 12 following weeks (positions 1-12).
week_starts = pd.date_range('2020-03-09', periods = 13, freq = '7D')

for source, value_col, suffix, baseline_decimals in [(state_access, 'pct_access', 'access', 2),
                                                     (state_eng, 'engagement_index', 'eng', 1)]:
    # Monday-Friday mean per state for every week: rows = state, columns = week position.
    weekly = pd.DataFrame({
        week: source[source['time'].between(monday, monday + pd.Timedelta(days = 4))]
              .groupby('state')[value_col].mean()
        for week, monday in enumerate(week_starts)
    }).reindex(statesss)

    baseline = weekly[0].round(baseline_decimals)
    # Whole columns are assigned at once; the previous cov_imp[col][row] = value form is
    # chained assignment, which pandas does not guarantee to write back into the frame.
    cov_imp[f'mean_{suffix}'] = baseline.to_numpy()

    # Week 1 is compared with the rounded baseline shown in the table (as before);
    # every later week is compared with the unrounded mean of the week preceding it.
    previous = baseline
    for week in range(1, len(week_starts)):
        cov_imp[f'w{week}_{suffix}'] = ((weekly[week] / previous - 1) * 100).round(1).to_numpy()
        previous = weekly[week]


def color_values(val):
    """Return the CSS colour rule for a cell: red for negative values, green otherwise."""
    color = 'red' if val < 0 else 'green'
    return 'color: %s' % color


slice_2 = ["mean_access"] + [f'w{week}_access' for week in range(1, 13)]
slice_3 = ['mean_eng'] + [f'w{week}_eng' for week in range(1, 13)]
# Only the percentage-change columns are coloured; the baseline means are not.
slice_ = slice_2[1:] + slice_3[1:]
asz = cov_imp.style.applymap(color_values, subset = slice_).set_precision(1).set_properties(**{'background-color': '#fafafa'}, subset = slice_2).set_properties(**{'background-color': '#f7f7f7'}, subset = slice_3)

In [None]:
asz  # render the styled per-state table (Styler built from cov_imp in the previous cell)

#### Table 1

In [None]:
# Line chart of the mean pct_access per locale over time, with the WHO pandemic
# declaration (11 March 2020) and the summer-holiday window (June-August 2020) marked.
axis_style = dict(showline = True, linecolor = '#f5f2f2', showgrid = True, gridwidth = 1, gridcolor = '#f5f2f2',
                  linewidth = 2, tickfont_family = 'monospace', tickfont_color = '#221f1f', tickfont_size = 12)
annotation_font = dict(family = 'monospace', size = 11, color = 'black')

fig = px.line(locale_access, x = 'time', y = 'pct_access', color = 'locale', line_group = 'locale')
fig.update_layout(plot_bgcolor = 'white', title = 'Dynamics of pct_access of all products by locale',
                 title_font_family = 'monospace', title_font_color = '#221f1f', title_font_size = 20,
                 title_x = 0.5)
fig.update_yaxes(**axis_style)
fig.update_xaxes(**axis_style)

# Pandemic declaration marker and its label. The argument-less fig.add_annotation() that used
# to precede the label only added an empty annotation to the layout and has been dropped.
fig.add_vline(x = '2020-03-11', line_width = 3, line_color = 'red')
fig.add_annotation(x = '2020-03-11', y = 2.5, text = 'WHO has declared Covid-19 a pandemic', showarrow = True,
                  font = annotation_font, arrowhead = 2, arrowsize = 1,
                  arrowwidth = 2, arrowcolor = '#636363', ax = 130, ay = 1)

# Shaded summer-holiday band and its label.
fig.add_vrect(x0 = "2020-06-01", x1 = "2020-08-31", fillcolor = "yellow", opacity = 0.25, line_width = 0)
fig.add_annotation(x = "2020-07-15", y = 1.95, text = "Summer holidays", showarrow = False,
                   font = annotation_font)
fig.update_traces(line_width = 1)
fig.show()

#### Figure 12

In [None]:
# Line chart of the mean engagement_index per locale over time, with the WHO pandemic
# declaration (11 March 2020) and the summer-holiday window (June-August 2020) marked.
axis_style = dict(showline = True, linecolor = '#f5f2f2', showgrid = True, gridwidth = 1, gridcolor = '#f5f2f2',
                  linewidth = 2, tickfont_family = 'monospace', tickfont_color = '#221f1f', tickfont_size = 12)
annotation_font = dict(family = 'monospace', size = 11, color = 'black')

fig = px.line(locale_eng, x = 'time', y = 'engagement_index', color = 'locale', line_group = 'locale')
fig.update_layout(plot_bgcolor = 'white', title = 'Dynamics of engagement_index of all products by locale',
                 title_font_family = 'monospace', title_font_color = '#221f1f', title_font_size = 20,
                 title_x = 0.5)
fig.update_yaxes(**axis_style)
fig.update_xaxes(**axis_style)

# Pandemic declaration marker and its label. The argument-less fig.add_annotation() that used
# to precede the label only added an empty annotation to the layout and has been dropped.
fig.add_vline(x = '2020-03-11', line_width = 3, line_color = 'red')
fig.add_annotation(x = '2020-03-11', y = 700, text = 'WHO has declared Covid-19 a pandemic', showarrow = True,
                  font = annotation_font, arrowhead = 2, arrowsize = 1,
                  arrowwidth = 2, arrowcolor = '#636363', ax = 130, ay = 1)

# Shaded summer-holiday band and its label.
fig.add_vrect(x0 = "2020-06-01", x1 = "2020-08-31", fillcolor = "yellow", opacity = 0.25, line_width = 0)
fig.add_annotation(x = "2020-07-15", y = 610, text = "Summer holidays", showarrow = False,
                   font = annotation_font)
fig.update_traces(line_width = 1)
fig.show()

#### Figure 13

In [None]:
# Table 2: per locale, the mean pct_access / engagement_index in the baseline school week
# (9-13 March 2020) and the week-over-week change in percent for the 12 school weeks after it.
cov_imp1 = pd.DataFrame({'locale': locale_access["locale"].unique().tolist()})
locales = cov_imp1['locale'].unique().tolist()

# Mondays of the baseline week (position 0) and of the 12 following weeks (positions 1-12).
week_starts = pd.date_range('2020-03-09', periods = 13, freq = '7D')

for source, value_col, suffix, baseline_decimals in [(locale_access, 'pct_access', 'access', 2),
                                                     (locale_eng, 'engagement_index', 'eng', 1)]:
    # Monday-Friday mean per locale for every week: rows = locale, columns = week position.
    weekly = pd.DataFrame({
        week: source[source['time'].between(monday, monday + pd.Timedelta(days = 4))]
              .groupby('locale')[value_col].mean()
        for week, monday in enumerate(week_starts)
    }).reindex(locales)

    baseline = weekly[0].round(baseline_decimals)
    # Whole columns are assigned at once; the previous cov_imp1[col][row] = value form is
    # chained assignment, which pandas does not guarantee to write back into the frame.
    cov_imp1[f'mean_{suffix}'] = baseline.to_numpy()

    # Week 1 is compared with the rounded baseline shown in the table (as before);
    # every later week is compared with the unrounded mean of the week preceding it.
    previous = baseline
    for week in range(1, len(week_starts)):
        cov_imp1[f'w{week}_{suffix}'] = ((weekly[week] / previous - 1) * 100).round(1).to_numpy()
        previous = weekly[week]


def color_values(val):
    """Return the CSS colour rule for a cell: red for negative values, green otherwise."""
    color = 'red' if val < 0 else 'green'
    return 'color: %s' % color


slice_2 = ["mean_access"] + [f'w{week}_access' for week in range(1, 13)]
slice_3 = ['mean_eng'] + [f'w{week}_eng' for week in range(1, 13)]
# Only the percentage-change columns are coloured; the baseline means are not.
slice_ = slice_2[1:] + slice_3[1:]
adp = cov_imp1.style.applymap(color_values, subset = slice_).set_precision(1).set_properties(**{'background-color': '#fafafa'}, subset = slice_2).set_properties(**{'background-color': '#f7f7f7'}, subset = slice_3)

In [None]:
   adp  # render the styled per-locale table (Styler built from cov_imp1 in the previous cell)

#### Table 2

In [None]:
# Line chart of the mean pct_access per product category over time (rows whose Basic_category
# is 'x' are left out), with the WHO pandemic declaration and the summer-holiday window marked.
axis_style = dict(showline=True, linecolor='#f5f2f2', showgrid=True, gridwidth=1, gridcolor='#f5f2f2',
                  linewidth=2, tickfont_family='monospace', tickfont_color='#221f1f', tickfont_size=12)
annotation_font = dict(family='monospace', size=11, color='black')

fig = px.line(cat_access.query("Basic_category != 'x' "), x='time', y='pct_access',
              color='Basic_category', line_group='Basic_category')
fig.update_layout(
    plot_bgcolor='white',
    title='Dynamics of pct_access of all products by product category',
    title_font_family='monospace',
    title_font_color='#221f1f',
    title_font_size=20,
    title_x=0.5,
)
fig.update_yaxes(**axis_style)
fig.update_xaxes(**axis_style)

# Pandemic declaration marker and its label.
fig.add_vline(x='2020-03-11', line_width=3, line_color='red')
fig.add_annotation(x='2020-03-11', y=4.5, text='WHO has declared Covid-19 a pandemic',
                   showarrow=True, font=annotation_font, arrowhead=2, arrowsize=1,
                   arrowwidth=2, arrowcolor='#636363', ax=130, ay=1)

# Shaded summer-holiday band and its label.
fig.add_vrect(x0="2020-06-01", x1="2020-08-31", fillcolor="yellow", opacity=0.25, line_width=0)
fig.add_annotation(x="2020-07-15", y=3.8, text="Summer holidays", showarrow=False,
                   font=annotation_font)

fig.update_traces(line_width=1)
fig.show()

#### Figure 14

In [None]:
# Line chart of the mean engagement_index per product category over time (rows whose Basic_category
# is 'x' are left out), with the WHO pandemic declaration and the summer-holiday window marked.
axis_style = dict(showline=True, linecolor='#f5f2f2', showgrid=True, gridwidth=1, gridcolor='#f5f2f2',
                  linewidth=2, tickfont_family='monospace', tickfont_color='#221f1f', tickfont_size=12)
annotation_font = dict(family='monospace', size=11, color='black')

fig = px.line(cat_eng.query("Basic_category != 'x' "), x='time', y='engagement_index',
              color='Basic_category', line_group='Basic_category')
fig.update_layout(
    plot_bgcolor='white',
    title='Dynamics of engagement_index of all products by product category',
    title_font_family='monospace',
    title_font_color='#221f1f',
    title_font_size=20,
    title_x=0.5,
)
fig.update_yaxes(**axis_style)
fig.update_xaxes(**axis_style)

# Pandemic declaration marker and its label.
fig.add_vline(x='2020-03-11', line_width=3, line_color='red')
fig.add_annotation(x='2020-03-11', y=1900, text='WHO has declared Covid-19 a pandemic',
                   showarrow=True, font=annotation_font, arrowhead=2, arrowsize=1,
                   arrowwidth=2, arrowcolor='#636363', ax=130, ay=1)

# Shaded summer-holiday band and its label.
fig.add_vrect(x0="2020-06-01", x1="2020-08-31", fillcolor="yellow", opacity=0.25, line_width=0)
fig.add_annotation(x="2020-07-15", y=1600, text="Summer holidays", showarrow=False,
                   font=annotation_font)

fig.update_traces(line_width=1)
fig.show()

#### Figure 15

In [None]:
# Table 3 data: for every product category, the mean pct_access and
# engagement_index of the baseline week (2020-03-09 .. 2020-03-13), followed by
# the week-over-week percentage change for each of the 12 weeks after it.

BASELINE_MONDAY = '2020-03-09'   # Monday of the baseline week
N_WEEKS = 12                     # number of follow-up weeks (w1 .. w12)


def weekly_means(frame, value_col, category, first_monday=BASELINE_MONDAY, n_weeks=N_WEEKS):
    """Return the Monday-to-Friday means of ``value_col`` for one category.

    Parameters
    ----------
    frame : DataFrame with ``time``, ``Basic_category`` and ``value_col`` columns.
    value_col : name of the column to average.
    category : value of ``Basic_category`` to keep.
    first_monday : ISO date of the Monday that opens the baseline week.
    n_weeks : how many consecutive weeks after the baseline week to include.

    Returns
    -------
    list of ``n_weeks + 1`` means; element 0 is the baseline week. A week with
    no matching rows yields NaN.
    """
    in_category = frame[frame['Basic_category'] == category]
    means = []
    for monday in pd.date_range(first_monday, periods=n_weeks + 1, freq='7D'):
        # Bounds are kept as ISO date strings, i.e. the same inclusive
        # Monday..Friday bounds the hand-written query strings used.
        start = monday.strftime('%Y-%m-%d')
        end = (monday + pd.Timedelta(days=4)).strftime('%Y-%m-%d')
        in_week = (in_category['time'] >= start) & (in_category['time'] <= end)
        means.append(in_category.loc[in_week, value_col].mean())
    return means


def week_over_week(frame, value_col, category, suffix, baseline_decimals):
    """Build the ``mean_<suffix>`` / ``w<N>_<suffix>`` entries for one category.

    ``mean_<suffix>`` is the baseline-week mean rounded to ``baseline_decimals``.
    ``w<N>_<suffix>`` is the percentage change of week N versus week N-1
    (week 0 being the baseline week), rounded to one decimal. The changes are
    computed from the unrounded means so rounding of the baseline does not
    leak into w1. A change from a zero mean is undefined and reported as NaN.
    """
    means = weekly_means(frame, value_col, category)
    row = {f'mean_{suffix}': round(means[0], baseline_decimals)}
    for week in range(1, len(means)):
        previous = means[week - 1]
        if previous == 0:
            change = float('nan')
        else:
            change = (means[week] / previous - 1) * 100
        row[f'w{week}_{suffix}'] = round(change, 1)
    return row


def color_values(val):
    """CSS for one cell: red for a negative change, green otherwise."""
    color = 'red' if val < 0 else 'green'
    return 'color: %s' % color


# Column groups used both to order the table and to target the styling.
slice_2 = ['mean_access'] + [f'w{week}_access' for week in range(1, N_WEEKS + 1)]
slice_3 = ['mean_eng'] + [f'w{week}_eng' for week in range(1, N_WEEKS + 1)]
slice_ = slice_2[1:] + slice_3[1:]          # only the percentage-change columns

categories = cat_eng.query("Basic_category != 'x'")["Basic_category"].unique().tolist()

# One record per category; building the frame once avoids chained-indexing
# assignments (cov_imp2[col][row] = ...), which may write to a temporary copy.
rows = []
for category in categories:
    row = {'Basic_category': category}
    row.update(week_over_week(cat_access, 'pct_access', category, 'access', 2))
    row.update(week_over_week(cat_eng, 'engagement_index', category, 'eng', 1))
    rows.append(row)

cov_imp2 = pd.DataFrame(rows, columns=['Basic_category'] + slice_2 + slice_3)

# Styled view: coloured changes, one decimal everywhere, and a slightly
# different background for the access and engagement column groups.
styler = cov_imp2.style
# Styler.applymap was renamed to Styler.map in newer pandas; use whichever exists.
color_cells = getattr(styler, 'map', None) or styler.applymap
aaa = (
    color_cells(color_values, subset=slice_)
    # Explicit format string instead of the deprecated Styler.set_precision(1).
    .format('{:.1f}', subset=slice_2 + slice_3)
    .set_properties(**{'background-color': '#fafafa'}, subset=slice_2)
    .set_properties(**{'background-color': '#f7f7f7'}, subset=slice_3)
)

In [None]:
# Display the styled week-over-week table built in the previous cell (Table 3).
aaa

#### Table 3

# Conclusion
When the COVID-19 pandemic was declared, most students, teachers and other users of educational facilities and materials did not engage in digital learning immediately, but only after about a week. Right after the third week, users began going offline from digital learning; this mostly occurred in May 2020, probably because of conspiracy theorists' claims that COVID-19 was not real and because most schools were on summer holidays.
 
After the summer holidays, users of educational facilities and materials were actively engaged in digital learning again, as is evident in Figures 12 to 15 above from the middle of August 2020 onwards. Both pct_access and engagement_index increased across most states, locales and primary functions of the products.

COVID-19 has urged and encouraged us to make good use of technology in education, and from this point onwards digital learning will become a major and integral part of the educational system across states and locales.
