In [None]:
import pandas as pd
import numpy as np
import datetime
import glob
import copy

from plotly import subplots
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objects as go
import plotly.express as px

import gc

import warnings  
warnings.filterwarnings('ignore')
init_notebook_mode(connected=True)

from IPython.core.display import display, HTML, Javascript


# Colour palette shared by every chart in this notebook (hex RGB strings).
# In the charts below, green marks the low black/hispanic-share group and
# dark grey the high-share group; the remaining greys are used for the
# in-between buckets and for de-emphasised lines.
palette_darkgrey = "#383C45"
palette_silver = "#A2A5A9"
palette_green = "#4DC000"
palette_blue = "#278BD3"
palette_platinum = "#E3E4E5"

# Intermediate greys, ordered from darker (grey2) to lighter (grey4).
palette_grey2 = "#676A6C"
palette_grey3 = "#959894"
palette_grey4 = "#C4C5BB"

# Dictionary to map full state / territory names to their two-letter postal
# abbreviation. Credits @rogerallen on github.
# NOTE(review): not referenced in the portion of the notebook reviewed here;
# presumably used by a later (map) cell - confirm before removing.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    # Non-state jurisdictions.
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}


def annotation_helper(fig, texts, x, y, line_spacing, align="left", bgcolor="rgba(0,0,0,0)", borderpad=0, ref="axes", width=100):
    """Add a block of stacked single-line annotations to a plotly figure.

    Plotly annotations are single strings; multi-line text normally has to be
    passed as one long ``<br>``-separated value. This helper instead takes one
    string per line and emits one annotation per line, moving each subsequent
    line down by a configurable amount.

    Args:
        fig: Object exposing ``add_annotation`` (a plotly figure).
        texts: Iterable of strings, one per rendered line, top to bottom.
        x: Horizontal anchor shared by every line.
        y: Vertical anchor of the first line; later lines are placed at
            ``y - <accumulated spacing>``.
        line_spacing: Either a single number used between all lines, or a
            list where item ``i`` is the gap placed *after* line ``i``. When
            the list is shorter than ``texts`` the last usable value is
            reused for the remaining lines (0 if the list is empty).
        align: Text alignment passed to plotly.
        bgcolor: Annotation background colour.
        borderpad: Accepted for interface compatibility only.
            NOTE: the padding is fixed at 4 regardless of this argument;
            honouring the parameter's default of 0 would shift every
            existing chart, so the historical value is kept.
        ref: ``"axes"`` to position in data coordinates (``x``/``y`` axes),
            anything else to position in ``"paper"`` coordinates.
        width: Annotation width in pixels.
    """
    spacing_is_list = isinstance(line_spacing, list)
    coordinate_ref = ("x", "y") if ref == "axes" else ("paper", "paper")

    current_line_spacing = 0  # fallback when an empty list is supplied
    total_spacing = 0

    for index, text in enumerate(texts):
        if not spacing_is_list:
            current_line_spacing = line_spacing
        elif index < len(line_spacing):
            current_line_spacing = line_spacing[index]
        # else: the list is exhausted - keep reusing the previous spacing
        # instead of indexing past its end.

        fig.add_annotation(dict(
            x=x,
            y=y - total_spacing,
            width=width,
            showarrow=False,
            text=text,
            bgcolor=bgcolor,
            align=align,
            borderpad=4,
            xref=coordinate_ref[0],
            yref=coordinate_ref[1],
        ))

        total_spacing += current_line_spacing

# Helper functions to assign the values for high_hispanic_black_pct
def assign_high_hispanic_black_pct(x):
    """Flag a row as having a high (>= 40%) black/hispanic share.

    Args:
        x: Mapping-like row (e.g. a DataFrame row) holding the numeric
            fraction under the key ``"pct_black_hispanic"``.

    Returns:
        ``True`` when the fraction is at least 0.4, ``False`` when it is
        below 0.4, and ``np.nan`` when the fraction is missing (NaN), so
        that missing data is not silently counted as "low".
    """
    pct = x["pct_black_hispanic"]
    if pct >= 0.4:
        return True
    if pct < 0.4:
        return False
    # NaN fails both comparisons above. `np.nan` is used because the
    # `np.NaN` alias was removed in NumPy 2.0.
    return np.nan
    
def assign_high_hispanic_black_pct_categorical(x):
    """Flag a row as high (>= 40%) black/hispanic share from its bucket label.

    Args:
        x: Mapping-like row holding the bucket label under the key
            ``"pct_black/hispanic"`` (e.g. ``"[0.2, 0.4["``).

    Returns:
        ``True`` for the buckets covering 40-100%, ``False`` for the buckets
        covering 0-40%, and ``np.nan`` for anything else (including a
        missing label).
    """
    bucket = x["pct_black/hispanic"]
    if bucket in ("[0.4, 0.6[", "[0.6, 0.8[", "[0.8, 1["):
        return True
    if bucket in ("[0, 0.2[", "[0.2, 0.4["):
        return False
    # Unknown / missing bucket. `np.nan` is used because the `np.NaN`
    # alias was removed in NumPy 2.0.
    return np.nan


In [None]:
# Concatenate the district-wise engagement data from all files, then enrich
# it with the district and product metadata.
# Code excerpt to merge the district-wise data from Ruchi Bhatia - "😷COVID-19 Impact on Digital Learning💻: EDA + W&B"
from pathlib import Path

path = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data/'
all_files = glob.glob(path + "/*.csv")

if not all_files:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate".
    raise FileNotFoundError("No engagement CSV files found under " + path)

engagement_data = []

for filename in all_files:
    district_data = pd.read_csv(filename, index_col=None, header=0)
    # Each file is named "<district_id>.csv". Take the id from the file name
    # itself rather than from a fixed position in the path, so the cell keeps
    # working if the dataset is mounted at a different directory depth.
    district_id = Path(filename).stem
    district_data["district_id"] = district_id
    engagement_data.append(district_data)

engagement_df = pd.concat(engagement_data)
engagement_df["district_id"] = engagement_df["district_id"].astype(int)

# read the district and product info files
districts_info = pd.read_csv("../input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv")
products_info = pd.read_csv("../input/learnplatform-covid19-impact-on-digital-learning/products_info.csv")

# Left joins: keep every engagement row even when metadata is missing.
engagement_districts_merged = pd.merge(engagement_df, districts_info, how="left", on="district_id")
engagement_districts_products_merged = pd.merge(engagement_districts_merged, products_info, how="left", left_on="lp_id", right_on="LP ID")

# dropping all engagement info without a state
engagement_full = engagement_districts_products_merged[engagement_districts_products_merged["state"].notnull()]

# drop the intermediate frames and run garbage collection to free up space
del engagement_df
del engagement_districts_merged
del engagement_districts_products_merged
gc.collect()

In [None]:
# NCES public school characteristics (2018-19), reduced to the columns used
# in this analysis.
nces_data = pd.read_csv("../input/nces-public-school-characteristics-20182019/Public_School_Characteristics_2018-19.csv")
nces_columns = ["OBJECTID","STABR","NMCNTY","LEAID","TOTFRL","FRELCH","REDLCH","TOTAL","HI","BL","WH","STUTERATIO","CHARTER_TEXT","MAGNET_TEXT"]

# .copy() makes nces_df an independent frame, so the derived columns below
# are real column assignments rather than writes to a slice of nces_data
# (which pandas flags with SettingWithCopyWarning - silenced in this notebook
# by the global warnings filter).
nces_df = nces_data[nces_columns].copy()

# Derived shares, both relative to the TOTAL column.
nces_df["pct_black_hispanic"] = (nces_df["HI"] + nces_df["BL"]) / nces_df["TOTAL"]
nces_df["pct_free_reduced_lunch"] = nces_df["TOTFRL"] / nces_df["TOTAL"]

# A plain `nces_df["pct_black_hispanic"] >= 0.4` would label rows with a
# missing percentage as False; the helper returns NaN for those instead.
nces_df["high_pct_black_hispanic"] = nces_df.apply(assign_high_hispanic_black_pct, axis=1)

# The Learning Gap
In early 2020 the spread of the pandemic prompted the closing of schools in the US, with most, if not all, shifting their focus to online learning. Students and teachers would utilise an online mode of learning to make up for the learning loss due to the lack of in-person teaching.

With this shift to remote learning, students from lower-income families are being left behind because they don't have access to high-quality remote education or an environment conducive to online learning - including a quiet space without distractions, devices that they do not need to share, high-speed internet and parental/academic supervision. This has led to the further widening of the already existing learning gap for these students - a group which is arguably the most in need of quality learning.

In this notebook we try to leverage LearnPlatform's student engagement data to better evaluate this learning gap and to understand how it is prevalent even with the shift to online learning.

## Engagement Gap by Racial Diversity
LearnPlatform provides us with information relating to the page load activity of students on a school district level and has supplemented this data with details about the district(racial breakdown, per student spending, etc.) and the product for which the engagement is recorded. 

So we start by looking at a broad overview of how the average engagement varied based on the percentage of black or hispanic students in the district.
From past trends one might expect that school districts with higher **white populations might have better quality of remote education**, while those with higher black and hispanic populations might suffer with the shift to digital learning platforms.

<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning
    <br>
    <span style="font-style:italic">note: engagement data which doesn't belong to any state is excluded. Data is plotted with a 14 day average.</span> 
</p>

In [None]:
# Daily mean engagement index for every black/hispanic share bucket.
rows_with_state = engagement_full[engagement_full["state"].notnull()]
engagement_by_hispanic_black = (
    rows_with_state
    .groupby(["pct_black/hispanic", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# Number of days in the rolling average applied to every line.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {"showline": True, "linewidth": 1, "linecolor": palette_darkgrey, "dtick": "M1", "tickformat": "%b\n%Y"},
    "yaxis": {"showline": False, "showgrid": True, "gridwidth": 1, "gridcolor": '#ddd', "linecolor": palette_darkgrey},
    "showlegend": False,
    "hovermode": "x unified",
    "width": 800,
    "height": 550,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)


def _bucket_rows(bucket_label):
    """Return the daily averages belonging to one share bucket."""
    in_bucket = engagement_by_hispanic_black["pct_black/hispanic"] == bucket_label
    return engagement_by_hispanic_black[in_bucket]


def _rolling_engagement(bucket_frame):
    """Return the bucket's engagement smoothed over the moving-average window."""
    return bucket_frame["engagement_index"].rolling(moving_average_window).mean()


engagement_02_04 = _bucket_rows("[0.2, 0.4[")
engagement_04_06 = _bucket_rows("[0.4, 0.6[")
engagement_06_08 = _bucket_rows("[0.6, 0.8[")
engagement_08_10 = _bucket_rows("[0.8, 1[")
engagement_00_02 = _bucket_rows("[0, 0.2[")

# Grey lines, lightest to darkest. The 80-100% line must be added last so
# that the "tonexty" fill below is drawn against it.
grey_lines = (
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
    (engagement_08_10, palette_darkgrey, 1.8, '80-100%'),
)
for bucket_frame, line_colour, line_width, trace_name in grey_lines:
    fig.add_trace(go.Scatter(
        x=bucket_frame["time"],
        y=_rolling_engagement(bucket_frame),
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# Invisible 0-20% line whose only job is to shade the gap up to the
# previously added (80-100%) trace.
fig.add_trace(go.Scatter(
    x=engagement_00_02["time"],
    y=_rolling_engagement(engagement_00_02),
    mode='lines',
    line=dict(color="#ccc", width=0),
    fill="tonexty",
    hoverinfo='none',
))

# The visible, highlighted 0-20% line.
fig.add_trace(go.Scatter(
    x=engagement_00_02["time"],
    y=_rolling_engagement(engagement_00_02),
    mode='lines',
    line=dict(color=palette_green, width=3),
    name='     0-20%',
))

# Template for the coloured, bold in-chart labels: (colour, label text).
bold_label = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"

# Label for the 80-100% line.
text = [
    bold_label % (palette_darkgrey, "80-100% "),
    "<b>Hispanic/black students </b>",
    "8 districts",
]
annotation_helper(fig, text, datetime.date(2020, 11, 20), 440, [25, 30], width=200)

# Label for the 0-20% line.
text = [
    bold_label % (palette_green, "0-20%"),
    bold_label % (palette_green, "Hispanic/black "),
    "116 districts",
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 150, [25, 30])

# Labels for the three in-between buckets.
text = [
    bold_label % (palette_grey4, "20-40%"),
    bold_label % (palette_grey3, "40-60%"),
    bold_label % (palette_grey2, "60-80%"),
]
annotation_helper(fig, text, datetime.date(2020, 11, 5), 175, [25, 30], width=50, bgcolor="rgba(255,255,255,0.7)")

# Y-axis caption.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>average engagement index</span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 3, 15), 520, [35], width=200)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Did districts with students of color engage less?</span>",
    "<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'> Districts with more hispanic/black students</b> seem to engage more, contrary to our expectation. </span>" % (palette_darkgrey),
    "<span style='font-size:13px; font-family:Helvetica'> This result could skewed due to a smaller sample size in these districts,ie 8 compared to the 116 </span>",
    "<span style='font-size:13px; font-family:Helvetica'> in <b style='color:%s'>white-dominated school districts</b>. </span>" % (palette_green),
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=600)

fig.show()

We see that **districts up to the 80% black/hispanic student mark are in line with our intuition** that the greater the black or hispanic population, the lower the online engagement. However, contrary to this trend, students in the category with the highest percentage of minority race students had the highest engagement scores. We will look into some of the reasons for this later on.

### Reasons for anomalies in 80-100% hispanic/black group

Simply exploring the reason for these results uncovers several underlying issues in our dataset. Simply plotting the average engagements of this group might not immediately reveal an accurate picture of their online learning, and we will dive deeper into the data and look at some of the reasons behind why we get these results:

### 1. Outliers in the district data
When we look into districts with higher percentages of hispanic or black students we find a few cases, like those of New York and Arizona, which have **unusually high engagement** records. These districts seem to bias the 80-100% hispanic/black group in our previous chart, especially since it has only 8 districts in total.


<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning
    <br>
    <span style="font-style:italic">note: Districts without any state are excluded. Data is plotted with a 14 day average.</span> 
</p>

In [None]:
# Daily mean engagement per district.
engagement_grouped = engagement_full.groupby(["district_id","time"])["engagement_index"].mean().reset_index()
engagement_rolling = engagement_grouped.groupby("district_id")["engagement_index"].rolling(14).mean().reset_index()

# Row-position join: both frames come out of a groupby on district_id, so row
# i of one corresponds to row i of the other. The resulting
# "engagement_index_rolling" column is not read in this cell (the loop below
# recomputes the rolling mean per district); it is kept in case later cells
# use it.
engagement_grouped = engagement_grouped.join(engagement_rolling["engagement_index"], rsuffix="_rolling")
engagement_by_district = pd.merge(engagement_grouped, districts_info, how="left", on="district_id")

# Only districts whose state is known are plotted.
districts = districts_info[districts_info["state"].notnull()]["district_id"].unique()

fig = subplots.make_subplots(
    rows=1,
    cols=2,
    horizontal_spacing=0.1,
    shared_yaxes=True
)

# State highlighted in each subplot column (left: New York, right: Arizona).
highlight_column_by_state = {"New York": 1, "Arizona": 2}


def _district_trace(district_df, engagement, color, width, opacity):
    """Build the line trace for one district (hover shows state / id / value)."""
    return go.Scatter(
        x=district_df["time"],
        y=engagement,
        mode='lines',
        opacity=opacity,
        line=dict(color=color, width=width),
        customdata=district_df[["state","district_id"]].join(engagement),
        hovertemplate='<b>%{customdata[0]}</b><br>district id: %{customdata[1]}<br>Avg. engagement: %{customdata[2]:.0f}',
        name=""
    )


for district_id in districts:

    district_df = engagement_by_district[engagement_by_district["district_id"] == district_id]

    # Skip districts that have no engagement rows (iloc[0] below would fail)
    # or no usable state, instead of re-plotting the previous district's
    # trace under a stale variable.
    if district_df.empty or district_df["state"].isnull().values.any():
        continue

    state = district_df["state"].iloc[0]

    color = "#bbb"
    opacity = 0.6

    engagement = district_df["engagement_index"].rolling(moving_average_window).mean()

    # Every district is drawn as a faint grey line in both subplots ...
    trace = _district_trace(district_df, engagement, color, 1, opacity)

    # ... except in the subplot dedicated to its state, where it is drawn
    # as a solid dark line.
    highlight_col = highlight_column_by_state.get(state)
    if highlight_col is not None:
        trace_highlighted = _district_trace(district_df, engagement, palette_darkgrey, 1.3, 1)

    # add_trace(row=, col=) replaces the deprecated append_trace().
    for col in (1, 2):
        fig.add_trace(trace_highlighted if col == highlight_col else trace, row=1, col=col)


large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Engagement outliers</span>"
small_title_format = "<span style='font-size:14px; font-family:Helvetica'>Average engagement index of each district, highlighting those in NY (left) <br>and AZ (right) with <b>higher share of black and hispanic students</b>."


layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    showlegend = False,
    margin = dict(t=160,pad=0),
    xaxis = dict(showline=True, linewidth=1, linecolor=palette_darkgrey, dtick="M3",tickformat="%b\n%Y"),
    xaxis2 = dict(showline=True, linewidth=1, linecolor=palette_darkgrey, dtick="M3",tickformat="%b\n%Y"),
    yaxis = dict(showline=False, showgrid=True, gridwidth=1, gridcolor='#ddd', linecolor=palette_darkgrey),
    yaxis2 = dict(showline=False, showgrid=True, gridwidth=1, gridcolor='#ddd', linecolor=palette_darkgrey),
    plot_bgcolor='#fff',
    height = 400,
    width = 700
)

# New York has a hispanic/black dominant district which performs abnormally better than all other districts
text = [
    "<span style='font-family:Tahoma; font-size:12px'><b>Hispanic/black district </b> </span>",
    "<span style='font-family:Tahoma; font-size:12px'>in New York that performs </span>",
    "<span style='font-family:Tahoma; font-size:12px'><b>unusally better</b> </span>",
]

annotation_helper(fig, text, 0.17, 1., [0.075,0.075], width=250, ref="paper")
# Short horizontal pointer from the label to the New York line.
fig.add_shape(
    type="line",xref="paper", yref="paper",
    x0=0.17, x1=0.122,
    y0=0.91, y1=0.91,
    line=dict( color= palette_grey2, width=1),
)


# Arizona has only 1 district which has a massive peak in August
text = [
    "<span style='font-family:Tahoma; font-size:12px'><b>Arizona</b> has only one</span>",
    "<span style='font-family:Tahoma; font-size:12px'>district, which has a </span>",
    "<span style='font-family:Tahoma; font-size:12px'><b>sudden growth</b> in August </span>",
]

annotation_helper(fig, text, 1.2, 1.02, [0.075,0.075], width=250, ref="paper")
# Short vertical pointer from the label down to the Arizona peak.
fig.add_shape(
    type="line",xref="paper", yref="paper",
    x0=0.842, x1=0.842,
    y0=0.755, y1=0.505,
    line=dict( color= palette_grey2, width=1),
)

fig['layout'].update(layout)
iplot(fig)

Because we are working with an incomplete set of school districts, it is hard to tell whether these two highlighted districts just happen to be very high engagement districts or if districts in NY and AZ just have higher engagement in general. Currently **Arizona** (with only 1 district) **has the highest average engagement** among all the states, followed by **New York** (8 districts) **in second** place.

### 2. State-wise data
In our initial graph we simply averaged across data from all states. This worked well for a broad overview but we have to consider that **not all states can be treated the same** - districts in different states may not always be comparable to one another due to the general quality of education in a particular state, school shutdown policies, differences in COVID case numbers/deaths, etc. So now we look at the same data, but only grouping the districts in each state


<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning
    <br>
    <span style="font-style:italic">note: Data is aggregated on a weekly basis, and the weekly values are plotted with a moving average. We also group data with &lt;40% and &gt;=40% black/hispanic students separately to get a better visual of the learning gap in each state. <br>Only selected states are shown below. </span> 
</p>

In [None]:
# States shown in the 3x2 grid, in reading order (left-to-right, top-to-bottom).
states = ["Illinois", "California", "Connecticut", "Virginia", "Massachusetts", "Ohio"]

# Number of districts per state on each side of the 40% black/hispanic mark
# ("higher" = >= 40%, "lower" = < 40%); shown under each subplot title.
state_counts = {
    "Illinois": {"higher": 5, "lower": 13},
    "California": {"higher": 6, "lower": 6},
    "Connecticut": {"higher": 5, "lower": 25},
    "Virginia": {"higher": 1, "lower": 3},
    "Massachusetts": {"higher": 1, "lower": 20},
    "Ohio": {"higher": 1, "lower": 10},
}

# Width (in weeks) of the centred moving average applied to the weekly means.
weeks_window_size = 3

fig = subplots.make_subplots(
    rows=3,
    cols=2,
    horizontal_spacing = 0.075,
    vertical_spacing = 0.18,
    shared_yaxes = True,
    subplot_titles = ["  "]*6,
 )

# Bucket label -> "high share" flag. Same mapping as
# assign_high_hispanic_black_pct_categorical, applied column-wise with
# Series.map (labels not listed here, including NaN, map to NaN) instead of a
# slow row-by-row DataFrame.apply.
share_bucket_is_high = {
    "[0, 0.2[": False,
    "[0.2, 0.4[": False,
    "[0.4, 0.6[": True,
    "[0.6, 0.8[": True,
    "[0.8, 1[": True,
}

for index, state in enumerate(states):

    # .copy(): the two column assignments below must not write to a slice
    # of engagement_full.
    state_df = engagement_full[engagement_full["state"] == state].copy()

    state_df["high_pct_black_hispanic"] = state_df["pct_black/hispanic"].map(share_bucket_is_high)
    state_df["time"] = pd.to_datetime(state_df["time"])

    row = index // 2 + 1
    col = index % 2 + 1

    # Weekly mean engagement of the < 40% group. Columns are selected with a
    # list: tuple-style selection (['time','engagement_index']) is rejected
    # by pandas 2.x, and 'time' is already the resample key.
    low_pct_black_hispanic = state_df[state_df["high_pct_black_hispanic"] == False].resample('W', on='time')[['engagement_index']].mean()
    low_smoothed = low_pct_black_hispanic["engagement_index"].rolling(weeks_window_size, center=True).mean()

    trace2 = go.Scatter(
                    x= low_pct_black_hispanic.index,
                    y= low_smoothed,
                    mode='lines',
                    line= dict(color=palette_green, width=1.7),
                    name='Low blk/hisp',
                    showlegend= False
    )

    # Invisible copy of the low-share line, used only to shade the area up to
    # the previously added (high-share) trace.
    trace_learning_gap = go.Scatter(
                    x= low_pct_black_hispanic.index,
                    y= low_smoothed,
                    mode='lines',
                    line= dict(color= palette_grey3, width=0),
                    fill="tonexty",
                    showlegend= False,
                    hoverinfo= "none"
    )

    # Weekly mean engagement of the >= 40% group.
    high_pct_black_hispanic = state_df[state_df["high_pct_black_hispanic"] == True].resample('W', on='time')[['engagement_index']].mean()
    trace1 = go.Scatter(
                    x= high_pct_black_hispanic.index,
                    y= high_pct_black_hispanic["engagement_index"].rolling(weeks_window_size, center=True).mean(),
                    mode='lines',
                    line= dict(color=palette_darkgrey, width=1.5),
                    name='High blk/hisp',
                    showlegend= False
    )

    # Order matters: the fill trace must directly follow the high-share line.
    # add_trace(row=, col=) replaces the deprecated append_trace().
    fig.add_trace(trace1, row=row, col=col)
    fig.add_trace(trace_learning_gap, row=row, col=col)
    fig.add_trace(trace2, row=row, col=col)


# Legend-only dummy traces (never drawn): they exist purely to get three
# legend entries with the right colours. They carry a single empty point
# rather than embedding a whole DataFrame as `y`.
legend_anchor_x = [low_pct_black_hispanic.index[0]]
fig.add_trace(go.Bar(x=legend_anchor_x, y=[None], marker=dict(color="#ccc"), name="Learning Gap", visible="legendonly"))
fig.add_trace(go.Bar(x=legend_anchor_x, y=[None], marker=dict(color=palette_green), name="< 40% students are black/hispanic", visible="legendonly"))
fig.add_trace(go.Bar(x=legend_anchor_x, y=[None], marker=dict(color="#000"), name=">= 40% students are black/hispanic", visible="legendonly"), 1, 1)


large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>State-wise engagement trends</span>"
small_title_format = "<span style='font-size:14px; font-family:Helvetica'> We see that most states show a marked learning gap with <b style='color:%s'>white-dominated districts</b> leading the pack</span>" % (palette_green)

layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    legend = dict(
        orientation="h",
        traceorder="reversed",
        yanchor="top",
        y=1.250,
        font=dict(family="Helvetica", size=14, color="rgba(0,0,0,100)"),
        bgcolor = 'rgba(255,255,255,100)',
        xanchor="left",
        x=0.4,
    ),

    margin = dict(t=350, b=100, pad=0),

    hovermode = "x unified",

    plot_bgcolor='#fff',
    height= 1000,
    width = 750
)

# Identical axis styling for all six subplots (keys: xaxis, xaxis2..xaxis6
# and yaxis1..yaxis6).
x_axis_style = dict(showline=True, linewidth=1, linecolor=palette_darkgrey, dtick="M2",tickformat="%b\n%Y")
y_axis_style = dict(showline=False, showgrid=True, gridwidth=1, gridcolor='#ddd', linecolor=palette_darkgrey, range=[0,410])
for axis_number in range(1, len(states) + 1):
    x_axis_key = "xaxis" if axis_number == 1 else "xaxis%d" % axis_number
    layout[x_axis_key] = dict(x_axis_style)
    layout["yaxis%d" % axis_number] = dict(y_axis_style)

# Per-subplot heading: state name plus the district counts on each side.
for index, state in enumerate(states):
    text = [
        "<span style='font-size:15px; font-family: Helvetica; font-weight: 600'>" + state + "</span>",
        "<span style='font-size:11px; font-style: italic'>(n_lower=" + str(state_counts[state]["lower"]) + ", n_higher=" + str(state_counts[state]["higher"]) + ")</span>"
    ]

    x = [0.01, 0.9][index % 2]
    y = [1.065, 0.665, 0.24][index// 2]

    annotation_helper(fig, text, x, y, [0.03], width=200, ref="paper")

# Footnote explaining n_higher / n_lower.
text = [
    "<span style='font-size:12px; font-family: Tahoma;'>Where <b>n_higher</b> = num. districts with more than 40%, </span>",
    "<span style='font-size:12px; font-family: Tahoma;'>           <b>n_lower</b> = num. districts with less than 40% hispanic/black students </span>"
]

annotation_helper(fig, text, 0.00, -0.125, [0.025], width=400, ref="paper")


fig['layout'].update(layout)
iplot(fig)

While I picked some of the cases where the learning gap was more pronounced, it was a similar pattern that would be seen across almost all states. There seems to be a trend of **lower engagement by districts with higher black and hispanic populations** across the board.


Grouping the race data in the above manner makes the learning gap **easier to visualise** - we are no longer working with five different race categories, which are difficult to compare all at once. Additionally, it **increases the sample size** on either side of the divide, helping us deal with oddities caused by the small number of high hispanic/black districts. For these reasons you will see this grouping quite a bit in this analysis.

In fact when we use this treatment on the first graph in this notebook, we get the following:

In [None]:
# Rows that have both a state and a black/hispanic share bucket.
rows_with_share = engagement_full[(engagement_full["state"].notnull()) & (engagement_full["pct_black/hispanic"].notnull())]

# True for the 40-100% buckets, False for the 0-40% buckets. The flag is kept
# as a separate Series and used directly as a group key, so no column is
# written onto a filtered slice of engagement_full.
is_high_share = rows_with_share["pct_black/hispanic"].isin(["[0.4, 0.6[", "[0.6, 0.8[", "[0.8, 1["]).rename("high_pct_black_hispanic")

# Daily mean engagement for each of the two groups.
engagement_by_hispanic_black = rows_with_share.groupby([is_high_share, "time"])["engagement_index"].mean().reset_index()

moving_average_window = 14

layout = dict(
    margin = dict(t=150),
    xaxis = dict(showline=True, linewidth=1, linecolor=palette_darkgrey, dtick="M1",tickformat="%b\n%Y"),
    yaxis = dict(showline=False, showgrid=True, gridwidth=1, gridcolor='#ddd', linecolor=palette_darkgrey, range=[0,255]),
    showlegend = False,
    hovermode="x unified",
    width = 800,
    height = 550,
    plot_bgcolor= "#fff",
    hoverlabel = dict(
        bgcolor="white",
        font_size=12
    )
)

fig = go.Figure(layout=layout)

# NOTE: the variable names are inherited from the first chart; here
# engagement_08_10 holds the whole 40-100% group and engagement_00_02 the
# whole 0-40% group. The trace names (shown in the unified hover box) are
# labelled accordingly.
engagement_08_10 = engagement_by_hispanic_black[engagement_by_hispanic_black["high_pct_black_hispanic"] == True]
fig.add_trace(go.Scatter(
                    x= engagement_08_10["time"],
                    y= engagement_08_10["engagement_index"].rolling(moving_average_window).mean(),
                    mode='lines',
                    line= dict(color=palette_darkgrey, width=1.5),
                    name='40-100%'))

# draws the filled in learning gap (invisible line shaded up to the trace above)
engagement_00_02 = engagement_by_hispanic_black[engagement_by_hispanic_black["high_pct_black_hispanic"] == False]
fig.add_trace(go.Scatter(
                    x=engagement_00_02["time"],
                    y= engagement_00_02["engagement_index"].rolling(moving_average_window).mean(),
                    mode='lines',
                    line= dict(color="#ccc", width=0),
                    fill="tonexty",
                    hoverinfo='none',
))

fig.add_trace(go.Scatter(
                    x=engagement_00_02["time"],
                    y= engagement_00_02["engagement_index"].rolling(moving_average_window).mean(),
                    mode='lines',
                    line= dict(color=palette_green, width=2),
                    name='     0-40%'))


# Label for the 40-100% line.
text = [
    "<span style='font-family:Tahoma; font-size:12px'>Districts with</span>",
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-100%%</b>" % (palette_darkgrey),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>Hispanic/black </b>" % (palette_darkgrey),
    "<span style='font-family:Tahoma; font-size:11px'>show lower engagement</span>",
    "<span style='font-family:Tahoma; font-size:11px'>for most of the year.</span>"
]
annotation_helper(fig, text, datetime.date(2020, 10, 28), 100, [11,9,12,11], align="right", width=200)


# Label for the 0-40% line.
text = [
    "<span style='font-family:Tahoma; font-size:12px'>Districts with</span>",
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-40%%</b>" % (palette_green),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>Hispanic/black</b>" % (palette_green),
    "<span style='font-family:Tahoma; font-size:11px'>Perform better on average.</span>"
]
annotation_helper(fig, text, datetime.date(2020, 7, 15), 200, [11,9,12], width=150)


# Y-axis caption.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>average engagement index</span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 3, 5), 255, [10], width=200)


# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Alternate grouping of Black/Hispanic percent</span>",
    "<span style='font-size:13px; font-family:Helvetica'> When grouping the race data as mentioned earlier, we see how <b style='color:%s'>white-dominated</b></span>" % (palette_green) ,
    "<span style='font-size:13px; font-family:Helvetica'> <b style='color:%s'>districts</b> have higher engagement values when compared with <b style='color:%s'>black/hispanic</b></span>" % (palette_green, palette_darkgrey),
    "<span style='font-size:13px; font-family:Helvetica'> <b style='color:%s'>majority school districts</b>.</span>" % (palette_darkgrey)
]
annotation_helper(fig, text, 0.9, 1.375, [0.12,0.055,0.055], ref="paper", width=600)

fig.show()

### 3. Distribution of our data
The competition data includes engagement data on 233 districts across 23 states. In and of itself this is a large dataset and an invaluable source of student engagement data. 

But one has to understand that the US has approximately 13,800 school districts spread over 52 states/jurisdictions. Since we're working with an incomplete dataset of the education landscape in the US, it's possible that our specific selection of districts might affect the analysis. So when comparing districts across different diversity groups, it makes sense to check if the data is coming from similar types of locales. 

<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning - District info
    <br>
    <span style="font-style:italic">note: Data only includes districts whose locale data is present.</span> 
</p>

In [None]:
# Stacked bar chart: for every diversity group (share of Black/Hispanic students),
# show which fraction of its districts lies in each locale. Bar width encodes the
# size of the group relative to all groups.

# Number of districts per (locale, diversity group); districts without locale data are left out.
counts = districts_info[districts_info["locale"].notnull()].groupby(["locale","pct_black/hispanic"])["pct_black/hispanic"].count()

# The diversity groups present for "City" define the x positions of the chart.
x_labels = counts["City"].keys().tolist()

# Total number of districts in each diversity group.
totals = {
    group: districts_info[districts_info["pct_black/hispanic"] == group]["district_id"].count()
    for group in x_labels
}

locales = ["Rural", "Town", "Suburb", "City"]
locale_colors = [palette_grey2, palette_grey3, palette_platinum, palette_green]

# Bar geometry depends only on the group sizes, so it is computed once and shared by
# every locale trace and by the axis ticks. Deriving it per locale (from the groups
# that locale happens to contain) would shift the bars of any locale that lacks a group.
group_sizes = np.array([totals[group] for group in x_labels], dtype="float")
bar_widths = list(0.5 * group_sizes / np.sum(group_sizes))
bar_centers = np.cumsum(bar_widths) - np.array(bar_widths) / 2


fig = go.Figure()

for index, locale in enumerate(locales):
    locale_counts = counts[locale]
    color = locale_colors[3 - index]

    # Share of each group's districts found in this locale; a group that has no
    # district in this locale contributes 0 instead of being dropped.
    y = np.array([locale_counts.get(group, 0) for group in x_labels], dtype="float") / group_sizes

    trace = go.Bar(
        name= locale,
        x= bar_centers,
        y= y,
        width=bar_widths,
        marker_color= color,
        marker_line_color = "#fefefe",
        marker_line_width = 1.5,
        hoverinfo = "none",
    )
    fig.add_trace(trace)


large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Makeup of each diversity group</span>"
small_title_format = "<span style='font-size:14px; font-family:Helvetica'>Data on districts with higher hispanic or black populations include <b>almost no <b style='color:%s'>town</b> or <b style='color:%s'>rural</b> locales</b></span>" % (palette_grey4, palette_green)

# Tick labels, one per diversity group (district counts are hard-coded).
labels = [
    "<span style='font-family:Tahoma'><b style='font-size:13px;'> 0-20% Hispanic/Black students</b> <br><span style='font-size:11px;'> 116 districts </span> </span>",
    "<span style='font-family:Tahoma'><b style='font-size:12px;'>20-40</b> <br><span style='font-size:11px;'>24 districts </span> </span>",
    "<span style='font-family:Tahoma'><b style='font-size:12px;'>40-60</b> <br><span style='font-size:11px;'>17 </span> </span>",
    "<span style='font-family:Tahoma'><b style='font-size:12px;'>60-80</b> <br><span style='font-size:11px;'>11 </span> </span>",
    "<span style='font-family:Tahoma'><b style='font-size:12px;'>        80-100</b> <br><span style='font-size:11px;'>8 </span> </span>",
] 

layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    margin = dict(t=200, b=0, pad=0),
    
    legend = dict(
        orientation="h",
        traceorder="reversed",
        yanchor="top",
        y=1.13,
        font=dict(family="Helvetica", size=14, color="rgba(0,0,0,100)"),
        xanchor="left",
        x=-0.01
    ),
    # ticks sit at the centre of each variable-width bar
    xaxis = dict(tickvals = bar_centers, side="top", tickangle=0, ticktext=labels),
    yaxis = dict( dtick=0.2, tickformat="% ", tickfont = dict(size=11), ticks="outside"),
    barmode='stack',

    plot_bgcolor='#fff',
    height= 750,
    width = 800
)

# High black/hispanic districts have a smaller sample size from city and suburban areas only.

text = [
    "<span style='font-size:12px; font-family: Tahoma;'> High hispanic/black population </span>",
    "<span style='font-size:12px; font-family: Tahoma;'> districts have <b>smaller sample</b>  </span>",
    "<span style='font-size:12px; font-family: Tahoma;'> <b>sizes</b>, while only coming from  </span>",
    "<span style='font-size:12px; font-family: Tahoma;'> <b style='color: %s'>city</b> and <b style='color: %s'>suburban</b> locales</span>" % (palette_darkgrey, palette_grey3),
]

annotation_helper(fig, text, 0.424, 0.96, [0.036, 0.036, 0.036], bgcolor="#fefefe", width=170)

# Locale distribution of each diversity group
text = [
    "<span style='font-size:11px; font-family: Tahoma; color: #444;'> Distribution of</span>",
    "<span style='font-size:11px; font-family: Tahoma; color: #444;'> each diversity</span>",
    "<span style='font-size:11px; font-family: Tahoma; color: #444;'> group by locale</span>"
]

annotation_helper(fig, text, -0.155, 0.935, [0.0243, 0.0243], align="right", width=90, ref="paper")

# bar widths indicate proportion of districts in each group compared to the total
text = [
    "<span style='font-size:11px; font-family: Tahoma; color: #444;'> <b>Bar width indicates number of districts</b> in each group compared to the total</span>",
]

annotation_helper(fig, text, 0.002, 0.988, [0.0243], width=450, ref="paper")

# Apply the layout (stacked bar mode, axes, legend) and render
fig.update_layout(layout)
fig.show()


While we were aware of the fact that the higher black/hispanic districts had smaller sample sizes, we now see that a lot of their data also comes from **more developed locales**, which could possibly have higher engagement levels in general. This makes the fact that we are still able to find evidence of these districts underperforming all the more surprising.


We may also entertain the possibility of a **selection bias** in the data. With page loads being recorded via a chrome extension, we only survey those that actually have **internet access and a laptop/desktop** (chrome doesn't have browser extensions on mobile) for learning. These assumptions would fail to capture engagement data from lower income households that are unable to access online learning resources in the first place.


# The victims of the learning gap
The students that are the most affected by the learning gap tend to belong to **racial minorities, lower income level families**, with inadequate access to quality education and online resources. Many households also lack a dedicated space or device for students to learn on.

The schools these students attend are often **understaffed**, have **obsolete learning equipment** and **limited funds** to work with. It is also these same institutions which struggled the most with the shift to online learning - the first few weeks of lockdown would be spent on training staff members to use remote teaching tools, distributing laptops to students and getting meals to low-income families.

In the below chart we look at the gaps in expenditures per student, within the same state.

<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning - District info
    <br>
    <span style="font-style:italic">note: We consider the per student spending of a district to be the mean of the upper and lower bounds of its 'pp_total_raw' field. Data includes all states that have spending data for both groups.</span> 
</p>

In [None]:
# Dumbbell chart: per state, the mean per-pupil spending of districts with a high
# vs. a low share of Black/Hispanic students, joined by a line.

# .copy() so the derived columns are added to an independent frame rather than to a
# slice of districts_info.
district_spending_df = districts_info[["state","pp_total_raw", "pct_black/hispanic"]].copy()

# take the average of upper and lower bounds of pp_total_raw
# regex=False: "[" has to be removed as a literal character, independent of the
# pandas version's default for the regex flag.
spending_bounds = district_spending_df["pp_total_raw"].str.replace("[", "", regex=False).str.split(",")
district_spending_df["pp_total_raw_mean"] = (spending_bounds.str[0].astype(float) + spending_bounds.str[1].astype(float)) / 2


# create a field to indicate higher black/hispanic population group
district_spending_df["high_pct_black_hispanic"] = district_spending_df.apply(assign_high_hispanic_black_pct_categorical, axis=1)

spending_by_pct_black_hisp = district_spending_df.groupby(["state","high_pct_black_hispanic"])["pp_total_raw_mean"].mean()


# Chart start
fig = go.Figure()

group_flag = spending_by_pct_black_hisp.index.get_level_values("high_pct_black_hispanic")
has_spending = spending_by_pct_black_hisp.notnull()

spending_by_pct_black_hisp_high = spending_by_pct_black_hisp[(group_flag == True) & has_spending]
spending_by_pct_black_hisp_low = spending_by_pct_black_hisp[(group_flag == False) & has_spending]

# Only states that have spending data for both groups are plotted.
states_with_low_and_high_spendings = spending_by_pct_black_hisp_high.index.get_level_values("state").intersection(spending_by_pct_black_hisp_low.index.get_level_values("state"))

# Index both groups by state and select them in the same state order, so that every
# x value is explicitly paired with its y (state) label.
high_by_state = spending_by_pct_black_hisp_high.reset_index(level="high_pct_black_hispanic", drop=True).loc[states_with_low_and_high_spendings]
low_by_state = spending_by_pct_black_hisp_low.reset_index(level="high_pct_black_hispanic", drop=True).loc[states_with_low_and_high_spendings]

# connector between the two markers of each state
for state in states_with_low_and_high_spendings:
    fig.add_shape(
        type = 'line',
        x0 = high_by_state[state],
        y0 = state,
        x1 = low_by_state[state],
        y1 = state,
        line = dict(color= palette_silver , width = 5)
    )

fig.add_trace(go.Scatter(
    x = high_by_state.values,
    y = states_with_low_and_high_spendings,
    mode = 'markers',
    marker_color = palette_darkgrey,
    marker_size = 14,
    name = 'more than 40%'
))

fig.add_trace(go.Scatter(
    x = low_by_state.values,
    y = states_with_low_and_high_spendings,
    mode = 'markers',
    marker_color = palette_green,
    marker_size = 9,
    name = 'less than 40%'
))

text = [
    "<span style='font-size:11px; font-family: Tahoma;'>   USD spent per </span>",
    "<span style='font-size:11px; font-family: Tahoma;'>student, on average  </span>"
]

annotation_helper(fig, text, -0.02, -0.115, [0.065], width=450, ref="paper")
   
large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>The gap in spending on students</span>"
small_title_format = "<span style='font-size:14px; font-family:Helvetica'> We look at how the average per pupil expenditure varies between the <br> <b style='color: %s'>high</b> and <b style='color: %s'>low</b> black or hispanic populations in each state.</span>" % (palette_darkgrey, palette_green)
        
layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    margin = dict(t=110, b=40, pad=0),
    
    legend = dict(
        title="Spending where percentage of <br>black or hispanic students:",
        yanchor="top",
        y=0.92,
        xanchor="left",
        x=0.1,
        bgcolor="rgba(0,0,0,0)",
        font=dict(size=11),
        itemsizing="constant"
    ),
    
    xaxis = dict(showline=True, linewidth=1, linecolor=palette_grey2, range=[0,18000], dtick=4000, tickformat="r", ticks="outside"),
    yaxis= dict(
        side="right",
        showline=False, 
        showgrid=True, 
        gridwidth=1, 
        gridcolor='#ddd', 
        linecolor=palette_darkgrey,
        # fixed display order of the states on the category axis
        categoryorder = 'array', 
        categoryarray = ["Indiana", "North Carolina", "Virginia", "Illinois", "Washington", "Massachusetts"]
    ),

    plot_bgcolor='#fff',
    height= 400,
    width = 650
)
        
fig.update_layout(layout)
fig.show()

Of the six available states we see how in three states- Massachusetts, Illinois and Virginia, **spending is more for higher white population districts**, whereas North Carolina shows the **opposite trend**. Washington and Indiana both show the same spending in both groups.

## Free or Reduced Lunch eligibility
At a quick glance, this metric simply shows the percentage of students in a school who are eligible for free or reduced-price lunch (FRPL) under the National School Lunch Program (NSLP), but beneath the surface, it serves as a **proxy measure for the concentration of low-income students** in the school. The NCES itself uses this metric to group schools into the following categories:


<br>
<table class="table" style="width: 500px">
  <thead class="thead-dark">
  <tr>
      <th>Free or Reduced-Price Lunch (FRPL) %</th>
      <th>Category</th>
  </tr>
  </thead>
    
  <tbody>
  <tr>
      <td>0-25%</td>
      <td>Low-poverty school</td>
  </tr>    
  <tr>
      <td>25-50%</td>
      <td>Mid-low poverty school</td>
  </tr>  
  <tr>
      <td>50-75%</td>
      <td>Mid-high poverty school</td>
  </tr>   
  <tr>
      <td>75-100%</td>
      <td>High-poverty school</td>
  </tr> 
  </tbody>
</table>

<br>

With this classification in mind, let's look at whether there is any correlation between the poverty level of school districts and the percentage of their students that are of black or hispanic origin.

Earlier we used the percentage of black and hispanic students in the district to determine how well off the district was. With this new classification in mind, it would be interesting to see how our old basis of comparison of districts stacks up against the current poverty classifications. We do so in the following chart.

<p class="sidenote"> Source: NCES Public School Characteristics 2018-2019
    <br>
    <span style="font-style:italic">note: The data is from 2018-2019 and being well before the pandemic, is only intended to give us a general sense of the state of public schools in the US.</span> 
</p>

In [None]:
# Disabled cell: everything below is one string literal, so none of it is executed.
# The string holds a Plotly go.Table version of the FRPL category table
# (two columns: FRPL % range and poverty category).
# NOTE(review): the trailing ';' presumably keeps the notebook from echoing the string.
'''
#Plotly version of above table

trace1 = go.Table(
            columnwidth = [1,2.2],
            header = dict(values=[ 'Free or Reduced-Price <br>Lunch (FRPL) %', '<br>Category',],                        
                        line_color= palette_grey2,
                        fill_color= palette_darkgrey,
                        font_color= "white",
                        font_size = 12,
                        height = 30,
                        align='left'),
            cells = dict(
                values=[
                            ["    0-25%", "  25-50%", "  50-75%", " 75-100%"],
                            ["Low-poverty school", "Mid-low poverty school", "Mid-high poverty school", "High-poverty school"], 
                ], 
            line_color='darkslategray',
            line_width=0.5,
            fill_color='white',
            font_size = 13,
            height = 28,
            align=['left','left'])
        )


layout = dict(
            margin = dict(t=0,b=0,l=100,r=10),
            width = 600,
            height= 160
        )

fig = go.Figure()
fig.add_trace(trace1)

fig['layout'].update(layout)
iplot(fig)
''';


In [None]:
# Scatter plot (one point per state) of the mean share of Black/Hispanic students
# against the mean share of students eligible for free or reduced lunch.

# keep only rows where both percentages are positive
valid_schools = nces_df[(nces_df["pct_black_hispanic"]>0) & (nces_df["pct_free_reduced_lunch"]>0)]

# Columns are selected with a list: selecting several groupby columns with a bare
# tuple (["a", "b"]) is deprecated and raises on pandas >= 2.0.
nces_minority_frl = valid_schools.groupby("STABR")[["pct_black_hispanic","pct_free_reduced_lunch"]].mean().reset_index()

# Using plotly express for trendline feature
fig = px.scatter(
    nces_minority_frl, 
    x='pct_black_hispanic', 
    y='pct_free_reduced_lunch', 
    hover_name="STABR",
    hover_data=["pct_black_hispanic", "pct_free_reduced_lunch"],
    opacity=0.4, 
    trendline='ols', 
    trendline_color_override= palette_grey2,
    labels={
             "pct_free_reduced_lunch": "Percent free or reduced lunch",
             "pct_black_hispanic": "Percent of black/hispanic students",
    },
)

# shared styling for the markers and the (dashed) trendline
fig.update_traces(marker = dict( color = palette_grey3, size = 12, opacity=0.5, line_width=1.5, line_color= palette_grey2 ), line_dash="dash", line_width=1, showlegend=False)

large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Students of color and district poverty levels</span>"
small_title_format = "<span style='font-size:15px; font-family:Helvetica'>As the percentage of black or hispanic students rises, we see a trend of <br>larger share of students eligible for the free/reduced lunch program.  </span>"
        
layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    margin = dict(t=110, b=0),
    
    xaxis = dict(showline=True, linewidth=1, linecolor=palette_grey2, range=[0,1], dtick=0.25, tickformat="%" ),
    yaxis= dict(
        showline=True, linewidth=1, linecolor=palette_grey2,
        showgrid=True, 
        gridwidth=1, 
        gridcolor='#ddd', 
        dtick=0.25,
        tickformat="%",
        range=[0,1.05]
    ),

    plot_bgcolor='#fff',
    height= 550,
    width = 600
)

fig.update_layout(layout)
fig.show()

We see a moderate correlation between these two metrics. This tells us that in general, school districts with greater populations of black and hispanic students tend to **have higher poverty levels**.


## Student teacher ratios
We look at how the schools are staffed, in particular the number of students per teacher in the school. While the national average is around the 15:1 mark, schools should aim to do better than this number (a lower student-teacher ratio is better). Hover over each circle for additional information.


<p class="sidenote"> Source: NCES Public School Characteristics 2018-2019
    <br>
    <span style="font-style:italic">note: Student-teacher ratio values are from 2018-2019 and thus only serve to give us an idea of how this metric varies across both groups. Data from schools without student race information were not included.</span> 
</p>

In [None]:
# Bubble chart, one circle per (county, state): student-teacher ratio against the
# share of students eligible for free/reduced lunch. Colour marks counties above
# 40% Black/Hispanic students, marker size scales with the mean TOTAL column.

schools_with_data = nces_df[(nces_df["pct_free_reduced_lunch"]>0) & (nces_df["STUTERATIO"]>0) & (nces_df["pct_black_hispanic"].notnull())]

# Columns are selected with a list: selecting several groupby columns with a bare
# tuple is deprecated and raises on pandas >= 2.0.
nces_minority_teacher_ratio = schools_with_data.groupby(["NMCNTY","STABR"])[["pct_free_reduced_lunch","STUTERATIO","pct_black_hispanic","TOTAL"]].mean().reset_index()

fig = go.Figure()
    
trace = go.Scatter(
    x = nces_minority_teacher_ratio['STUTERATIO'], 
    y = nces_minority_teacher_ratio['pct_free_reduced_lunch'],
    # county name, state and mean student count feed the hover text
    customdata = nces_minority_teacher_ratio[["NMCNTY","STABR","TOTAL"]],
    hovertemplate = '<b>%{customdata[0]}</b><br>%{customdata[1]}<br>average number of students: %{customdata[2]:.0f}',
    name="",
    mode = "markers",
    marker = dict( 
        color = [ palette_darkgrey if pct > 0.4 else palette_green for pct in nces_minority_teacher_ratio["pct_black_hispanic"]], 
        size = nces_minority_teacher_ratio['TOTAL'] // 27, 
        line_color = "#ddd",
        opacity= 0.6
    ),
)
    
fig.add_trace(trace)

large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Student-teacher ratios and poverty levels </span>"
# The second <b> tag needs the style= attribute name, otherwise its colour and size are never applied.
small_title_format = "<span style='font-size:15px; font-family:Helvetica'> At the county level, <b style='color:%s; font-size:16px'>white majority districts</b> and <b style='color:%s; font-size:16px'>black/hispanic districts</b> show <br>similar student teacher ratio values. Circle radius indicates the average number of students.</span>" % (palette_green, palette_darkgrey)
        
layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    margin = dict(t=110, b=0),
    
    xaxis = dict(
        title="Student teacher ratio",
        showline=True, 
        linewidth=1, 
        linecolor=palette_grey2,
        dtick= 5,
        range=[0,40]
        ),
    yaxis = dict(
        showline=True, linewidth=1, linecolor=palette_grey2,
        showgrid=True, 
        gridwidth=1, 
        gridcolor='#ddd', 
        dtick=0.25,
        tickformat="%",
        range=[0,1.05]
    ),

    plot_bgcolor='#fff',
    height= 600,
    width = 700
)

# vertical reference line at the value labelled "average student to teacher ratio" below
fig.add_shape(
    type="line",
    x0=15.32, y0=0, x1=15.32, y1=1,
    line=dict(
        color= palette_darkgrey,
        width=2,
    )
)

text = [
    "<span style='font-size:12px; font-family: Helvetica; color: #222; font-weight: 600; '>15.32</span>",
    "<span style='font-size:11px; font-family: Helvetica; color: #444;'>average student</span>",
    "<span style='font-size:11px; font-family: Helvetica; color: #444;'>to teacher ratio</span>"
]
annotation_helper(fig, text, 18.95, 0.08, [0.027, 0.026], align="left", width=90)

# y axis title, drawn as an annotation instead of an axis title
text = [
    "<span style='font-size:12px; font-family: Helvetica; color: #444;'>Percent eligible for</span>",
    "<span style='font-size:12px; font-family: Helvetica; color: #444;'>free or reduced</span>",
    "<span style='font-size:12px; font-family: Helvetica; color: #444;'>lunch program</span>"
]
annotation_helper(fig, text, -0.0045, 0.92, [0.027, 0.026], align="left", width=100, ref="paper")

fig.update_layout(layout)
fig.show()

In [None]:
# Disabled cell: everything below is one string literal, so none of it is executed.
# The string holds the code that groups nces_df by high_pct_black_hispanic, county and
# state and displays summary statistics for the two groups.
# NOTE(review): the trailing ';' presumably keeps the notebook from echoing the string.
'''
# uncomment to display data for following table
stats_df = nces_df[(nces_df["pct_free_reduced_lunch"]>0) & (nces_df["STUTERATIO"]>0) & (nces_df["pct_black_hispanic"].notnull()) ].groupby(["high_pct_black_hispanic","NMCNTY","STABR"])["pct_free_reduced_lunch","STUTERATIO","pct_black_hispanic","TOTAL"].mean().reset_index()

print(nces_minority_teacher_ratio.mean())
display(stats_df[stats_df["high_pct_black_hispanic"] == False].describe())
display(stats_df[stats_df["high_pct_black_hispanic"] == True].describe())

# The number of counties above and below national average
avg_str = stats_df["STUTERATIO"].mean()
display((stats_df[stats_df["high_pct_black_hispanic"] == False]["STUTERATIO"] < avg_str).value_counts())
display((stats_df[stats_df["high_pct_black_hispanic"] == True ]["STUTERATIO"] < avg_str).value_counts())
''';

We see that the numbers aren't too different for student teacher ratios and both groups even have a similar share of schools that do better than the national average( *lower* student-teacher ratio than national average). A summary of the same is provided below:
<table class="table" style="width: 550px;">
    
  <thead class="thead-dark">
  <tr>
      <th> Counties with Hispanic<br>& black students (2018-19)</th>
      <th> Less than 40% </th>
      <th> 40% or higher</th>
  </tr>
  </thead>

  <tbody>
  <tr>
      <td>Number of counties</td>
      <td><p class="light-table-data">2745</p></td>
      <td><p class="light-table-data">1381</p></td>
  </tr>     
  <tr>
      <td>Eligible for Free/Reduced Lunch</td>
      <td><p class="light-table-data">49.07</p></td>
      <td><p class="dark-table-data">74.61</p></td>
  </tr>    
  <tr>
      <td>Student teacher ratio</td>
      <td><p class="dark-table-data">15.61</p></td>
      <td><p class="light-table-data">15.41</p></td>
  </tr>
  <tr>
      <td>Counties with better student teacher ratio than national avg.</td>
      <td><p class="dark-table-data"> 61.37% </p></td>
      <td><p class="light-table-data"> 60.24% </p></td>
  </tr> 
  </tbody>
    
</table>

# The shift to online learning

While high engagement numbers from certain districts might indicate that students are motivated to engage in their learning, it doesn't directly tell us about the share of students that are actually engaging in the learning process in that district. The bulk of the engagement might just be coming from those students that have better circumstances, while others might have little to no engagement. 

Luckily for us, we are provided with data regarding the percentage of students that have at least one page load of the product each day. We see how this differs across different sections of society below.


<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning
    <br>
    <span style="font-style:italic">note: We only focus on Google Docs to prevent our results from being biased by students in certain groups using products that have much lower usage rates in general.</span> 
</p>

In [None]:
# Grid of horizontal bar charts (3 rows x 2 columns, one panel per state): mean
# pct_access for Google Docs in each diversity group, drawn over a grey background bar.
states = ["Illinois", "California", "Connecticut", "Virginia", "Massachusetts", "Ohio"]
labels = ["< 20%", "20-40%", "40-60%", "60-80%", "80-100%"]

fig = subplots.make_subplots(
    rows= 3, 
    cols= 2, 
    shared_yaxes=True, 
    shared_xaxes=True, 
    horizontal_spacing = 0.02, 
    vertical_spacing = 0.075 )


pct_access_df = engagement_full[engagement_full["Product Name"]=="Google Docs"]
pct_access_means = pct_access_df.groupby(["state","pct_black/hispanic"])["pct_access"].mean()

for index, state in enumerate(states):
    
    state_df = pct_access_means[state]

    # Pad the bar lengths with zeros up to one value per label. This is done where
    # the trace is built instead of patching hard-coded positions of fig["data"]
    # afterwards, which go stale once the background traces are interleaved.
    # NOTE(review): assumes the groups a state lacks are the highest ones - confirm.
    bar_values = state_df.values
    missing_groups = len(labels) - len(bar_values)
    if missing_groups > 0:
        bar_values = np.concatenate((bar_values, np.zeros(missing_groups)))
    
    trace = go.Bar(
        x = bar_values,
        y = labels,
        width = 0.9,
        marker = dict( color= [palette_green] + [palette_grey3]*2 + [palette_grey2] + [palette_darkgrey]),
        # only groups that have data get a value label
        text = np.round(state_df.values,2),
        texttemplate = " <span style='color: #fff'>%{text}</span> ",
        textposition = "inside",
        insidetextanchor="start",
        name = state,
        orientation = "h",
        hoverinfo = "none",
    )
    
    # constant-length grey bar behind every group
    background = go.Bar(
        x = [30]*5,
        y = labels,
        width = 0.9,
        marker = dict( color= "#e1e1e1" ),
        name = state,
        orientation = "h",
        hoverinfo = "none",
    )

    # panel position: states fill the grid row by row
    row, col = index // 2 + 1, index % 2 + 1
    fig.append_trace(background, row, col)
    fig.append_trace(trace, row, col)
    
large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Daily page loads across diversity groups</span>"
small_title_format = "<span style='font-size:14px; font-family:Helvetica'> We compare the percentage of students in each group that use a selected online learning<br> product, <b>Google Docs</b>, at least once a day </span>"
     
layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    showlegend = False,
    margin = dict(t=250, l=100, pad=6),
    plot_bgcolor='#fff',
    xaxis5 = dict(dtick = 15),
    xaxis6 = dict(dtick = 15),
    yaxis ={'categoryorder':'array', 'categoryarray': labels[::-1]},
    yaxis3={'categoryorder':'array', 'categoryarray': labels[::-1]},
    yaxis5={'categoryorder':'array', 'categoryarray': labels[::-1]},
    bargap = 0.05,
    barmode="overlay",
    height = 800,
    width = 700
)

fig.update_layout(layout)

# panel titles: state name plus its number of districts
for index, state in enumerate(states):
    text = [
        "<span style='font-size:14px; font-family: Helvetica; font-weight: 600'>" + state + "</span> (<span style='font-size:12px'>" + str(state_counts[state]["lower"] +  state_counts[state]["higher"]) + " districts</span>)"
    ]

    x = [-0.01, 0.91][index % 2]
    y = [1.1, 0.69, 0.28][index// 2]
    
    annotation_helper(fig, text, x, y, [0.03], width=200, ref="paper")

text = [
    "<span style='font-size:11px; font-family: Helvetica; color: #444;'> Percentage of</span>",
    "<span style='font-size:11px; font-family: Helvetica; color: #444;'> black or hispanic</span>",
    "<span style='font-size:11px; font-family: Helvetica; color: #444;'> students</span>"
]

annotation_helper(fig, text, -0.2, 1.1, [0.0243, 0.0243], align="right", width=90, ref="paper")

text = [
    "<span style='font-size:12px; font-family: Helvetica; color: #444;'> Average % of students that <b>loaded Google Docs at least once a day</b>.</span>",
]

annotation_helper(fig, text, -0.012, 1.055, [0.0243], align="left", width=500, ref="paper")

text = [
    "<span style='font-size:12px; font-family: Tahoma;'> Empty bars indicate that there is no data available for this group.</span>"
]

annotation_helper(fig, text, -0.025, -0.12, [0.025], width=400, ref="paper")
    
fig.show()

We see in quite a few cases that districts with the lower percentages of black and hispanic students tend to have higher share of students engaging with online learning products.


## Does greater access to learning tools mean higher engagement index?
I implied earlier that a district could have a high engagement index even with a low percentage of students having access to online learning, e.g. if a minority of students had good access to learning and made up the majority of the recorded page loads. To see if this is true, we plot each district with its average engagement_index and pct_access.


<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning
</p>

In [None]:
# Scatter plot (one point per district) of mean engagement_index against mean
# pct_access, with an OLS trendline and the Pearson correlation in the subtitle.

# Columns are selected with a list: selecting several groupby columns with a bare
# tuple is deprecated and raises on pandas >= 2.0.
engagement_pct_access_by_district = engagement_full.groupby(["state","district_id"])[["engagement_index","pct_access"]].mean().reset_index()

# Series.corr computes the Pearson correlation by default
corr = engagement_pct_access_by_district["engagement_index"].corr(engagement_pct_access_by_district["pct_access"])

# we switch to plotly express for this graph to utilise its in-built trendline feature
fig = px.scatter(
    engagement_pct_access_by_district, 
    x='engagement_index', 
    y='pct_access', 
    opacity=0.4, 
    trendline='ols', 
    trendline_color_override= palette_grey2,
    hover_name = "state",
    labels={
             "engagement_index": "Average Engagement index",
             "pct_access": "Average Percent access",
    },
)

# shared styling for the markers and the (dashed) trendline
fig.update_traces(marker = dict( color = palette_grey3, size = 12, opacity=0.5, line_width=1.5, line_color= palette_grey2 ), line_dash="dash", line_width=1, showlegend=False)

large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Comparing engagement and percent access</span>"
# the subtitle <span> is closed explicitly so the markup is well formed
small_title_format = "<span style='font-size:14px; font-family:Helvetica'> Engagement index and percent access for a given district are <b>highly <br> correlated</b> with one another, having a Pearson correlation of %s</span>" % (np.round(corr,3))
        
layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    margin = dict(t=110, b=0),
    
    xaxis = dict(showline=True, linewidth=1, linecolor=palette_grey2, range=[0,1501] ),
    yaxis= dict(
        showline=False, 
        showgrid=True, 
        gridwidth=1, 
        gridcolor='#ddd', 
        linecolor=palette_darkgrey,
        dtick=1,
    ),

    plot_bgcolor='#fff',
    height= 550,
    width = 600
)

fig.update_layout(layout)
fig.show()

We see how the two are **highly correlated** and that this trend is closely followed by most districts. Deviations from this trend are probably due to differences in the average number of page loads per student in each district.

## What did the students engage with?
Having answered the question of **who** actually participates in online learning, a natural follow-up question would be **what are they engaging with**? For this we look at the same division of students, except now we will focus on how they engage with some of the more popular products.


<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning - Product info
    <br>
    <span style="font-style:italic">note: Only the 14 products with highest engagement numbers were included.</span> 
</p>

In [None]:
# Data preparation for product engagement table
# Table of the mean engagement index per product, split into districts with a low
# and a high share of Black/Hispanic students, plus the (high - low) difference.
products_list = ["Google Docs", "Google Classroom", "Canvas", "YouTube", "Schoology", "Meet", "Kahoot!", "Google Forms", "Google Drive", "Google Sheets", "ClassLink", "Instagram", "Big Ideas Math", "Zoom"]

# .copy() so the flag column below is added to an independent frame rather than to a
# filtered slice of engagement_full.
selected_products_data = engagement_full[engagement_full["Product Name"].isin(products_list)].copy()
selected_products_data["high_pct_black_hispanic"] = selected_products_data.apply(assign_high_hispanic_black_pct_categorical, axis=1)


selected_products_data_grouped = selected_products_data.groupby(["Product Name", "high_pct_black_hispanic"])["engagement_index"].mean().reset_index()
# attach each product's category
selected_products_data_grouped = selected_products_data_grouped.merge(products_info[["Product Name", "Primary Essential Function"]])


# Chart creation for product engagement table
# .loc[products_list] puts the rows of both groups into the same product order
product_engagement_high = selected_products_data_grouped[selected_products_data_grouped["high_pct_black_hispanic"] == True ].set_index('Product Name').loc[products_list].reset_index()
product_engagement_low  = selected_products_data_grouped[selected_products_data_grouped["high_pct_black_hispanic"] == False].set_index('Product Name').loc[products_list].reset_index()

# positive difference: higher engagement in the high Black/Hispanic group
diff = np.round(product_engagement_high["engagement_index"] - product_engagement_low["engagement_index"])
diff_colors = ["grey" if d>0 else "white" for d in diff ]
diff_font_colors = ["white" if d>0 else "black" for d in diff ]

trace = go.Table(
            columnwidth = [1.3,2.5,1.5,1.5,1],
            header = dict(values=[ 'Product Name', 'Category', 'Low hispanic black population', 'High hispanic black population', 'Difference'],                        
                        line_color= palette_grey2,
                        fill_color= "#494949",
                        font_color= "white",
                        font_size = 12,
                        height = 30,
                        align='left'),
            cells = dict(
                # Column order must follow the header: low group first, then high group.
                values=[
                    products_list,
                    # keep only the first two "-"-separated parts of the category
                    product_engagement_high["Primary Essential Function"].str.split("-").str[0:2].str.join("-"),
                    np.round(product_engagement_low["engagement_index"] , 2),
                    np.round(product_engagement_high["engagement_index"], 2),
                    diff
                ], 
            line_color='darkslategray',
            line_width=0.5,
            fill_color=['white','white','white', 'white', diff_colors],
            font_size = 13,
            font_color= ['black','black','black','black', diff_font_colors],
            height = 24,
            align=['left','left','left','left','right']
            )
        )


layout = dict(
            margin = dict(t=0,b=0,l=50,r=10),
            width = 800,
            height = 650,
)

fig = go.Figure()
fig.add_trace(trace)

fig['layout'].update(layout)
iplot(fig)

While the previous trend prevails, with white-majority schools showing higher engagement across most of the table, we see a few exceptions: **YouTube, Meet** and **Zoom** all have higher engagement in districts with a higher share of black and hispanic students.

## How did engagement levels change in each state due to the pandemic?
We see how student participation changed for the two groups on either side of this educational divide in the following chart. Data was aggregated on a state level to see where online learning activity rose or fell in comparison with their pre-pandemic levels.

<p class="sidenote"> Source: LearnPlatform COVID-19 Impact on Digital Learning
</p>

In [None]:
# Data preparation for engagement slope chart
engagement_full["time"] = pd.to_datetime(engagement_full["time"])


def _google_engagement_in_month(month):
    """Return the Google LLC engagement rows recorded in calendar month `month`.

    Only rows with a known state and a known black/hispanic share are kept, and
    each kept row is flagged with its district group in the
    "high_pct_black_hispanic" column.
    """
    in_month    = engagement_full["time"].dt.month == month
    has_state   = engagement_full["state"].notnull()
    has_share   = engagement_full["pct_black/hispanic"].notnull()
    from_google = engagement_full["Provider/Company Name"] == "Google LLC"

    rows = engagement_full[in_month & has_state & has_share & from_google]
    rows["high_pct_black_hispanic"] = rows.apply(assign_high_hispanic_black_pct_categorical, axis=1)
    return rows


engagement_march    = _google_engagement_in_month(3)
engagement_december = _google_engagement_in_month(12)

# Mean engagement index per (state, district group) for each of the two months
group_keys = ["state", "high_pct_black_hispanic"]
engagement_march_groupby    = engagement_march.groupby(group_keys)["engagement_index"].mean()
engagement_december_groupby = engagement_december.groupby(group_keys)["engagement_index"].mean()

# Keep only the (state, group) pairs present in both months; the columns become
# engagement_index_december and engagement_index_march.
engagement_month_groupby = pd.merge(
    engagement_december_groupby,
    engagement_march_groupby,
    how="inner",
    left_index=True,
    right_index=True,
    suffixes=["_december", "_march"],
)


# Chart creation for engagement slope chart
fig = subplots.make_subplots(
    rows=1,
    cols=2,
    shared_yaxes=True,
    horizontal_spacing=0.035,
    vertical_spacing=0.18,
)

def label_helper(index, positive_slope):
    """Build the two text labels (March end, December end) for one slope line.

    Parameters:
        index: (state name, district-group flag) pair identifying the line.
        positive_slope: truthy when engagement rose from March to December.

    Returns:
        A two-element list. The first element is always ""; the second is an
        HTML span holding the state abbreviation, or "" when the state is in
        the hide-list of its district group.

    Raises:
        KeyError: if the state name is not a key of us_state_to_abbrev.
    """
    state_name = index[0]
    is_high = index[1] == True
    # "District Of Columbia" style capitalisation is normalised before the lookup
    abbrev = us_state_to_abbrev[state_name.replace("Of", "of")]

    # States whose label is suppressed in each subplot
    # (presumably to avoid overlapping text - TODO confirm)
    if is_high:
        hidden_states = ["FL", "NC", "CA"]
    else:
        hidden_states = ["IL", "CA", "WA", "VA", "MA", "NC"]

    if abbrev in hidden_states:
        return ["", ""]

    # Same emphasis scheme as the lines: rising = light and regular, falling = dark and bold
    if positive_slope:
        color, font_weight = palette_grey3, 400
    else:
        color, font_weight = palette_darkgrey, 600

    label = "<span style='color: %s; font-weight: %d; font-size:9px'>%s</span>" % (color, font_weight, abbrev)
    return ["", label]
    

# One slope line per (state, district group): the March value sits at x=0 and
# the December value at x=1.
for index, slope_row in engagement_month_groupby.iterrows():

    # Distinct names on purpose: the loop previously reused `engagement_march`
    # and `engagement_december`, overwriting the DataFrames of the same name
    # with scalars. Reading the row directly also avoids re-scanning the whole
    # frame with a boolean mask on every iteration.
    march_value = slope_row["engagement_index_march"]
    december_value = slope_row["engagement_index_december"]

    positive_slope = march_value < december_value

    # Rising lines are drawn thin and light, falling lines thick and dark
    color = palette_grey3 if positive_slope else palette_darkgrey
    width = 1 if positive_slope else 2.5

    trace = go.Scatter(
        x= [0,1],
        y= [march_value, december_value],
        mode='lines+markers+text',
        showlegend = False,
        text= label_helper(index, positive_slope),
        textposition=['middle left', 'middle right'],
        marker = dict(color=color),
        line = dict(
            color = color,
            width = width
        ),
        hoverinfo = "none",
    )

    # Groups flagged True go to the left subplot, all others to the right one
    fig.add_trace(trace, 1, 1 if index[1]==True else 2)
    
# Vertical guide lines at the March (x=0) and December (x=1) positions of both subplots
for axis_ref in ("x1", "x2"):
    for x_position in (0, 1):
        fig.add_shape(
            type='line',
            x0=x_position, x1=x_position,
            y0=0, y1=1,
            xref=axis_ref, yref='paper',
            line=dict(color=palette_grey2, width=1),
        )

large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Impact of the pandemic on engagement</span>"
small_title_format = "<span style='font-size:14px; font-family:Helvetica'>We compare the change in engagement for the two types of districts by state, from the start of the<br>pandemic to when case numbers reached their peak in December.</span>"


def _slope_xaxis():
    """Return a fresh x-axis config shared by both subplots (tick labels hidden)."""
    return dict(range=[-0.025,1.075], showticklabels=False)


def _slope_yaxis():
    """Return a fresh y-axis config shared by both subplots (fixed 0-3900 range)."""
    return dict(
        range=[0,3900],
        showline=False,
        showgrid=True,
        gridwidth=1,
        gridcolor='#ddd',
        linecolor=palette_grey2,
    )


layout = dict(
    title = large_title_format + "<br>" + small_title_format,
    margin = dict(t=150, b=40, r=20),

    xaxis  = _slope_xaxis(),
    xaxis2 = _slope_xaxis(),
    yaxis  = _slope_yaxis(),
    yaxis2 = _slope_yaxis(),

    plot_bgcolor='#fff',
    height= 600,
    width = 600
)

# Span templates for the annotation text: (color, content)
small_span = "<span style='font-size:11px; font-family: Tahoma; color: %s'>%s</span>"
bold_span  = "<span style='font-size:13px; font-weight:600; font-family: Tahoma; color: %s'>%s</span>"

# Y-axis caption, placed left of the first subplot
text = [small_span % (palette_darkgrey, line) for line in ("Engagement", "index in")]
annotation_helper(fig, text, -0.207, 1.025, [0.035], width=100, align="right", ref="paper")

# "March 2020" caption above the left edge of each subplot
text = [small_span % (palette_grey2, line) for line in ("March", "2020")]
for x_paper in (0.01, 0.635):
    annotation_helper(fig, text, x_paper, 1.025, [0.035], width=100, ref="paper")

# "December 2020" caption above the right edge of each subplot
text = [small_span % (palette_grey2, line) for line in ("December", "2020")]
for x_paper in (0.233, 0.968):
    annotation_helper(fig, text, x_paper, 1.025, [0.035], width=100, align="right", ref="paper")

# Subplot headings
text = [bold_span % (palette_darkgrey, line) for line in ("Districts with higher black", "and hispanic population")]
annotation_helper(fig, text, 0.025, 0.91, [0.035], width=200, align="center", ref="paper")

text = [bold_span % (palette_darkgrey, line) for line in ("Districts with lower black", "and hispanic population")]
annotation_helper(fig, text, 0.96, 0.91, [0.035], width=200, align="center", ref="paper")

# Footnote about the New York values that fall outside the fixed y-range
text = [
    "<span style='font-size:12px; font-family: Tahoma; '>* some engagement data for New York is not shown, see below.</span>"
]
annotation_helper(fig, text, -0.025, -0.08, [0.025], width=400, ref="paper")

fig.update_layout(layout)
fig.show()



# Statistics on data for next table
# New York is left out of the averages; every other (state, group) row is
# averaged per district group.
is_not_new_york = engagement_month_groupby.index.get_level_values("state") != "New York"
engagement_month_changes = (
    engagement_month_groupby.loc[is_not_new_york]
    .groupby("high_pct_black_hispanic")
    .mean()
)

# Relative change from March to December, as a fraction of the March average
march_average = engagement_month_changes["engagement_index_march"]
december_average = engagement_month_changes["engagement_index_december"]
engagement_month_changes["change"] = (december_average - march_average) / march_average

# uncomment below to print summary table 
# display(engagement_month_changes.join(engagement_month_groupby.groupby("high_pct_black_hispanic")["engagement_index_december"].count(), rsuffix="_count"))

Certain district data for New York was removed because it would require a completely different scale than the above graph to be shown clearly. For instance, New York has a March engagement value of 11,572, while the rest of the states have their corresponding engagement values in the 0 - 2,300 range. (*tip: clicking the "Autoscale" button at the top of the Plotly chart will reset the axes to include the NY data.*)


We see a summary of our results in the table below.


<table class="table" style="width: 550px;">
    
  <thead class="thead-dark" style="background:#494949 !important; ">
  <tr>
      <th> Districts with<br>Hispanic/ black students(%)</th>
      <th> Less than 40% </th>
      <th> 40% or higher</th>
  </tr>
  </thead>

  <tbody>
  <tr>
      <td>Number of districts</td>
      <td><p class="dark-table-data">16</p></td>
      <td><p class="light-table-data">14</p></td>
  </tr>     
  <tr>
      <td>Average Engagement in March 2020</td>
      <td><p class="dark-table-data">1324.75</p></td>
      <td><p class="light-table-data">812.64</p></td>
  </tr>    
  <tr>
      <td>Average Engagement in December 2020</td>
      <td><p class="dark-table-data">1692.57</p></td>
      <td><p class="light-table-data">1200.51</p></td>
  </tr>
  <tr>
      <td>Engagement change (March to December)</td>
      <td><p class="light-table-data"> +27% </p></td>
      <td><p class="dark-table-data"> +47% </p></td>
  </tr> 
  </tbody>
    
</table>

<br>
We see how, even though districts with higher black and hispanic population had lower engagement rates across the board, they had a higher percentage change from their original engagement rates. This is in part due to their <b>lower online activity at the start of the pandemic</b>, when online learning wasn't essential.

In [None]:
# Stylesheet cell: pulls in the Bootstrap 4 CSS from the CDN and defines the
# custom classes that the markdown cells of this notebook reference
# (dark-table-data / light-table-data for table cells, thead-dark for the
# table header, sidenote for the source notes).
html = """
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
<style>
    .dark-table-data{
        border-radius: 5px;
        background: #494949;
        color: #efefef !important;
        font-weight: 600;
        margin: 0px !important;
        padding: 2px 10px;
    }
    
    .light-table-data{
        margin: 0px !important;
        padding: 2px 10px;
    }
    
    thead.thead-dark tr th{
        background: #494949 !important;
    }
    
    .sidenote{
        font-size: 13px;
        border: 1px solid #d7d7d7;
        padding: 1px 10px 2px;
        box-shadow: 1px 1px 2px 1px rgba(0,0,0,0.3);
        margin-bottom: 3px;
    }
    .sidenote span{
        font-size: 12px;
    }
</style>
"""
# Left as the cell's last expression so the notebook displays the HTML object
# (presumably this is what makes the styles take effect - TODO confirm).
HTML(html)

## Conclusion

Throughout the course of this notebook we see how students from less-privileged backgrounds face inequalities in their online learning experience. From the data we see how the coronavirus pandemic has done little to level the playing field of American education, and instead has widened the gaps that have always existed. No student should have his success or education predetermined by the circumstances of his birth and steps must be taken to bridge this inequality.

Perhaps once schools have adapted to this online learning approach we may see changes as students are able to leverage learning resources that are freely available online. This may even pave the way for reimagining the traditional methods of education in the US, adopting the useful parts of the current remote learning model.

## References:
1. [NCES - Public school students eligible for free or reduced-price lunch](https://nces.ed.gov/fastfacts/display.asp?id=898): Dives deeper into the free reduced lunch plan and how it is utilised in their analyses.
2. [NCES - Public School Characteristics 2018-19 Edge Open Data](https://data-nces.opendata.arcgis.com/datasets/nces::public-school-characteristics-2018-19/about): Public school data to give a better understanding of public schooling.
3. [UM-Dearborn - How serious is the COVID “learning gap”?](https://umdearborn.edu/news/articles/how-serious-covid-learning-gap): Article on the learning loss due to widespread school shutdowns.
4. [McKinsey - Mind the Gap](https://www.mckinsey.com/industries/public-and-social-sector/our-insights/covid-19-and-learning-loss-disparities-grow-and-students-need-help): In depth exploration of the widening of the learning gap in the midst of the pandemic.
5. [ChalkBeat - A nationwide divide](https://www.chalkbeat.org/2020/9/11/21431146/hispanic-and-black-students-more-likely-than-white-students-to-start-the-school-year-online): Explores the learning gap and the way that schools and students cope with the learning loss.

## Massive Thanks
- Ruchi Bhatia: Used her amazing notebook [😷COVID-19 Impact on Digital Learning💻: EDA + W&B](https://www.kaggle.com/ruchi798/covid-19-impact-on-digital-learning-eda-w-b) for reference in initial data prep to join the competition data.

- Darío Weitz: [Marimekko Charts with Plotly Express](https://towardsdatascience.com/marimekko-charts-with-plotly-express-you-can-also-lie-with-charts-4ee98a30ed66): Marimekko charts aren't available out of the box in Plotly, so I used this article to get started. 
