In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import os

from plotly import subplots
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objects as go
import plotly.express as px
import datetime
import plotly.express as px

# Colour palette shared by the charts in this notebook (hex RGB strings).
# Greys, darkest to lightest:
palette_darkgrey = "#383C45"
palette_grey2 = "#676A6C"
palette_grey3 = "#959894"
palette_silver = "#A2A5A9"
palette_grey4 = "#C4C5BB"
palette_platinum = "#E3E4E5"
# Accent colours:
palette_green = "#4DC000"
palette_blue = "#278BD3"
palette_red = "#d32744"

# engagement data: one CSV per district; the file name (without extension) is
# used as the district id
eng_path = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data'
eng_files = glob.glob(eng_path + "/*.csv")
files = []

for file in eng_files:
    df = pd.read_csv(file, index_col=None, header=0)
    # Take the id from the file name itself rather than from a fixed position
    # in the path, so loading keeps working if the input directory is nested
    # differently or the platform uses another path separator.
    district_id = os.path.basename(file).split('.')[0]
    df['district_id'] = district_id
    files.append(df)

# stack the per-district frames and renumber the rows 0..n-1
engagement = pd.concat(files)
engagement = engagement.reset_index(drop=True)
engagement['time'] = pd.to_datetime(engagement['time'])

# districts and products data
districts = pd.read_csv('../input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv')
products = pd.read_csv('../input/learnplatform-covid19-impact-on-digital-learning/products_info.csv')

# NAEP test scores by state
naep = pd.read_csv('../input/us-education-datasets-unification-project/output_data/output_data/naep_states.csv')

# enrollment data by state
enrollment = pd.read_csv('../input/us-education-datasets-unification-project/output_data/output_data/enroll_states.csv')

In [None]:
# districts data clean up: replace each range label by the midpoint of its range
mapping_dict = {
    np.nan: np.nan,
    '[0, 0.2[': 0.1,
    '[0.2, 0.4[': 0.3,
    '[0.4, 0.6[': 0.5,
    '[0.6, 0.8[': 0.7,
    '[0.8, 1[': 0.9,
}
for col in ('pct_black/hispanic', 'pct_free/reduced'):
    districts[col] = districts[col].map(mapping_dict)

# per-pupil spending bins are 2000 wide; only the bins listed here are mapped
pp_total_dict = {np.nan: np.nan}
pp_total_dict.update({
    '[{}, {}['.format(lower, lower + 2000): lower + 1000
    for lower in list(range(4000, 24000, 2000)) + [32000]
})
districts['pp_total_raw'] = districts['pp_total_raw'].map(pp_total_dict)

# get latest NAEP scores
naep = naep[naep['YEAR'] == 2019]

# get latest enrollment; copy explicitly because new columns are added to the
# filtered frame below (avoids SettingWithCopyWarning / writes to a view)
enrollment = enrollment[enrollment['YEAR'] == 2016].copy()

races = ['AM', 'AS', 'HI', 'BL', 'WH', 'HP', 'TR']
enroll_grades = ['G05', 'G01']

# calculating percent of students enrolled by demographic:
# (female + male) of each race divided by the grade's all-student total
for race in races:
    for grade in enroll_grades:
        enrollment['{}_percent_{}'.format(grade, race)] = (
            enrollment['{}_{}_F'.format(grade, race)] + enrollment['{}_{}_M'.format(grade, race)]
        ) / enrollment['{}_A_A'.format(grade)]

# enrollment by sex, this wasn't used (these columns are not kept below)
for sex in ['M', 'F']:
    for grade in enroll_grades:
        enrollment['{}_percent_{}'.format(grade, sex)] = 0
    for race in races:
        for grade in enroll_grades:
            # each grade is divided by its own total; the grade-1 share used to
            # be divided by the grade-5 total
            enrollment['{}_percent_{}'.format(grade, sex)] += (
                enrollment['{}_{}_{}'.format(grade, race, sex)] / enrollment['{}_A_A'.format(grade)]
            )

# some districts are missing data on american indians, so the grade-5 share is
# taken as the remainder of the other six groups.
# NOTE(review): only G05 is back-filled this way; G01_percent_AM keeps its computed value.
enrollment['G05_percent_AM'] = 1 - sum(
    enrollment['G05_percent_{}'.format(race)] for race in ['AS', 'HI', 'BL', 'WH', 'HP', 'TR']
)

# calculate these as a sanity check to ensure the demographics add up to 1
for grade in enroll_grades:
    enrollment['{}_demo_total_percent'.format(grade)] = sum(
        enrollment['{}_percent_{}'.format(grade, race)] for race in races
    )

# keep columns that we want
keep_cols = (
    ['G05_percent_{}'.format(race) for race in races]
    + ['STATE']
    + ['G01_percent_{}'.format(race) for race in races]
    + ['G05_demo_total_percent', 'G01_demo_total_percent']
)
enrollment = enrollment[keep_cols]

# attach the NAEP scores to the enrollment shares; only states present in both are kept
naep_join_enrollment = pd.merge(enrollment, naep, how='inner', on='STATE')

# put the district state names into the join-key form used later:
# upper case, spaces replaced by underscores, column called STATE
districts['state'] = districts['state'].str.upper().str.replace(' ', '_')
districts = districts.rename(columns={'state': 'STATE'})

# some test scores aren't filled for certain states + demographics; fill those
# with the state's all-student average for the same grade and subject
for col in naep_join_enrollment.columns.tolist():
    grade = col[:3]
    # only the NAEP score columns (grade 04 and grade 08) are back-filled
    if grade not in ('G04', 'G08'):
        continue
    # a column ending in 'DING' is a READING score; every other G04/G08 column
    # is filled from the mathematics average, as before
    subject = 'READING' if col[-4:] == 'DING' else 'MATHEMATICS'
    state_average = naep_join_enrollment['{}_A_A_{}'.format(grade, subject)]
    # Assign the result back instead of calling fillna(inplace=True) on the
    # column selection: that form is chained assignment, which pandas has
    # deprecated and which does not update the frame under Copy-on-Write.
    naep_join_enrollment[col] = naep_join_enrollment[col].fillna(state_average)

In [None]:
# keep every district row and attach its state's NAEP / enrollment columns where the state matches
dist_naep_demo = pd.merge(districts, naep_join_enrollment, how='left', on='STATE')

# function to calculate demographic adjusted NAEP scores for each district
def calculate_adjusted_naep_score(df):
    test_types = ['MATHEMATICS','READING']
    grade_levels = ['08','04']
    for test in test_types:
        for grade in grade_levels:
            if grade == '08':
                enroll = '05'
                df['ADJ_GR{}_{}'.format(grade, test)] = df['pct_black/hispanic']*(df['G{}_HI_A_{}'.format(grade, test)]*(df['G{}_percent_HI'.format(enroll)]/( df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)])) \
                                                                              + df['G{}_BL_A_{}'.format(grade, test)]*(df['G{}_percent_BL'.format(enroll)]/( df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)])))  \
               + (1-df['pct_black/hispanic'])*(df['G{}_AS_A_{}'.format(grade, test)]*(df['G{}_percent_AS'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                               df['G{}_AM_A_{}'.format(grade, test)]*(df['G{}_percent_AM'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                             df['G{}_WH_A_{}'.format(grade, test)]*(df['G{}_percent_WH'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                             df['G{}_HP_A_{}'.format(grade, test)]*(df['G{}_percent_HP'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                             df['G{}_TR_A_{}'.format(grade, test)]*(df['G{}_percent_TR'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) )
            else:
                enroll = '01'
                df['ADJ_GR{}_{}'.format(grade, test)] = df['pct_black/hispanic']*(df['G{}_HI_A_{}'.format(grade, test)]*(df['G{}_percent_HI'.format(enroll)]/( df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)])) \
                                                                              + df['G{}_BL_A_{}'.format(grade, test)]*(df['G{}_percent_BL'.format(enroll)]/( df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)])))  \
               + (1-df['pct_black/hispanic'])*(df['G{}_AS_A_{}'.format(grade, test)]*(df['G{}_percent_AS'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                               df['G{}_AM_A_{}'.format(grade, test)]*(df['G{}_percent_AM'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                             df['G{}_WH_A_{}'.format(grade, test)]*(df['G{}_percent_WH'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                             df['G{}_HP_A_{}'.format(grade, test)]*(df['G{}_percent_HP'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) + \
                                             df['G{}_TR_A_{}'.format(grade, test)]*(df['G{}_percent_TR'.format(enroll)]/(1-(df['G{}_percent_HI'.format(enroll)] + df['G{}_percent_BL'.format(enroll)]))) )
            
    return df
    
dist_naep_demo = calculate_adjusted_naep_score(dist_naep_demo)

# drop all districts in which the state is not known; .copy() makes the result
# an independent frame, because later cells drop columns from it in place and
# assign new columns to it (which otherwise triggers SettingWithCopyWarning)
dist_naep_demo = dist_naep_demo[dist_naep_demo['STATE'].notna()].copy()

# LearnPlatform COVID-19 Impact on Digital Learning

The purpose of this notebook is to look at simple trends in LearnPlatform usage among school districts that have different outcomes

# Problem Statement:

As lockdowns began, pupils all around the US and the world moved immediately towards online learning platforms to continue their education. With the landscape of education changing overnight, school districts had to act quickly to adapt to online-based solutions. Difficulties around utilizing new technologies, lack of face-to-face learning, social isolation and many other factors contributed to learning loss that deeply impacts students' future outcomes.

LearnPlatform is uniquely positioned to provide analytics, insights, and possible solutions to issues that may arise from the use of online learning platforms. Educators and LearnPlatform alike are interested in optimizing and improving educational outcomes across the nation. With the ability to clearly see activity across school districts, LearnPlatform is able to provide insight into what online learning platforms are used and how often they are utilized. While socio-economic inequality clearly remains an important factor in educational outcomes, most educational tools on the internet are generally open source and can be used by any school district regardless of its economic prosperity.

<b>What actionable insights can we make into the usage of online platforms and how they relate to student outcomes?</b>

To determine district level student outcomes NAEP test scores will be calculated and then grouped into percentile level categories to then complete an analysis of the usage of online platforms.

# Summary:

Through the analysis we have uncovered some insights into better performing school districts, along with possible insights and solutions into what may improve student performance. Below we outline key findings, trends and possible solutions.

- Usage of online learning platforms is up across <b>all school districts</b>. Understanding the usage of online learning platforms and how it relates with districts that have the best outcomes <b style='color:#1aab45'>makes this analysis important</b>. 

- It is unquestionable that districts with <b style='color:#1aab45'>better outcomes have more engaged students</b>, while districts with <b style='color:#d32744'> worse outcomes have less engaged students</b>. A simple remedy to this may be to introduce a <b>participation grade</b> to all classes where one does not exist. The participation portion of the grade can be small, but meaningful, and should be related to students paying attention in class. Various forms of this exist already. Some educators use a 'magic word' in their slides that is used to later collect participation marks. Another fashion of this exists where mini-questions are asked mid lesson to all students and if the question is answered correctly then participation marks are given otherwise half-marks are given for just answering.     

- Another interesting finding was noticed in the engagement data. <b style='color:#1aab45'>Districts with better outcomes start the year later, around the beginning of September</b>. Starting the school year in mid to late August may distract students from completing schoolwork, as the summer is still going on and students are looking to go outside more instead of studying hard. This effect can be especially pronounced in classes where units build on each other, like mathematics. A possible solution to this may be just starting the year right around Labor Day.

- Another unsurprising finding is that districts with <b style='color:#1aab45'>better outcomes have higher engagement & usage with Career Planning & Job Search URLs</b>. While socio-economic issues may dissuade those who are less fortunate from looking towards university, college, or future careers that are possible, it's important that all students are aware of the possibilities that exist, including potential scholarships. Districts that have poor outcomes may need to think about introducing topics in various classes that force students to think about their future. When I was in high school there was a mandatory class for careers. While this may exist already at many schools, it's possible that creating a district-wide career planning class or session every week or month could make students more aware of and interested in their futures.

- As expected districts with <b style='color:#1aab45'>better outcomes have higher engagement & usage with FREE online dictionaries, thesauruses, encyclopedias, etc</b>. All of these resources are free and usable by any student. All districts should make an effort to educate all students on free resources and how they can find and use them. We need to ensure that students are all capable of accessing free resources!

- Districts that have the <b style='color:#d32744'>worst test outcomes are the least likely to use and be engaged with plagiarism detection services</b>. Ensuring academic integrity is incredibly important. Average educators do not have the time to copy and paste excerpts of essays online to catch cheaters when they have 100s of essays left to grade. Utilizing automatic services to detect plagiarism should be the norm. If students are aware that they can get away with cheating it doesn't only make the grades they get unfair but also cheats them out of an education.

- The most interesting finding of the work is that districts that have the <b style='color:#d32744'>worst test outcomes</b> do not have students that are mostly distracted online with social media, video games or streaming platforms. While some trends surfaced like on youtube, the 60-80% test score grouping also had similar levels of youtube engagement. This leads us to the conclusion that poorer performing students aren't performing that way due to being distracted online.

We expect that most of these suggestions should improve student outcomes across the US.

Below in the analysis we provide evidence for the above recommendations

# Analysis:

To complete this analysis we need to understand which school districts perform better than other school districts. To do this we use NAEP test scores.

The <b>National Assessment of Educational Progress</b> or NAEP for short, is the largest continuing and nationally representative assessment of what U.S. students know and can do in various subjects (1). NAEP is a congressionally mandated project administered by the National Center for Education Statistics (NCES), within the Institute of Education Sciences (IES) of the U.S. Department of Education (1). NAEP uses a sampling procedure that allows the assessment to be representative of the geographical, racial, ethnic, and socioeconomic diversity of the schools and students in the United States (1). State NAEP results are available in some subjects for grades 4 and 8 (1). 

Luckily we have 2019 NAEP grade 4 and 8 test scores for mathematics and reading, by race and state. This allows us to create an <b>Adjusted NAEP Score</b> for each district given the proportion of black and hispanic students is given. We also have data on enrollment from 2016 by race which allows us to calculate the proportion of each race of students to create a weighted average NAEP score given a district. 

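Since we are using 2016 race statistics and 2019 test scores, we use the race statistics that were calculated for grade 5 in 2016 to weight the grade 8 scores (and those for grade 1 in 2016 to weight the grade 4 scores), because we assume the demographics of each cohort stay roughly the same as it moves up three grades between 2016 and 2019.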

Since the proportion of black and hispanic students per district is given as a range, we choose the middle of the range as the value to be used. 

NAEP Test Score Source: https://www.kaggle.com/noriuk/us-education-datasets-unification-project

<b>We need some definitions for how to calculate the adjusted NAEP scores: </b>

Here the demographic related definitions: 

Proportion of black and hispanic students in a given district is: $$Percent Black Hispanic_{district_{i}} = PCT BLK HIS_{i}$$

State level proportion of black students is : $$Percent Black_{state} = PCT  BLK_{state}$$

State level proportion of hispanic students is : $$Percent Hispanic_{state} = PCT  HIS_{state}$$

State level proportion of American Indian students is : $$Percent American Indian_{state} = PCT AM_{state}$$

State level proportion of asian students is : $$Percent Asian_{state} = PCT AS_{state}$$

State level proportion of white students is : $$Percent White_{state} = PCT WH_{state}$$

State level proportion of Two or more races students is : $$Percent Two or more races_{state} = PCT TR_{state}$$

Here are the grade related definitions:

Average grade of black students in state : $$Grade Black_{state} = GR  BLK_{state}$$

Average grade of hispanic students in state : $$Grade Hispanic_{state} = GR  HIS_{state}$$

Average grade of American Indian students in state : $$Grade American Indian_{state} = GR AM_{state}$$

Average grade of asian students in state : $$Grade Asian_{state} = GR AS_{state}$$

Average grade of white students in state : $$Grade White_{state} = GR WH_{state}$$

Average grade of Two or more races students in state : $$Grade Two or more races_{state} = GR TR_{state}$$

Adjusted NAEP score for a given district over some grade or subject: $$NAEP_{district_{i}}$$

<b>Below we have the definition for the adjusted NAEP score </b>


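$$\begin{aligned}
NAEP_{district_{i}} = {} & (PCT BLK HIS_{i}) * \left( GR HIS_{state} * \frac{PCT HIS_{state}}{PCT HIS_{state} + PCT BLK_{state}} + GR BLK_{state} * \frac{PCT BLK_{state}}{PCT HIS_{state} + PCT BLK_{state}} \right) \\
& + (1 - PCT BLK HIS_{i}) * \Big( GR AS_{state} * \frac{PCT AS_{state}}{1 - (PCT HIS_{state} + PCT BLK_{state})} \\
& \qquad + GR AM_{state} * \frac{PCT AM_{state}}{1 - (PCT HIS_{state} + PCT BLK_{state})} \\
& \qquad + GR WH_{state} * \frac{PCT WH_{state}}{1 - (PCT HIS_{state} + PCT BLK_{state})} \\
& \qquad + GR HP_{state} * \frac{PCT HP_{state}}{1 - (PCT HIS_{state} + PCT BLK_{state})} \\
& \qquad + GR TR_{state} * \frac{PCT TR_{state}}{1 - (PCT HIS_{state} + PCT BLK_{state})} \Big)
\end{aligned}$$

Here $PCT HP_{state}$ and $GR HP_{state}$ are the state level proportion and average grade for the `HP` columns of the source data (Hawaiian Native / Pacific Islander students), defined in the same way as the quantities above; the code includes this term.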

The adjusted NAEP district score basically uses the district's percentage of black and hispanic students to weight the respective scores of each demographic of students, which are in turn weighted by their representation in each state.

Where there were null values the average state test score was used.

The purpose of this adjusted test score is to better gauge which districts score well on standardized tests. While standardized tests have many flaws and the NAEP has been criticized, we believe that a test that is taken by a nationally and state representative sample, across demographics, in which the tests that are administered are identical, can prove to be a valuable marker of school districts and their performance.

In [None]:
# (rows, columns) of the district/NAEP frame after rows with a missing STATE were dropped
dist_naep_demo.shape

# NAEP & Districts Data

We are working here with 176 districts worth of data from the original 233.

Below we take a look at some geographic level information on the districts we are looking at:


In [None]:
import plotly.graph_objects as go

import pandas as pd

# full state / territory name -> two-letter postal abbreviation
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# Upper-cased lookup used to translate the STATE column into abbreviations.
# The district STATE values are upper-cased with spaces replaced by
# underscores (e.g. "NEW_YORK"), so each name is registered in that form too;
# with only the space-separated key, every multi-word state mapped to NaN.
codes = {}
for state_name, abbrev in us_state_to_abbrev.items():
    upper_name = state_name.upper()
    codes[upper_name] = abbrev
    codes[upper_name.replace(' ', '_')] = abbrev

# mean adjusted grade-8 math score per state, with STATE translated through `codes`
df = (
    dist_naep_demo
    .groupby('STATE')['ADJ_GR08_MATHEMATICS']
    .mean()
    .reset_index()
)
df['STATE'] = df['STATE'].map(codes)

fig = go.Figure(
    go.Choropleth(
        locationmode='USA-states',  # how the entries of `locations` are interpreted
        locations=df['STATE'],  # spatial coordinates
        z=df['ADJ_GR08_MATHEMATICS'].astype(float),  # data to be colour-coded
        colorscale='Reds',
        colorbar_title="Average Math NAEP Score",
    )
)

fig.update_layout(
    title_text='Adjusted Average Math NAEP Score by State',
    geo_scope='usa',  # limit map scope to USA
    width=900,
    height=550,
)

fig.show()

In [None]:
# number of distinct districts per state, with STATE translated through `codes`
df = (
    dist_naep_demo
    .groupby('STATE')['district_id']
    .nunique()
    .reset_index()
)
df['STATE'] = df['STATE'].map(codes)

fig = go.Figure(
    go.Choropleth(
        locationmode='USA-states',  # how the entries of `locations` are interpreted
        locations=df['STATE'],  # spatial coordinates
        z=df['district_id'].astype(float),  # data to be colour-coded
        colorscale='Reds',
        colorbar_title="Number of Districts by State",
    )
)

fig.update_layout(
    title_text='Number of Districts by State',
    geo_scope='usa',  # limit map scope to USA
    width=900,
    height=550,
)

fig.show()



Now we plot the adjusted mathematics 8th grade score against other variables in the dataset.

In [None]:
# Using plotly express for trendline feature:
# adjusted grade-8 math score (x) against the share of Black + Hispanic pupils (y)
fig = px.scatter(
    dist_naep_demo,
    x='ADJ_GR08_MATHEMATICS',
    y='pct_black/hispanic',
    trendline='ols',
    trendline_color_override=palette_red,
)

# grey translucent markers; dashed, thicker trendline; no legend
fig.update_traces(
    marker=dict(color=palette_grey3, size=12, opacity=0.35, line_width=1.5, line_color=palette_grey2),
    line_dash="dash",
    line_width=4,
    showlegend=False,
)

large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Adjusted Math NAEP Scores & Percent Black + Hispanic Pupils</span>"
# The subtitle names the two variables actually plotted here; it previously
# repeated the "Reading and Math" wording of the reading-vs-math chart.
small_title_format = (
    "<span style='font-size:15px; font-family:Helvetica'>"
    "Adjusted Math NAEP scores and percent Black + Hispanic pupils are "
    "<b style='color:{}'> slightly negatively correlated</b> </span>"
).format(palette_red)

layout = dict(
    title=large_title_format + "<br>" + small_title_format,
    margin=dict(t=110, b=0),
    plot_bgcolor='#fff',
    height=550,
    width=900,
)

fig.update_layout(layout)
fig.show()

In [None]:
# Using plotly express for trendline feature:
# adjusted grade-8 math score (x) against the per-pupil spending midpoint (y)
fig = px.scatter(
    dist_naep_demo,
    x='ADJ_GR08_MATHEMATICS',
    y='pp_total_raw',
    trendline='ols',
    trendline_color_override=palette_darkgrey,
)

# grey translucent markers; dashed, thicker trendline; no legend
fig.update_traces(
    marker=dict(color=palette_grey3, size=12, opacity=0.35, line_width=1.5, line_color=palette_grey2),
    line_dash="dash",
    line_width=4,
    showlegend=False,
)

large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Adjusted Math NAEP Scores & District Spending</span>"
# The subtitle names the two variables actually plotted here; it previously
# repeated the "Reading and Math" wording of the reading-vs-math chart.
small_title_format = (
    "<span style='font-size:15px; font-family:Helvetica'>"
    "Adjusted Math NAEP scores and district spending are "
    "<b style='color:{}'>lightly correlated</b> </span>"
).format(palette_darkgrey)

layout = dict(
    title=large_title_format + "<br>" + small_title_format,
    margin=dict(t=110, b=0),
    plot_bgcolor='#fff',
    height=550,
    width=900,
)

fig.update_layout(layout)
fig.show()

In [None]:
# plotly express is used because it can fit and draw the OLS trendline:
# adjusted grade-8 math score (x) against adjusted grade-8 reading score (y)
fig = px.scatter(
    dist_naep_demo,
    x='ADJ_GR08_MATHEMATICS',
    y='ADJ_GR08_READING',
    trendline='ols',
    trendline_color_override=palette_green,
)

# grey translucent markers; dashed, thicker trendline; no legend
marker_style = dict(color=palette_grey3, size=12, opacity=0.35, line_width=1.5, line_color=palette_grey2)
fig.update_traces(marker=marker_style, line_dash="dash", line_width=4, showlegend=False)

large_title_format = "<span style='font-size:28px; font-family:Times New Roman'>Adjusted Reading & Math NAEP Scores</span>"
small_title_format = (
    "<span style='font-size:15px; font-family:Helvetica'>"
    "Reading and Math NAEP scores are <b style='color:{}'>highly correlated</b> </span>"
).format(palette_green)

layout = {
    'title': large_title_format + "<br>" + small_title_format,
    'margin': {'t': 110, 'b': 0},
    'plot_bgcolor': '#fff',
    'height': 550,
    'width': 900,
}

fig.update_layout(layout)
fig.show()

Since we want to investigate all of the test scores, and it looks like the mathematics and reading scores are very closely related, let's plot a correlation matrix of the test scores and some of the district variables that were provided.

In [None]:
# correlation matrix of the four adjusted scores and three district variables,
# displayed with a 'Greens' background gradient
(
    dist_naep_demo[[
        'ADJ_GR08_MATHEMATICS',
        'ADJ_GR08_READING',
        'ADJ_GR04_MATHEMATICS',
        'ADJ_GR04_READING',
        'pct_black/hispanic',
        'pct_free/reduced',
        'pp_total_raw',
    ]]
    .corr()
    .style
    .background_gradient(cmap='Greens')
)

All of the test scores are over <b style='color:#1aab45'>90% positively correlated</b> which is to be expected. Better performing school districts should perform better across the board between reading and mathematics tests across both grade levels.

Since all of the adjusted NAEP scores are <b style='color:#1aab45'>highly positively correlated</b> we continue this analysis with only the adjusted grade 8 NAEP mathematics score. 

In [None]:
# drop not needed columns: the enrollment shares, the raw NAEP score columns,
# bookkeeping columns and every adjusted score except grade-8 mathematics
drop_cols = [
    '{}_percent_{}'.format(grade, race)
    for grade in ('G05', 'G01')
    for race in ('AM', 'AS', 'HI', 'BL', 'WH', 'HP', 'TR')
]
drop_cols += ['PRIMARY_KEY', 'YEAR']
drop_cols += [
    '{}_{}_{}'.format(grade, group, subject)
    for grade in ('G04', 'G08')
    for group in ('A_A', 'A_M', 'A_F', 'WH_A', 'BL_A', 'HI_A', 'AS_A', 'AM_A', 'HP_A', 'TR_A')
    for subject in ('READING', 'MATHEMATICS')
]
drop_cols += [
    'county_connections_ratio',
    'G05_demo_total_percent',
    'G01_demo_total_percent',
    'ADJ_GR04_MATHEMATICS',
    'ADJ_GR08_READING',
    'ADJ_GR04_READING',
]
dist_naep_demo.drop(columns=drop_cols, inplace=True)
# function to create groupings from percentile ranks of NAEP test scores
def naep_percentile_groupings(x):
    """Return the label of the 20-point percentile bucket that ``x`` falls in.

    ``x`` is compared against the lower bounds 0.8, 0.6, 0.4 and 0.2 in turn;
    anything at or below 0.2 that is left over gets the lowest label. A value
    for which every comparison is false (e.g. NaN) is labelled ``'other'``.
    """
    lower_bounds = (
        (0.8, ']0.8 - 1.0'),
        (0.6, '[0.6 - 0.8['),
        (0.4, '[0.4 - 0.6['),
        (0.2, '[0.2 - 0.4['),
    )
    for bound, label in lower_bounds:
        if x >= bound:
            return label
    return '0 - 0.2[' if x <= 0.2 else 'other'
# percentile rank of each district's adjusted grade-8 math score
math_percentile = dist_naep_demo['ADJ_GR08_MATHEMATICS'].rank(pct=True)
dist_naep_demo['ADJ_GR08_MATHEMATICS_percentile'] = math_percentile
# bucket label for each percentile rank
dist_naep_demo['ADJ_GR08_MATHEMATICS_percentile_groups'] = math_percentile.apply(naep_percentile_groupings)

Given that we have adjusted math NAEP scores, we want to group the districts into sets with similar outcomes. Hence the districts are divided into 5 groups, based on their adjusted math NAEP score percentile.

We group the adjusted NAEP scores into percentile buckets. This is done so it becomes easier to group districts in the subsequent analysis of the districts.

]0.8 - 1.0 is the top 20 percent of scores, [0.6 - 0.8[ is the next 20 percent, and so forth, finally where 0 - 0.2[ is the bottom 20 percent of scores


Below: average NAEP adjusted math score by grouping

In [None]:
# mean adjusted grade-8 math score per percentile group, shown with a 'Greens' gradient
(
    dist_naep_demo
    .groupby('ADJ_GR08_MATHEMATICS_percentile_groups')['ADJ_GR08_MATHEMATICS']
    .mean()
    .reset_index()
    .style
    .background_gradient(cmap='Greens')
)

Below: number of districts in each group

In [None]:
# number of distinct districts per percentile group, shown with a 'Greens' gradient
(
    dist_naep_demo
    .groupby('ADJ_GR08_MATHEMATICS_percentile_groups')['district_id']
    .nunique()
    .reset_index()
    .style
    .background_gradient(cmap='Greens')
)

Below: geographic breakdowns on where each group exists

In [None]:
# districts per state within the ']0.8 - 1.0' percentile group
df = (
    dist_naep_demo
    .loc[dist_naep_demo['ADJ_GR08_MATHEMATICS_percentile_groups'] == ']0.8 - 1.0']
    .groupby(['STATE'])['district_id']
    .nunique()
    .reset_index()
)
df['STATE'] = df['STATE'].map(codes)

fig = go.Figure(
    go.Choropleth(
        locationmode='USA-states',  # how the entries of `locations` are interpreted
        locations=df['STATE'],  # spatial coordinates
        z=df['district_id'].astype(float),  # data to be colour-coded
        colorscale='Reds',
        colorbar_title="Number of Districts by State",
    )
)

fig.update_layout(
    title_text='80th to 100th percentile test score districts',
    geo_scope='usa',  # limit map scope to USA
    width=900,
    height=550,
)

fig.show()

School districts with the best outcomes seem to be in the northeast of the country.

In [None]:
# districts per state within the '[0.6 - 0.8[' percentile group
df = (
    dist_naep_demo
    .loc[dist_naep_demo['ADJ_GR08_MATHEMATICS_percentile_groups'] == '[0.6 - 0.8[']
    .groupby(['STATE'])['district_id']
    .nunique()
    .reset_index()
)
df['STATE'] = df['STATE'].map(codes)

fig = go.Figure(
    go.Choropleth(
        locationmode='USA-states',  # how the entries of `locations` are interpreted
        locations=df['STATE'],  # spatial coordinates
        z=df['district_id'].astype(float),  # data to be colour-coded
        colorscale='Reds',
        colorbar_title='Number of Districts by State',
    )
)

fig.update_layout(
    title_text='60th to 80th percentile test score districts',
    geo_scope='usa',  # limit map scope to USA
    width=900,
    height=550,
)

fig.show()

In [None]:
# districts per state within the '[0.4 - 0.6[' percentile group
df = (
    dist_naep_demo
    .loc[dist_naep_demo['ADJ_GR08_MATHEMATICS_percentile_groups'] == '[0.4 - 0.6[']
    .groupby(['STATE'])['district_id']
    .nunique()
    .reset_index()
)
df['STATE'] = df['STATE'].map(codes)

fig = go.Figure(
    go.Choropleth(
        locationmode='USA-states',  # how the entries of `locations` are interpreted
        locations=df['STATE'],  # spatial coordinates
        z=df['district_id'].astype(float),  # data to be colour-coded
        colorscale='Reds',
        colorbar_title='Number of Districts by State',
    )
)

fig.update_layout(
    title_text='40th to 60th percentile test score districts',
    geo_scope='usa',  # limit map scope to USA
    width=900,
    height=550,
)

fig.show()

In [None]:
# districts per state within the '[0.2 - 0.4[' percentile group
df = (
    dist_naep_demo
    .loc[dist_naep_demo['ADJ_GR08_MATHEMATICS_percentile_groups'] == '[0.2 - 0.4[']
    .groupby(['STATE'])['district_id']
    .nunique()
    .reset_index()
)
df['STATE'] = df['STATE'].map(codes)

fig = go.Figure(
    go.Choropleth(
        locationmode='USA-states',  # how the entries of `locations` are interpreted
        locations=df['STATE'],  # spatial coordinates
        z=df['district_id'].astype(float),  # data to be colour-coded
        colorscale='Reds',
        colorbar_title='Number of Districts by State',
    )
)

fig.update_layout(
    title_text='20th to 40th percentile test score districts',
    geo_scope='usa',  # limit map scope to USA
    width=900,
    height=550,
)

fig.show()

In [None]:
# districts per state within the '0 - 0.2[' percentile group
df = (
    dist_naep_demo
    .loc[dist_naep_demo['ADJ_GR08_MATHEMATICS_percentile_groups'] == '0 - 0.2[']
    .groupby(['STATE'])['district_id']
    .nunique()
    .reset_index()
)
df['STATE'] = df['STATE'].map(codes)

fig = go.Figure(
    go.Choropleth(
        locationmode='USA-states',  # how the entries of `locations` are interpreted
        locations=df['STATE'],  # spatial coordinates
        z=df['district_id'].astype(float),  # data to be colour-coded
        colorscale='Reds',
        colorbar_title='Number of Districts by State',
    )
)

fig.update_layout(
    title_text='0th to 20th percentile test score districts',
    geo_scope='usa',  # limit map scope to USA
    width=900,
    height=550,
)

fig.show()

There are clearly some trends, specifically with the best performing school districts being located in the northeast. However after that it seems to be a mixed bag with states being represented everywhere in the rest of the buckets, this is probably due to district level differences in demographics and socio-economic status of the pupils located in those districts.

Now, to complete the rest of the analysis we join the district level data with the engagement data and the product data. Our goal from here is to plot the groups of data aggregated and attempt to reveal differences between all of the groupings.

For the rest of this analysis we will be using the math naep percentile groups that were constructed earlier to extract insights and trends from our data on engagement and usage of online platforms.

While socio-economic status is a pervasive issue in the United States, solving problems related to it requires a very long time frame, which starts with the pupils of those school districts. Most online learning platforms are free or very low cost and hence are accessible to most students. It is expected that enhancing curriculums, and in turn enabling students to use the correct online learning tools, will improve outcomes. There is a suspicion that the districts with the best outcomes use online platforms differently, and the hope is that the analysis below will show that.

In [None]:
# district_id was stored as text when the engagement files were read; cast it to int for the join
engagement['district_id'] = engagement['district_id'].astype(int)

# keep every engagement row and attach the district / NAEP columns where district_id matches
engagement_districts = engagement.merge(dist_naep_demo, how='left', on='district_id')

# attach the product information by matching lp_id against the products' "LP ID" column
engagement_districts_products = engagement_districts.merge(
    products,
    how="left",
    left_on="lp_id",
    right_on="LP ID",
)

In [None]:
# credit goes to https://www.kaggle.com/spitfire2nd/the-learning-gap for the code for these visuals
def annotation_helper(fig, texts, x, y, line_spacing, align="left", bgcolor="rgba(0,0,0,0)", borderpad=0, ref="axes", width=100):
    """Add one plotly annotation per entry of ``texts``, stacked downwards from ``y``.

    Helper for annotations in plotly. While reducing the amount of code in the
    calling block, it also:

    - lets the text be given as a list of strings (one per line) instead of
      one long ``<br>``-separated string;
    - provides basic line spacing between consecutive lines.

    Parameters
    ----------
    fig : object with an ``add_annotation(dict)`` method
        The figure the annotations are added to.
    texts : iterable of str
        One annotation is added per entry, in order.
    x, y : number
        Position of the first line; each following line is placed at ``y``
        minus the spacing accumulated so far.
    line_spacing : number or list of numbers
        Gap inserted after each line. With a list, entry ``i`` is the gap after
        line ``i``; when the list is shorter than ``texts`` the last gap used is
        repeated (0 if the list is empty).
    align, bgcolor, width
        Passed straight through to every annotation.
    borderpad
        Accepted for interface compatibility only: every annotation is created
        with ``borderpad=4`` regardless of this argument, as before.
    ref : str
        ``"axes"`` positions against the x/y axes; any other value uses
        ``"paper"`` coordinates.
    """
    spacing_is_list = isinstance(line_spacing, list)
    coord_ref = ("x", "y") if ref == "axes" else ("paper", "paper")
    current_line_spacing = 0
    total_spacing = 0

    for index, text in enumerate(texts):
        if not spacing_is_list:
            current_line_spacing = line_spacing
        elif index < len(line_spacing):
            # `<` rather than `!=`: an index beyond the end of the list used to
            # raise IndexError; the previous spacing is now reused instead
            current_line_spacing = line_spacing[index]

        fig.add_annotation(dict(
            x=x,
            y=y - total_spacing,
            width=width,
            showarrow=False,
            text=text,
            bgcolor=bgcolor,
            align=align,
            borderpad=4,
            xref=coord_ref[0],
            yref=coord_ref[1],
        ))

        total_spacing += current_line_spacing

# General Engagement

In [None]:
# Mean engagement index per NAEP percentile group and day, over every tracked product.
engagement_naep = (
    engagement_districts_products
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# Window length (in rows, i.e. one row per day per group) of the rolling mean.
moving_average_window = 14

layout = {
    "margin": {"t": 150},  # head-room for the title annotation drawn in paper coordinates
    "xaxis": {"showline": True, "linewidth": 1, "linecolor": palette_darkgrey, "dtick": "M1", "tickformat": "%b\n%Y"},
    "yaxis": {"showline": False, "showgrid": True, "gridwidth": 1, "gridcolor": '#ddd', "linecolor": palette_darkgrey},
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# Split the aggregated frame into one subset per percentile group.
naep_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[naep_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[naep_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[naep_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[naep_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[naep_group == '[0.6 - 0.8[']

# One smoothed line per group; the top group is drawn first and thicker so it stands out.
for group_frame, line_colour, line_width, trace_name in (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
):
    fig.add_trace(go.Scatter(
        x=group_frame["time"],
        y=group_frame["engagement_index"].rolling(moving_average_window).mean(),
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# In-plot label for the top-scoring group (axis coordinates).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, datetime.date(2020, 11, 20), 380, [25], width=200)

# In-plot label for the lowest-scoring group.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 150, [25])

# Hand-made legend (the built-in legend is disabled in the layout).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>Test Score Group</b>" % palette_darkgrey,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(
    fig, text, datetime.date(2021, 2, 12), 400, [25, 25, 25, 25, 25],
    width=120, bgcolor="rgba(255,255,255,0.7)",
)

# Sub-heading describing what is plotted.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Engagement Index Grouped by NAEP Score</span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 420, [35], width=350)

# title annotation (paper coordinates, placed in the top margin)
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Do districts with higher test scores have higher engagement?</span>",
    """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> have higher engagement than <b style='color:%s'>districts with lower test scores</b>. </span>""" % (palette_green, palette_red),
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=700)

fig.show()

Hypothesis: Do districts with higher test scores have higher engagement?

- The graph confirms our suspicion that higher test scoring districts have higher engagement.
- Slight differences can be observed at the end of August and July, where certain school districts start and end the year earlier, which influences the engagement scores.
- The top performing school districts seem to have far and away the highest engagement.

This graph also shows a correlation between when the school year begins and student outcomes. It looks like schools that begin in mid-to-late August have worse outcomes. I believe this can be attributed to the fact that starting school while summer is still in full swing can be a distraction for many students, who may want to enjoy the last of the hot weather before it cools significantly. This probably leads to worse math scores because most of the material builds on previously learned material, and students who fall behind or don't learn the first units well may struggle as the year continues.

As per the visual above, our suspicions are confirmed. But let's try to quantify how often each test group's average engagement index is larger than that of the test groups that perform worse.

Below we calculate the percentage of days in 2020 on which a test group had higher engagement than all of the test groups that score beneath it.

In [None]:
# Reshape to one row per day with one ('engagement_index', group) column per NAEP group.
pivot_engagement_naep = engagement_naep.pivot(
    index='time',
    columns='ADJ_GR08_MATHEMATICS_percentile_groups',
    values=['engagement_index'],
)

# Column labels of the percentile groups, ordered from lowest to highest scoring.
ranked_naep_groups = ['0 - 0.2[', '[0.2 - 0.4[', '[0.4 - 0.6[', '[0.6 - 0.8[', ']0.8 - 1.0']
total_days = pivot_engagement_naep.shape[0]

# For every group except the lowest (highest first), the share of days on which
# its value is strictly above that of *every* lower-ranked group. Days where a
# value is missing never count as "greater" because NaN comparisons are False.
pct_days_above_lower = []
for group_rank in range(len(ranked_naep_groups) - 1, 0, -1):
    group_series = pivot_engagement_naep[('engagement_index', ranked_naep_groups[group_rank])]
    beats_all_lower = pd.Series(True, index=pivot_engagement_naep.index)
    for lower_group in ranked_naep_groups[:group_rank]:
        beats_all_lower = beats_all_lower & (
            group_series > pivot_engagement_naep[('engagement_index', lower_group)]
        )
    pct_days_above_lower.append(100 * beats_all_lower.sum() / total_days)

percent_greater_than = pd.DataFrame({
    'Test Score Group': ['80-100%', '60-80%', '40-60%', '20-40%'],
    'Percent of Days with greater engagement than lower scoring test groups': pct_days_above_lower,
})
percent_greater_than.style.background_gradient(cmap='Greens')

- From the above we can determine that the top 80-100% performing school districts have a higher engagement index than all of the other school districts 85% of the time, which seems to confirm our suspicion that higher engagement correlates with better performance.
- As expected, the 80-100% group has more engagement the vast majority of the time; similarly, all the other test score groups have more engagement than the lower scoring groups below them.

However the previous graph was for <b>all of the products</b> that are tracked and we only have data on the 372 products in the provided file products_info.csv.

<b>If we only filtered for products in which the URL was known does the trend hold?</b>

# Products Used

In [None]:
# Mean engagement index per NAEP percentile group and day, restricted to rows
# whose product has a known URL (i.e. it matched an entry of the products table).
engagement_naep = (
    engagement_districts_products
    .loc[engagement_districts_products['URL'].notna()]
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# Window length (in rows, i.e. one row per day per group) of the rolling mean.
moving_average_window = 14

layout = {
    "margin": {"t": 150},  # head-room for the title annotation drawn in paper coordinates
    "xaxis": {"showline": True, "linewidth": 1, "linecolor": palette_darkgrey, "dtick": "M1", "tickformat": "%b\n%Y"},
    "yaxis": {"showline": False, "showgrid": True, "gridwidth": 1, "gridcolor": '#ddd', "linecolor": palette_darkgrey},
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# Split the aggregated frame into one subset per percentile group.
naep_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[naep_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[naep_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[naep_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[naep_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[naep_group == '[0.6 - 0.8[']

# One smoothed line per group; the top group is drawn first and thicker so it stands out.
for group_frame, line_colour, line_width, trace_name in (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
):
    fig.add_trace(go.Scatter(
        x=group_frame["time"],
        y=group_frame["engagement_index"].rolling(moving_average_window).mean(),
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# In-plot label for the top-scoring group (axis coordinates).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, datetime.date(2021, 2, 20), 440, [25], width=100)

# In-plot label for the lowest-scoring group.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 150, [25, 30])

# Hand-made legend (the built-in legend is disabled in the layout).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(
    fig, text, datetime.date(2020, 11, 5), 175, [25, 25, 25, 25],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# Sub-heading describing what is plotted.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Engagement Index Grouped by NAEP Score</span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 520, [35], width=350)

# title annotation (paper coordinates, placed in the top margin)
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Do districts with higher test scores have higher engagement</span>",
    "<span style='font-size:26px; font-family:Times New Roman;'>with the 372 most used products?</span>",
    """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> have higher engagement than <b style='color:%s'>districts with lower test scores</b>. </span>""" % (palette_green, palette_red),
]
annotation_helper(fig, text, 0.9, 1.375, [0.08, 0.1, 0.055], ref="paper", width=700)

fig.show()

- From inspection this seems to confirm our prior conclusions
- Again we notice slight differences at the end and start of the year that can be attributed to different times of the year districts begin and end the school year

How does this stack up on a day to day basis?

In [None]:
# Reshape to one row per day with one ('engagement_index', group) column per NAEP group.
pivot_engagement_naep = engagement_naep.pivot(
    index='time',
    columns='ADJ_GR08_MATHEMATICS_percentile_groups',
    values=['engagement_index'],
)

# Column labels of the percentile groups, ordered from lowest to highest scoring.
ranked_naep_groups = ['0 - 0.2[', '[0.2 - 0.4[', '[0.4 - 0.6[', '[0.6 - 0.8[', ']0.8 - 1.0']
total_days = pivot_engagement_naep.shape[0]

# For every group except the lowest (highest first), the share of days on which
# its value is strictly above that of *every* lower-ranked group. Days where a
# value is missing never count as "greater" because NaN comparisons are False.
pct_days_above_lower = []
for group_rank in range(len(ranked_naep_groups) - 1, 0, -1):
    group_series = pivot_engagement_naep[('engagement_index', ranked_naep_groups[group_rank])]
    beats_all_lower = pd.Series(True, index=pivot_engagement_naep.index)
    for lower_group in ranked_naep_groups[:group_rank]:
        beats_all_lower = beats_all_lower & (
            group_series > pivot_engagement_naep[('engagement_index', lower_group)]
        )
    pct_days_above_lower.append(100 * beats_all_lower.sum() / total_days)

percent_greater_than = pd.DataFrame({
    'Test Score Group': ['80-100%', '60-80%', '40-60%', '20-40%'],
    'Percent of Days with greater engagement than lower scoring test groups': pct_days_above_lower,
})
percent_greater_than.style.background_gradient(cmap='Greens')

- We affirm our suspicions across the 372 most used products where the URLs are known.
- The highest scoring districts have higher engagement for 75% of the year than all of the other test score groups 

For the rest of this analysis we will be focusing on known URLs, as we can better understand from them exactly what better performing school districts are using and how they are using them.

Among the URLs that are known how many of the 372 products are students actually using?

In [None]:
# Number of distinct known product URLs seen per NAEP percentile group and day.
engagement_naep = (
    engagement_districts_products
    .loc[engagement_districts_products['URL'].notna()]
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["URL"]
    .nunique()
    .reset_index()
)

# Window length (in rows, i.e. one row per day per group) of the rolling mean.
moving_average_window = 7

layout = {
    "margin": {"t": 150},  # head-room for the title annotation drawn in paper coordinates
    "xaxis": {"showline": True, "linewidth": 1, "linecolor": palette_darkgrey, "dtick": "M1", "tickformat": "%b\n%Y"},
    "yaxis": {"showline": False, "showgrid": True, "gridwidth": 1, "gridcolor": '#ddd', "linecolor": palette_darkgrey},
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# Split the aggregated frame into one subset per percentile group.
naep_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[naep_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[naep_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[naep_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[naep_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[naep_group == '[0.6 - 0.8[']

# One smoothed line per group; the top group is drawn first and thicker so it stands out.
for group_frame, line_colour, line_width, trace_name in (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
):
    fig.add_trace(go.Scatter(
        x=group_frame["time"],
        y=group_frame["URL"].rolling(moving_average_window).mean(),
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# In-plot label for the top-scoring group (axis coordinates).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, datetime.date(2021, 2, 20), 350, [5], width=100)

# In-plot label for the lowest-scoring group.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 300, [5])

# Hand-made legend (the built-in legend is disabled in the layout).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(
    fig, text, datetime.date(2020, 11, 5), 320, [5, 5, 5, 5],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# Sub-heading describing what is plotted.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Unique Products Grouped by NAEP Score</span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>7 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 380, [5], width=350)

# title annotation (paper coordinates, placed in the top margin)
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Do districts with higher test scores use more or less products?</span>",
    """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> use a similar amount of products as <b style='color:%s'>districts with lower test scores</b>. </span>""" % (palette_green, palette_red),
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=700)

fig.show()

- One insight is that since the pandemic began all groups have adopted more online products to supplement their teaching. This leads us to believe that this analysis has more value than previously anticipated: since districts are all using more products, it makes sense that they would want to know which ones are being adopted by the best students and best school districts.
- There is not a significant difference across the test score groups. If you look closely it does seem like fewer products are used, but it is a very small difference of about 5-10 products over a 7 day moving period, which is not much at all.

Now let's break down the products provided. Within the provided URLs we have 372 different products with different categorical variables.

One of the fields provided in the 372 most used products is 'Primary Essential Function'. Below we have the counts of the number of products in each grouping. Instead of plotting them all we will take a look at a few that may have a correlation with higher test scores.

In [None]:
# Number of products per 'Primary Essential Function', shown as a table shaded by count.
(
    products['Primary Essential Function']
    .value_counts()
    .reset_index()
    .style
    .background_gradient(cmap='Greens')
)

#  Career Planning & Job Search

Among these there are a few that are interesting groups. The first one that will be looked at is <b>LC - Career Planning & Job Search</b>. It should be expected that the better performing school districts have more students engaged and using sites that allow them to investigate career planning and job searches. What are the URLs in that grouping?

In [None]:
# URLs of the products categorised as career planning / job search.
products.loc[
    products['Primary Essential Function'] == 'LC - Career Planning & Job Search',
    'URL',
].tolist()

- LinkedIn: professional networking
- Naviance: American college and career readiness software
- Common App: college application form

In [None]:
# Mean engagement index per NAEP percentile group and day, restricted to the
# products categorised as 'LC - Career Planning & Job Search'.
engagement_naep = (
    engagement_districts_products
    .loc[engagement_districts_products['Primary Essential Function'] == 'LC - Career Planning & Job Search']
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# Window length (in rows, i.e. one row per day per group) of the rolling mean.
moving_average_window = 14

layout = dict(
    margin=dict(t=150),  # head-room for the title annotation drawn in paper coordinates
    xaxis=dict(showline=True, linewidth=1, linecolor=palette_darkgrey, dtick="M1", tickformat="%b\n%Y"),
    yaxis=dict(showline=False, showgrid=True, gridwidth=1, gridcolor='#ddd', linecolor=palette_darkgrey),
    showlegend=False,
    hovermode="x unified",
    width=1000,
    height=600,
    plot_bgcolor="#fff",
    hoverlabel=dict(
        bgcolor="white",
        font_size=12
    )
)

fig = go.Figure(layout=layout)

# Split the aggregated frame into one subset per percentile group.
naep_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[naep_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[naep_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[naep_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[naep_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[naep_group == '[0.6 - 0.8[']

# One smoothed line per group, added in the same order as before (top group
# first and thicker). Replaces five copy-pasted add_trace blocks.
for group_frame, line_colour, line_width, trace_name in (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
):
    fig.add_trace(go.Scatter(
        x=group_frame["time"],
        y=group_frame["engagement_index"].rolling(moving_average_window).mean(),
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# In-plot label for the top-scoring group (axis coordinates).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % (palette_green),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_green)
]

annotation_helper(fig, text, datetime.date(2021, 2, 20), 100, [10], width=100)

# In-plot label for the lowest-scoring group.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_red)
]

annotation_helper(fig, text, datetime.date(2021, 2, 12), 50, [10])

# Hand-made legend (the built-in legend is disabled in the layout).
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % (palette_grey4),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % (palette_grey3),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % (palette_grey2),
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % (palette_green)
]

annotation_helper(fig, text, datetime.date(2021, 2, 12), 170, [10, 10, 10, 10], width=60, bgcolor="rgba(255,255,255,0.7)")

# Sub-heading describing what is plotted. The label previously read
# "Average Engagement of Grouped by NAEP Score"; it now uses the same wording
# as the other engagement-index charts in this notebook.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Engagement Index Grouped by NAEP Score</span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]

annotation_helper(fig, text, datetime.date(2020, 4, 15), 150, [10], width=350)

# title annotation (paper coordinates, placed in the top margin)
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Districts with higher test scores investigate career opportunities</span>",
    """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> engage more with career related URLs </span>""" % (palette_green)

]

annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- As expected, districts with better test outcomes have higher interest in career focused URLs.

How does this stack up on a day to day basis?

In [None]:
# Reshape to one row per day with one ('engagement_index', group) column per NAEP group.
pivot_engagement_naep = engagement_naep.pivot(
    index='time',
    columns='ADJ_GR08_MATHEMATICS_percentile_groups',
    values=['engagement_index'],
)

# Column labels of the percentile groups, ordered from lowest to highest scoring.
ranked_naep_groups = ['0 - 0.2[', '[0.2 - 0.4[', '[0.4 - 0.6[', '[0.6 - 0.8[', ']0.8 - 1.0']
total_days = pivot_engagement_naep.shape[0]

# For every group except the lowest (highest first), the share of days on which
# its value is strictly above that of *every* lower-ranked group. Days where a
# value is missing never count as "greater" because NaN comparisons are False.
pct_days_above_lower = []
for group_rank in range(len(ranked_naep_groups) - 1, 0, -1):
    group_series = pivot_engagement_naep[('engagement_index', ranked_naep_groups[group_rank])]
    beats_all_lower = pd.Series(True, index=pivot_engagement_naep.index)
    for lower_group in ranked_naep_groups[:group_rank]:
        beats_all_lower = beats_all_lower & (
            group_series > pivot_engagement_naep[('engagement_index', lower_group)]
        )
    pct_days_above_lower.append(100 * beats_all_lower.sum() / total_days)

percent_greater_than = pd.DataFrame({
    'Test Score Group': ['80-100%', '60-80%', '40-60%', '20-40%'],
    'Percent of Days with greater engagement than lower scoring test groups': pct_days_above_lower,
})
percent_greater_than.style.background_gradient(cmap='Greens')

There seems to be an overlap in some of the middle test score groups, but clearly the best performing districts have much higher engagement and the lowest performing districts have the lowest engagement.

In [None]:
# Career planning / job search products only: mean pct_access for every
# (ADJ_GR08_MATHEMATICS_percentile_groups, time) pair.
career_rows = engagement_districts_products[
    engagement_districts_products['Primary Essential Function'] == 'LC - Career Planning & Job Search'
]
engagement_naep = (
    career_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["pct_access"]
    .mean()
    .reset_index()
)

# NOTE(review): rolling() below counts rows, not calendar days; the "14 day"
# caption holds only if each group has one row per day - confirm.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile group; the notebook-level names are kept as-is.
percentile_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[percentile_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[percentile_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[percentile_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[percentile_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[percentile_group == '[0.6 - 0.8[']

# (rows, line colour, line width, trace name) - listed in drawing order.
trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for group_rows, line_colour, line_width, trace_name in trace_specs:
    smoothed = group_rows["pct_access"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# Each entry is one annotation_helper call:
# (text lines, x, y, list passed as 5th positional argument, keyword arguments).
# annotation_helper is defined elsewhere in the notebook; the arguments are
# forwarded exactly as written here.
annotation_calls = [
    # label for the highlighted top group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % (palette_green),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_green),
        ],
        datetime.date(2021, 2, 20), 0.3, [0.05], dict(width=100),
    ),
    # label for the bottom group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_red),
        ],
        datetime.date(2021, 2, 12), 0.01, [0.05], {},
    ),
    # colour key for all five groups
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % (palette_grey4),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % (palette_grey3),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % (palette_grey2),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % (palette_green),
        ],
        datetime.date(2021, 1, 5), 0.6, [0.04, 0.04, 0.04, 0.04],
        dict(width=60, bgcolor="rgba(255,255,255,0.7)"),
    ),
    # in-plot caption
    (
        [
            "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Percent Access of Grouped by NAEP Score</span>" % palette_darkgrey,
            "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
        ],
        datetime.date(2020, 4, 15), 0.6, [0.05], dict(width=350),
    ),
    # title annotation
    (
        [
            "<span style='font-size:26px; font-family:Times New Roman;'>Districts with higher test scores access career URLs more</span>",
            """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> access career related URLs more often </span>""" % (palette_green),
        ],
        0.9, 1.375, [0.12, 0.055, 0.055], dict(ref="paper", width=700),
    ),
]
for text, x_anchor, y_anchor, line_gaps, helper_kwargs in annotation_calls:
    annotation_helper(fig, text, x_anchor, y_anchor, line_gaps, **helper_kwargs)

fig.show()

- In districts with higher test scores, a much higher proportion of students access career-related URLs, and they engage with them more

In [None]:
# Wide table: one row per `time`, one ('pct_access', <group>) column per
# percentile group found in engagement_naep.
pivot_engagement_naep = engagement_naep.pivot(
    index='time',
    columns='ADJ_GR08_MATHEMATICS_percentile_groups',
    values=['pct_access'],
)

# Group labels ordered from the lowest to the highest score band.
score_groups = ['0 - 0.2[', '[0.2 - 0.4[', '[0.4 - 0.6[', '[0.6 - 0.8[', ']0.8 - 1.0']
total_rows = pivot_engagement_naep.shape[0]

# For each band (highest first, lowest band excluded): percentage of rows in
# which its pct_access is strictly greater than that of EVERY lower band.
# Comparisons involving NaN evaluate to False, so such rows count as "not greater".
share_above_lower_groups = []
for position in range(len(score_groups) - 1, 0, -1):
    own_access = pivot_engagement_naep[('pct_access', score_groups[position])]
    beats_every_lower = own_access > pivot_engagement_naep[('pct_access', score_groups[0])]
    for lower_group in score_groups[1:position]:
        beats_every_lower = beats_every_lower & (
            own_access > pivot_engagement_naep[('pct_access', lower_group)]
        )
    share_above_lower_groups.append(100 * sum(beats_every_lower) / total_rows)

percent_greater_than = pd.DataFrame({
    'Test Score Group': ['80-100%', '60-80%', '40-60%', '20-40%'],
    'Percent of Days with greater access than lower scoring test groups': share_above_lower_groups,
})
# Last expression of the cell: renders the styled table in the notebook.
percent_greater_than.style.background_gradient(cmap='Greens')

Let's look at specific URLs in the list:

Linkedin: http://www.linkedin.com

In [None]:
# LinkedIn rows only: mean engagement_index for every
# (ADJ_GR08_MATHEMATICS_percentile_groups, time) pair.
linkedin_rows = engagement_districts_products[
    engagement_districts_products['URL'] == 'http://www.linkedin.com'
]
engagement_naep = (
    linkedin_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# NOTE(review): rolling() below counts rows, not calendar days; the "14 day"
# caption holds only if each group has one row per day - confirm.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile group; the notebook-level names are kept as-is.
percentile_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[percentile_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[percentile_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[percentile_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[percentile_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[percentile_group == '[0.6 - 0.8[']

# (rows, line colour, line width, trace name) - listed in drawing order.
trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for group_rows, line_colour, line_width, trace_name in trace_specs:
    smoothed = group_rows["engagement_index"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# Each entry is one annotation_helper call:
# (text lines, x, y, list passed as 5th positional argument, keyword arguments).
# annotation_helper is defined elsewhere in the notebook; the arguments are
# forwarded exactly as written here.
annotation_calls = [
    # label for the highlighted top group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % (palette_green),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_green),
        ],
        datetime.date(2021, 2, 20), 10, [1], dict(width=100),
    ),
    # label for the bottom group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_red),
        ],
        datetime.date(2021, 2, 12), 1, [1], {},
    ),
    # colour key for all five groups
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % (palette_grey4),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % (palette_grey3),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % (palette_grey2),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % (palette_green),
        ],
        datetime.date(2021, 2, 12), 20, [1, 1, 1, 1],
        dict(width=60, bgcolor="rgba(255,255,255,0.7)"),
    ),
    # in-plot caption
    (
        [
            "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Engagement on Linkedin by NAEP Score</span>" % palette_darkgrey,
            "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
        ],
        datetime.date(2020, 4, 15), 20, [1], dict(width=350),
    ),
    # title annotation
    (
        [
            "<span style='font-size:26px; font-family:Times New Roman;'>Linkedin: Engagement</span>",
            """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> engage more with Linkedin </span>""" % (palette_green),
        ],
        0.9, 1.375, [0.12, 0.055, 0.055], dict(ref="paper", width=800),
    ),
]
for text, x_anchor, y_anchor, line_gaps, helper_kwargs in annotation_calls:
    annotation_helper(fig, text, x_anchor, y_anchor, line_gaps, **helper_kwargs)

fig.show()

In [None]:
# LinkedIn rows only: mean pct_access for every
# (ADJ_GR08_MATHEMATICS_percentile_groups, time) pair.
linkedin_rows = engagement_districts_products[
    engagement_districts_products['URL'] == 'http://www.linkedin.com'
]
engagement_naep = (
    linkedin_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["pct_access"]
    .mean()
    .reset_index()
)

# NOTE(review): rolling() below counts rows, not calendar days; the "14 day"
# caption holds only if each group has one row per day - confirm.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile group; the notebook-level names are kept as-is.
percentile_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[percentile_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[percentile_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[percentile_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[percentile_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[percentile_group == '[0.6 - 0.8[']

# (rows, line colour, line width, trace name) - listed in drawing order.
trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for group_rows, line_colour, line_width, trace_name in trace_specs:
    smoothed = group_rows["pct_access"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# Each entry is one annotation_helper call:
# (text lines, x, y, list passed as 5th positional argument, keyword arguments).
# annotation_helper is defined elsewhere in the notebook; the arguments are
# forwarded exactly as written here.
annotation_calls = [
    # label for the highlighted top group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % (palette_green),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_green),
        ],
        datetime.date(2021, 2, 20), 0.1, [0.01], dict(width=100),
    ),
    # label for the bottom group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_red),
        ],
        datetime.date(2021, 2, 12), 0.01, [0.01], {},
    ),
    # colour key for all five groups
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % (palette_grey4),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % (palette_grey3),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % (palette_grey2),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % (palette_green),
        ],
        datetime.date(2020, 11, 5), 0.15, [0.01, 0.01, 0.01, 0.01],
        dict(width=60, bgcolor="rgba(255,255,255,0.7)"),
    ),
    # in-plot caption; names the product, as the other per-product charts do
    # (previously a template leftover: "... Percent Access of Grouped by ...")
    (
        [
            "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Percent Access of Linkedin by NAEP Score</span>" % palette_darkgrey,
            "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
        ],
        datetime.date(2020, 4, 15), 0.15, [0.01], dict(width=350),
    ),
    # title annotation
    (
        [
            "<span style='font-size:26px; font-family:Times New Roman;'>Linkedin: Percent Access</span>",
            """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> access Linkedin more often </span>""" % (palette_green),
        ],
        0.9, 1.375, [0.12, 0.055, 0.055], dict(ref="paper", width=700),
    ),
]
for text, x_anchor, y_anchor, line_gaps, helper_kwargs in annotation_calls:
    annotation_helper(fig, text, x_anchor, y_anchor, line_gaps, **helper_kwargs)

fig.show()

Now let's look at Common App, a website for college applications: https://www.commonapp.org/


In [None]:
# Common App rows only: mean engagement_index for every
# (ADJ_GR08_MATHEMATICS_percentile_groups, time) pair.
commonapp_rows = engagement_districts_products[
    engagement_districts_products['URL'] == 'https://www.commonapp.org/'
]
engagement_naep = (
    commonapp_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# NOTE(review): rolling() below counts rows, not calendar days; the "14 day"
# caption holds only if each group has one row per day - confirm.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile group; the notebook-level names are kept as-is.
percentile_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[percentile_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[percentile_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[percentile_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[percentile_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[percentile_group == '[0.6 - 0.8[']

# (rows, line colour, line width, trace name) - listed in drawing order.
trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for group_rows, line_colour, line_width, trace_name in trace_specs:
    smoothed = group_rows["engagement_index"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# Each entry is one annotation_helper call:
# (text lines, x, y, list passed as 5th positional argument, keyword arguments).
# annotation_helper is defined elsewhere in the notebook; the arguments are
# forwarded exactly as written here.
annotation_calls = [
    # label for the highlighted top group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % (palette_green),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_green),
        ],
        datetime.date(2021, 2, 20), 100, [15], dict(width=100),
    ),
    # label for the bottom group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_red),
        ],
        datetime.date(2021, 2, 12), 10, [15], {},
    ),
    # colour key for all five groups
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % (palette_grey4),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % (palette_grey3),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % (palette_grey2),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % (palette_green),
        ],
        datetime.date(2021, 2, 12), 225, [15, 15, 15, 15],
        dict(width=60, bgcolor="rgba(255,255,255,0.7)"),
    ),
    # in-plot caption
    (
        [
            "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Engagement on Commonapp by NAEP Score</span>" % palette_darkgrey,
            "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
        ],
        datetime.date(2020, 4, 15), 200, [25], dict(width=350),
    ),
    # title annotation
    (
        [
            "<span style='font-size:26px; font-family:Times New Roman;'>Commonapp: Engagement</span>",
            """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> engage more with Commonapp </span>""" % (palette_green),
        ],
        0.9, 1.375, [0.12, 0.055, 0.055], dict(ref="paper", width=800),
    ),
]
for text, x_anchor, y_anchor, line_gaps, helper_kwargs in annotation_calls:
    annotation_helper(fig, text, x_anchor, y_anchor, line_gaps, **helper_kwargs)

fig.show()

In [None]:
# Common App rows only: mean pct_access for every
# (ADJ_GR08_MATHEMATICS_percentile_groups, time) pair.
commonapp_rows = engagement_districts_products[
    engagement_districts_products['URL'] == 'https://www.commonapp.org/'
]
engagement_naep = (
    commonapp_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["pct_access"]
    .mean()
    .reset_index()
)

# NOTE(review): rolling() below counts rows, not calendar days; the "14 day"
# caption holds only if each group has one row per day - confirm.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile group; the notebook-level names are kept as-is.
percentile_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[percentile_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[percentile_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[percentile_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[percentile_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[percentile_group == '[0.6 - 0.8[']

# (rows, line colour, line width, trace name) - listed in drawing order.
trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for group_rows, line_colour, line_width, trace_name in trace_specs:
    smoothed = group_rows["pct_access"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# Each entry is one annotation_helper call:
# (text lines, x, y, list passed as 5th positional argument, keyword arguments).
# annotation_helper is defined elsewhere in the notebook; the arguments are
# forwarded exactly as written here.
annotation_calls = [
    # label for the highlighted top group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % (palette_green),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_green),
        ],
        datetime.date(2021, 2, 20), 0.3, [0.05], dict(width=100),
    ),
    # label for the bottom group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_red),
        ],
        datetime.date(2021, 2, 12), 0.1, [0.05], {},
    ),
    # colour key for all five groups
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % (palette_grey4),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % (palette_grey3),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % (palette_grey2),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % (palette_green),
        ],
        datetime.date(2021, 1, 5), 0.6, [0.05, 0.05, 0.05, 0.05],
        dict(width=60, bgcolor="rgba(255,255,255,0.7)"),
    ),
    # in-plot caption
    (
        [
            "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Percent Access of Commonapp by NAEP Score</span>" % palette_darkgrey,
            "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
        ],
        datetime.date(2020, 4, 15), 0.6, [0.04], dict(width=350),
    ),
    # title annotation
    (
        [
            "<span style='font-size:26px; font-family:Times New Roman;'>Commonapp: Percent Access</span>",
            """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> access Commonapp more often </span>""" % (palette_green),
        ],
        0.9, 1.375, [0.12, 0.055, 0.055], dict(ref="paper", width=700),
    ),
]
for text, x_anchor, y_anchor, line_gaps, helper_kwargs in annotation_calls:
    annotation_helper(fig, text, x_anchor, y_anchor, line_gaps, **helper_kwargs)

fig.show()

- As expected, better performing districts have more engagement with career and college application platforms
- It is also to be expected that better performing districts have more applications to colleges and universities

# Encyclopedia Resources

Next we will take a look at another grouping: <b> LC - Sites, Resources & Reference - Encyclopedia </b>

Hopefully here we can reveal more patterns among the best performing districts

In [None]:
# List the catalogue rows whose primary function is the encyclopedia category.
# The filtered frame is the cell's last expression, so the notebook displays it.
is_encyclopedia_product = (
    products['Primary Essential Function'] == 'LC - Sites, Resources & Reference - Encyclopedia'
)
products[is_encyclopedia_product]

In [None]:
# Encyclopedia reference products only: mean engagement_index for every
# (ADJ_GR08_MATHEMATICS_percentile_groups, time) pair.
encyclopedia_rows = engagement_districts_products[
    engagement_districts_products['Primary Essential Function'] == 'LC - Sites, Resources & Reference - Encyclopedia'
]
engagement_naep = (
    encyclopedia_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# NOTE(review): rolling() below counts rows, not calendar days; the "14 day"
# caption holds only if each group has one row per day - confirm.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile group; the notebook-level names are kept as-is.
percentile_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"]
engagement_high_naep = engagement_naep[percentile_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[percentile_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[percentile_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[percentile_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[percentile_group == '[0.6 - 0.8[']

# (rows, line colour, line width, trace name) - listed in drawing order.
trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for group_rows, line_colour, line_width, trace_name in trace_specs:
    smoothed = group_rows["engagement_index"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name,
    ))

# Each entry is one annotation_helper call:
# (text lines, x, y, list passed as 5th positional argument, keyword arguments).
# annotation_helper is defined elsewhere in the notebook; the arguments are
# forwarded exactly as written here.
annotation_calls = [
    # label for the highlighted top group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % (palette_green),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_green),
        ],
        datetime.date(2021, 2, 12), 50, [10], dict(width=100),
    ),
    # label for the bottom group
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % (palette_red),
        ],
        datetime.date(2021, 2, 12), 20, [10], {},
    ),
    # colour key for all five groups
    (
        [
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % (palette_red),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % (palette_grey4),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % (palette_grey3),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % (palette_grey2),
            "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % (palette_green),
        ],
        datetime.date(2021, 2, 12), 175, [15, 15, 15, 15],
        dict(width=60, bgcolor="rgba(255,255,255,0.7)"),
    ),
    # in-plot caption
    (
        [
            "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Encyclopedia Engagement of Grouped by NAEP Score</span>" % palette_darkgrey,
            "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
        ],
        datetime.date(2020, 4, 15), 220, [10], dict(width=350),
    ),
    # title annotation
    (
        [
            "<span style='font-size:26px; font-family:Times New Roman;'>Encyclopedia: Engagement</span>",
            """<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> utilize encyclopedia resources more often </span>""" % (palette_green),
        ],
        0.9, 1.375, [0.12, 0.055, 0.055], dict(ref="paper", width=800),
    ),
]
for text, x_anchor, y_anchor, line_gaps, helper_kwargs in annotation_calls:
    annotation_helper(fig, text, x_anchor, y_anchor, line_gaps, **helper_kwargs)

fig.show()

In [None]:
# Encyclopedia products: mean pct_access per (NAEP percentile group, day),
# drawn as one rolling-mean line per percentile group.
naep_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
plotted_metric = "pct_access"

selected_rows = engagement_districts_products[
    engagement_districts_products['Primary Essential Function']
    == 'LC - Sites, Resources & Reference - Encyclopedia'
]
engagement_naep = (
    selected_rows
    .groupby([naep_group_col, "time"])[plotted_metric]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by each rolling window below.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label; the label strings are matched exactly.
row_group = engagement_naep[naep_group_col]
engagement_high_naep = engagement_naep[row_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[row_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[row_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[row_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[row_group == '[0.6 - 0.8[']

# (frame, line colour, line width, trace name), in the order traces are added.
trace_specs = [
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
]
for group_frame, line_colour, line_width, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=group_frame["time"],
            y=group_frame[plotted_metric].rolling(moving_average_window).mean(),
            mode='lines',
            line={"color": line_colour, "width": line_width},
            name=trace_name,
        )
    )

# NOTE(review): annotation_helper is defined elsewhere in the notebook; the
# arguments after `text` look like an x/y anchor followed by per-line spacing
# -- confirm against its definition before changing any of them.
percentile_label_html = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"
edge_label_date = datetime.date(2021, 2, 12)

# label for the 80-100% group
text = [
    percentile_label_html % (palette_green, "80-100% "),
    percentile_label_html % (palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.6, [0.1], width=100)

# label for the 0-20% group
text = [
    percentile_label_html % (palette_red, "0-20%"),
    percentile_label_html % (palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.1, [0.1])

# colour key listing every percentile band
text = [
    percentile_label_html % (key_colour, key_label)
    for key_colour, key_label in (
        (palette_red, "0-20%"),
        (palette_grey4, "20-40%"),
        (palette_grey3, "40-60%"),
        (palette_grey2, "60-80%"),
        (palette_green, "80-100%"),
    )
]
annotation_helper(
    fig, text, datetime.date(2021, 1, 5), 1.4, [0.1, 0.1, 0.1, 0.1],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# in-plot caption
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Percent Access Encyclopedia by NAEP Score</span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 1.5, [0.1], width=350)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Encyclopedia: Percent Access</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_green}'>Districts with better test scores</b> utilize encyclopedia resources more often </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=700)

fig.show()

Again, we can very clearly see a trend across the groups.

Let's take a look at Wikipedia specifically, since it's a well-known tool that many people are familiar with:

In [None]:
# Wikipedia (matched by URL): mean engagement_index per (NAEP percentile
# group, day), drawn as one rolling-mean line per percentile group.
naep_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
plotted_metric = "engagement_index"

selected_rows = engagement_districts_products[
    engagement_districts_products['URL'] == 'http://www.wikipedia.org/'
]
engagement_naep = (
    selected_rows
    .groupby([naep_group_col, "time"])[plotted_metric]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by each rolling window below.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label; the label strings are matched exactly.
row_group = engagement_naep[naep_group_col]
engagement_high_naep = engagement_naep[row_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[row_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[row_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[row_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[row_group == '[0.6 - 0.8[']

# (frame, line colour, line width, trace name), in the order traces are added.
trace_specs = [
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
]
for group_frame, line_colour, line_width, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=group_frame["time"],
            y=group_frame[plotted_metric].rolling(moving_average_window).mean(),
            mode='lines',
            line={"color": line_colour, "width": line_width},
            name=trace_name,
        )
    )

# NOTE(review): annotation_helper is defined elsewhere in the notebook; the
# arguments after `text` look like an x/y anchor followed by per-line spacing
# -- confirm against its definition before changing any of them.
percentile_label_html = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"
edge_label_date = datetime.date(2021, 2, 12)

# label for the 80-100% group
text = [
    percentile_label_html % (palette_green, "80-100% "),
    percentile_label_html % (palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 100, [15], width=100)

# label for the 0-20% group
text = [
    percentile_label_html % (palette_red, "0-20%"),
    percentile_label_html % (palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 10, [15])

# colour key listing every percentile band
text = [
    percentile_label_html % (key_colour, key_label)
    for key_colour, key_label in (
        (palette_red, "0-20%"),
        (palette_grey4, "20-40%"),
        (palette_grey3, "40-60%"),
        (palette_grey2, "60-80%"),
        (palette_green, "80-100%"),
    )
]
annotation_helper(
    fig, text, edge_label_date, 225, [25, 25, 25, 25],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# in-plot caption
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Engagement on Wikipedia by NAEP Score</span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 370, [20], width=350)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Wikipedia: Engagement</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_green}'>Districts with better test scores</b> engage more with Wikipedia </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

In [None]:
# Wikipedia (matched by URL): mean pct_access per (NAEP percentile group,
# day), drawn as one rolling-mean line per percentile group.
naep_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
plotted_metric = "pct_access"

selected_rows = engagement_districts_products[
    engagement_districts_products['URL'] == 'http://www.wikipedia.org/'
]
engagement_naep = (
    selected_rows
    .groupby([naep_group_col, "time"])[plotted_metric]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by each rolling window below.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label; the label strings are matched exactly.
row_group = engagement_naep[naep_group_col]
engagement_high_naep = engagement_naep[row_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[row_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[row_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[row_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[row_group == '[0.6 - 0.8[']

# (frame, line colour, line width, trace name), in the order traces are added.
trace_specs = [
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
]
for group_frame, line_colour, line_width, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=group_frame["time"],
            y=group_frame[plotted_metric].rolling(moving_average_window).mean(),
            mode='lines',
            line={"color": line_colour, "width": line_width},
            name=trace_name,
        )
    )

# NOTE(review): annotation_helper is defined elsewhere in the notebook; the
# arguments after `text` look like an x/y anchor followed by per-line spacing
# -- confirm against its definition before changing any of them.
percentile_label_html = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"
edge_label_date = datetime.date(2021, 2, 5)

# label for the 80-100% group
text = [
    percentile_label_html % (palette_green, "80-100% "),
    percentile_label_html % (palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.6, [0.2], width=100)

# label for the 0-20% group
text = [
    percentile_label_html % (palette_red, "0-20%"),
    percentile_label_html % (palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.1, [0.2])

# colour key listing every percentile band
text = [
    percentile_label_html % (key_colour, key_label)
    for key_colour, key_label in (
        (palette_red, "0-20%"),
        (palette_grey4, "20-40%"),
        (palette_grey3, "40-60%"),
        (palette_grey2, "60-80%"),
        (palette_green, "80-100%"),
    )
]
annotation_helper(
    fig, text, edge_label_date, 2.5, [0.25, 0.25, 0.25, 0.25],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# in-plot caption
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Percent Access Wikipedia by NAEP Score</span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 3.5, [0.2], width=350)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Wikipedia: Percent Access</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_green}'>Districts with better test scores</b> utilize wikipedia resources more often </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=700)

fig.show()

- Students with better outcomes seem to use encyclopedia resources more often 

# Thesaurus & Dictionary Resources

Students with better outcomes seem to use encyclopedia resources more effectively

Do they also use thesaurus and dictionary resources more? We now look at <b>LC - Sites, Resources & Reference - Thesaurus & Dictionary</b>.

In [None]:
# Display the product rows whose 'Primary Essential Function' is the
# thesaurus & dictionary reference category.
thesaurus_dictionary_mask = (
    products['Primary Essential Function']
    == 'LC - Sites, Resources & Reference - Thesaurus & Dictionary'
)
products[thesaurus_dictionary_mask]

In [None]:
# Thesaurus & dictionary products: mean engagement_index per (NAEP percentile
# group, day), drawn as one rolling-mean line per percentile group.
naep_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
plotted_metric = "engagement_index"

selected_rows = engagement_districts_products[
    engagement_districts_products['Primary Essential Function']
    == 'LC - Sites, Resources & Reference - Thesaurus & Dictionary'
]
engagement_naep = (
    selected_rows
    .groupby([naep_group_col, "time"])[plotted_metric]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by each rolling window below.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label; the label strings are matched exactly.
row_group = engagement_naep[naep_group_col]
engagement_high_naep = engagement_naep[row_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[row_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[row_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[row_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[row_group == '[0.6 - 0.8[']

# (frame, line colour, line width, trace name), in the order traces are added.
trace_specs = [
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
]
for group_frame, line_colour, line_width, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=group_frame["time"],
            y=group_frame[plotted_metric].rolling(moving_average_window).mean(),
            mode='lines',
            line={"color": line_colour, "width": line_width},
            name=trace_name,
        )
    )

# NOTE(review): annotation_helper is defined elsewhere in the notebook; the
# arguments after `text` look like an x/y anchor followed by per-line spacing
# -- confirm against its definition before changing any of them.
percentile_label_html = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"
edge_label_date = datetime.date(2021, 2, 12)

# label for the 80-100% group
text = [
    percentile_label_html % (palette_green, "80-100% "),
    percentile_label_html % (palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 50, [5], width=100)

# label for the 0-20% group
text = [
    percentile_label_html % (palette_red, "0-20%"),
    percentile_label_html % (palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 20, [5])

# colour key listing every percentile band
text = [
    percentile_label_html % (key_colour, key_label)
    for key_colour, key_label in (
        (palette_red, "0-20%"),
        (palette_grey4, "20-40%"),
        (palette_grey3, "40-60%"),
        (palette_grey2, "60-80%"),
        (palette_green, "80-100%"),
    )
]
annotation_helper(
    fig, text, edge_label_date, 125, [15, 15, 15, 15],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# in-plot caption
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Dictionary/Thesaurus Engagement </span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 125, [10], width=350)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Dictionary/Thesaurus: Engagement</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_green}'>Districts with better test scores</b> utilize Dictionary/Thesaurus resources more often </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

In [None]:
# Thesaurus & dictionary products: mean pct_access per (NAEP percentile
# group, day), drawn as one rolling-mean line per percentile group.
naep_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
plotted_metric = "pct_access"

selected_rows = engagement_districts_products[
    engagement_districts_products['Primary Essential Function']
    == 'LC - Sites, Resources & Reference - Thesaurus & Dictionary'
]
engagement_naep = (
    selected_rows
    .groupby([naep_group_col, "time"])[plotted_metric]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by each rolling window below.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label; the label strings are matched exactly.
row_group = engagement_naep[naep_group_col]
engagement_high_naep = engagement_naep[row_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[row_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[row_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[row_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[row_group == '[0.6 - 0.8[']

# (frame, line colour, line width, trace name), in the order traces are added.
trace_specs = [
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
]
for group_frame, line_colour, line_width, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=group_frame["time"],
            y=group_frame[plotted_metric].rolling(moving_average_window).mean(),
            mode='lines',
            line={"color": line_colour, "width": line_width},
            name=trace_name,
        )
    )

# NOTE(review): annotation_helper is defined elsewhere in the notebook; the
# arguments after `text` look like an x/y anchor followed by per-line spacing
# -- confirm against its definition before changing any of them.
percentile_label_html = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"
edge_label_date = datetime.date(2021, 2, 12)

# label for the 80-100% group
text = [
    percentile_label_html % (palette_green, "80-100% "),
    percentile_label_html % (palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.6, [0.1], width=100)

# label for the 0-20% group
text = [
    percentile_label_html % (palette_red, "0-20%"),
    percentile_label_html % (palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.1, [0.1])

# colour key listing every percentile band
text = [
    percentile_label_html % (key_colour, key_label)
    for key_colour, key_label in (
        (palette_red, "0-20%"),
        (palette_grey4, "20-40%"),
        (palette_grey3, "40-60%"),
        (palette_grey2, "60-80%"),
        (palette_green, "80-100%"),
    )
]
annotation_helper(
    fig, text, datetime.date(2020, 9, 5), 1, [0.1, 0.1, 0.1, 0.1],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# in-plot caption
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Percent Access Dictionary/Thesaurus</span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 1, [0.1], width=350)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Dictionary/Thesaurus: Percent Access</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_green}'>Districts with better test scores</b> utilize Dictionary/Thesaurus resources more often </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=700)

fig.show()

- It looks like the best-performing students use and engage more often with resources like Wikipedia, thesauruses and dictionaries.

- It's possible that worse-performing schools are less aware of free internet resources, or do not know how to utilize them as well as better-performing schools do.

# Plagiarism Detection Products

Next we turn our attention to <b>plagiarism detection products</b>. Some districts use tools like Turnitin to automatically check for plagiarism. It's possible that poorer-performing school districts are less likely to catch students submitting work that is not theirs.

In [None]:
# Display the product rows for the two LP IDs analysed in the next cells
# (presumably the plagiarism detection products, per the surrounding text).
plagiarism_lp_ids = [57724, 73323]
products[products['LP ID'].isin(plagiarism_lp_ids)]

In [None]:
# Products with LP ID 57724 or 73323: mean engagement_index per (NAEP
# percentile group, day), drawn as one rolling-mean line per percentile group.
naep_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
plotted_metric = "engagement_index"

selected_rows = engagement_districts_products[
    engagement_districts_products['LP ID'].isin([57724, 73323])
]
engagement_naep = (
    selected_rows
    .groupby([naep_group_col, "time"])[plotted_metric]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by each rolling window below.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label; the label strings are matched exactly.
row_group = engagement_naep[naep_group_col]
engagement_high_naep = engagement_naep[row_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[row_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[row_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[row_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[row_group == '[0.6 - 0.8[']

# (frame, line colour, line width, trace name), in the order traces are added.
trace_specs = [
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
]
for group_frame, line_colour, line_width, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=group_frame["time"],
            y=group_frame[plotted_metric].rolling(moving_average_window).mean(),
            mode='lines',
            line={"color": line_colour, "width": line_width},
            name=trace_name,
        )
    )

# NOTE(review): annotation_helper is defined elsewhere in the notebook; the
# arguments after `text` look like an x/y anchor followed by per-line spacing
# -- confirm against its definition before changing any of them.
percentile_label_html = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"
edge_label_date = datetime.date(2021, 2, 12)

# label for the 80-100% group
text = [
    percentile_label_html % (palette_green, "80-100% "),
    percentile_label_html % (palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 50, [5], width=100)

# label for the 0-20% group
text = [
    percentile_label_html % (palette_red, "0-20%"),
    percentile_label_html % (palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 20, [5])

# colour key listing every percentile band
text = [
    percentile_label_html % (key_colour, key_label)
    for key_colour, key_label in (
        (palette_red, "0-20%"),
        (palette_grey4, "20-40%"),
        (palette_grey3, "40-60%"),
        (palette_grey2, "60-80%"),
        (palette_green, "80-100%"),
    )
]
annotation_helper(
    fig, text, edge_label_date, 125, [15, 15, 15, 15],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# in-plot caption
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Plagiarism Detection Engagement </span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 175, [10], width=350)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Plagiarism Detection: Engagement</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_red}'>Districts with worse test scores</b> do not engage with plagiarism detection products resources as often </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

Interestingly, here we do not have a perfect trend in which the highest-scoring group uses plagiarism detection products the most often. We do, however, have a clear trend in which the lowest-performing districts are almost always using them the least often.

In [None]:
# Products with LP ID 57724 or 73323: mean pct_access per (NAEP percentile
# group, day), drawn as one rolling-mean line per percentile group.
naep_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
plotted_metric = "pct_access"

selected_rows = engagement_districts_products[
    engagement_districts_products['LP ID'].isin([57724, 73323])
]
engagement_naep = (
    selected_rows
    .groupby([naep_group_col, "time"])[plotted_metric]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by each rolling window below.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label; the label strings are matched exactly.
row_group = engagement_naep[naep_group_col]
engagement_high_naep = engagement_naep[row_group == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[row_group == "0 - 0.2["]
engagement_02_04 = engagement_naep[row_group == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[row_group == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[row_group == '[0.6 - 0.8[']

# (frame, line colour, line width, trace name), in the order traces are added.
trace_specs = [
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
]
for group_frame, line_colour, line_width, trace_name in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=group_frame["time"],
            y=group_frame[plotted_metric].rolling(moving_average_window).mean(),
            mode='lines',
            line={"color": line_colour, "width": line_width},
            name=trace_name,
        )
    )

# NOTE(review): annotation_helper is defined elsewhere in the notebook; the
# arguments after `text` look like an x/y anchor followed by per-line spacing
# -- confirm against its definition before changing any of them.
percentile_label_html = "<b style='color:%s; font-family:Tahoma; font-size:12px'>%s</b>"
edge_label_date = datetime.date(2021, 2, 12)

# label for the 80-100% group
text = [
    percentile_label_html % (palette_green, "80-100% "),
    percentile_label_html % (palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.6, [0.05], width=100)

# label for the 0-20% group
text = [
    percentile_label_html % (palette_red, "0-20%"),
    percentile_label_html % (palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, edge_label_date, 0.1, [0.05])

# colour key listing every percentile band
text = [
    percentile_label_html % (key_colour, key_label)
    for key_colour, key_label in (
        (palette_red, "0-20%"),
        (palette_grey4, "20-40%"),
        (palette_grey3, "40-60%"),
        (palette_grey2, "60-80%"),
        (palette_green, "80-100%"),
    )
]
annotation_helper(
    fig, text, datetime.date(2020, 9, 5), 1, [0.05, 0.05, 0.05, 0.05],
    width=60, bgcolor="rgba(255,255,255,0.7)",
)

# in-plot caption
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Percent Access Plagiarism Detection</span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 1, [0.05], width=350)

# title annotation
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Plagiarism Detection: Percent Access</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_red}'>Districts with worse test scores</b> do not engage with plagiarism detection products resources as often </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=700)

fig.show()

- It's very important for schools to use whatever resources they can to ensure academic integrity. If integrity is not ensured, students can cheat and lose out on learning valuable material.

# Google Docs & Drive

First, we look at Google Docs and Google Drive. Here we expect the best-performing districts to have high usage: the best students are most likely to spend a lot of time interacting with document creation services, and Google Docs and Drive happen to be some of the most popular ones.

In [None]:
# Show the product rows for the two LP IDs charted in the following cells.
_google_lp_ids = [95731, 99916]
products[products['LP ID'].isin(_google_lp_ids)]

In [None]:
# Google Docs / Drive (LP IDs 95731 and 99916): mean engagement_index for each
# (ADJ_GR08_MATHEMATICS percentile group, date) pair, drawn as one smoothed
# line per percentile group.
_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
_metric = "engagement_index"

engagement_naep = (
    engagement_districts_products[
        engagement_districts_products['LP ID'].isin([95731, 99916])
    ]
    .groupby([_group_col, "time"])[_metric]
    .mean()
    .reset_index()
)

# Rolling window length, counted in rows of each per-group frame.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label.
engagement_high_naep = engagement_naep[engagement_naep[_group_col] == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[engagement_naep[_group_col] == "0 - 0.2["]
engagement_02_04 = engagement_naep[engagement_naep[_group_col] == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[engagement_naep[_group_col] == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[engagement_naep[_group_col] == '[0.6 - 0.8[']

# Traces are added in this order: top group (thick green), bottom group (red),
# then the three middle groups in greys.
_trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for _subset, _colour, _width, _label in _trace_specs:
    fig.add_trace(go.Scatter(
        x=_subset["time"],
        y=_subset[_metric].rolling(moving_average_window).mean(),
        mode='lines',
        line={"color": _colour, "width": _width},
        name=_label,
    ))

# Annotations are placed through annotation_helper (defined elsewhere in this notebook).
_label_date = datetime.date(2021, 2, 12)

# Label for the 80-100% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 6000, [750], width=100)

# Label for the 0-20% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, _label_date, 2000, [750])

# Colour key listing all five groups, on a semi-transparent white background.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 15000, [950, 950, 950, 950], width=60, bgcolor="rgba(255,255,255,0.7)")

# In-plot caption naming the metric and the smoothing.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Google docs/drive Engagement </span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 16000, [750], width=350)

# Figure title and subtitle, positioned with ref="paper".
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Google Docs/Drive: Engagement</span>",
    "<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with worse test scores</b> do not engage with google docs or drive as often </span>" % palette_red,
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- As expected, better students spend more time interacting with document creation services.
- Does the same trend hold for percent access?

In [None]:
# Google Docs / Drive (LP IDs 95731 and 99916): mean pct_access for each
# (ADJ_GR08_MATHEMATICS percentile group, date) pair, drawn as one smoothed
# line per percentile group.
_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
_metric = "pct_access"

engagement_naep = (
    engagement_districts_products[
        engagement_districts_products['LP ID'].isin([95731, 99916])
    ]
    .groupby([_group_col, "time"])[_metric]
    .mean()
    .reset_index()
)

# Rolling window length, counted in rows of each per-group frame.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 900,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label.
engagement_high_naep = engagement_naep[engagement_naep[_group_col] == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[engagement_naep[_group_col] == "0 - 0.2["]
engagement_02_04 = engagement_naep[engagement_naep[_group_col] == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[engagement_naep[_group_col] == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[engagement_naep[_group_col] == '[0.6 - 0.8[']

# Traces are added in this order: top group (thick green), bottom group (red),
# then the three middle groups in greys.
_trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for _subset, _colour, _width, _label in _trace_specs:
    fig.add_trace(go.Scatter(
        x=_subset["time"],
        y=_subset[_metric].rolling(moving_average_window).mean(),
        mode='lines',
        line={"color": _colour, "width": _width},
        name=_label,
    ))

# Annotations are placed through annotation_helper (defined elsewhere in this notebook).
_label_date = datetime.date(2021, 2, 12)

# Label for the 80-100% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 15, [2], width=100)

# Label for the 0-20% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, _label_date, 5, [2])

# Colour key listing all five groups, on a semi-transparent white background.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(fig, text, datetime.date(2020, 7, 25), 25, [2, 2, 2, 2], width=60, bgcolor="rgba(255,255,255,0.7)")

# In-plot caption naming the metric and the smoothing.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Percent Access Google Docs/Drive</span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 27, [1], width=350)

# Figure title and subtitle, positioned with ref="paper".
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Google Docs/Drive: Percent Access</span>",
    "<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with worse test scores</b> do not engage with google docs/drive as often </span>" % palette_red,
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=700)

fig.show()

- Clearly, we have confirmed that students who perform better engage more often with Google Docs and Drive.
- This raises the question: are worse-performing students distracted?



# Distractions:

The purpose of this section is to understand whether worse-performing school districts have students who are more distracted than others.

In this section we analyze social media, video streaming sites, and video games.

Two video games, Minecraft and Among Us, clearly stand out in the products list.

In [None]:
# Show the product rows for the two game LP IDs charted in the following cells
# (18090 is used for the Minecraft chart, 32555 for the Among Us chart).
_game_lp_ids = [18090, 32555]
products[products['LP ID'].isin(_game_lp_ids)]

In [None]:
# Minecraft (LP ID 18090): mean engagement_index for each
# (ADJ_GR08_MATHEMATICS percentile group, date) pair, drawn as one smoothed
# line per percentile group.
_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
_metric = "engagement_index"

engagement_naep = (
    engagement_districts_products[
        engagement_districts_products['LP ID'].isin([18090])
    ]
    .groupby([_group_col, "time"])[_metric]
    .mean()
    .reset_index()
)

# Rolling window length, counted in rows of each per-group frame.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label.
engagement_high_naep = engagement_naep[engagement_naep[_group_col] == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[engagement_naep[_group_col] == "0 - 0.2["]
engagement_02_04 = engagement_naep[engagement_naep[_group_col] == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[engagement_naep[_group_col] == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[engagement_naep[_group_col] == '[0.6 - 0.8[']

# Traces are added in this order: top group (thick green), bottom group (red),
# then the three middle groups in greys.
_trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for _subset, _colour, _width, _label in _trace_specs:
    fig.add_trace(go.Scatter(
        x=_subset["time"],
        y=_subset[_metric].rolling(moving_average_window).mean(),
        mode='lines',
        line={"color": _colour, "width": _width},
        name=_label,
    ))

# Annotations are placed through annotation_helper (defined elsewhere in this notebook).
_label_date = datetime.date(2021, 2, 12)

# Label for the 80-100% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 5, [1], width=100)

# Label for the 0-20% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, _label_date, 1, [1])

# Colour key listing all five groups, on a semi-transparent white background.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 10, [1, 1, 1, 1], width=60, bgcolor="rgba(255,255,255,0.7)")

# In-plot caption naming the metric and the smoothing.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Minecraft Engagement </span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 10, [1], width=350)

# Figure title and subtitle, positioned with ref="paper".
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Minecraft: Engagement</span>",
    "<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>Districts with better test scores</b> have high engagement with minecraft </span>" % palette_green,
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- Better-performing districts seem to play more Minecraft. This is surprising: we would expect districts with higher engagement with video games to perform worse.

Now let's take a look at Among Us, another popular game during 2020. Data for Among Us was only collected from around October 2020 onwards.

In [None]:
# Among Us (LP ID 32555): mean engagement_index for each
# (ADJ_GR08_MATHEMATICS percentile group, date) pair, drawn as one smoothed
# line per percentile group.
_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
_metric = "engagement_index"

engagement_naep = (
    engagement_districts_products[
        engagement_districts_products['LP ID'].isin([32555])
    ]
    .groupby([_group_col, "time"])[_metric]
    .mean()
    .reset_index()
)

# Rolling window length, counted in rows of each per-group frame.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label.
engagement_high_naep = engagement_naep[engagement_naep[_group_col] == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[engagement_naep[_group_col] == "0 - 0.2["]
engagement_02_04 = engagement_naep[engagement_naep[_group_col] == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[engagement_naep[_group_col] == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[engagement_naep[_group_col] == '[0.6 - 0.8[']

# Traces are added in this order: top group (thick green), bottom group (red),
# then the three middle groups in greys.
_trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for _subset, _colour, _width, _label in _trace_specs:
    fig.add_trace(go.Scatter(
        x=_subset["time"],
        y=_subset[_metric].rolling(moving_average_window).mean(),
        mode='lines',
        line={"color": _colour, "width": _width},
        name=_label,
    ))

# Annotations are placed through annotation_helper (defined elsewhere in this notebook).
_label_date = datetime.date(2021, 1, 12)

# Label for the 80-100% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 1.5, [0.2], width=100)

# Label for the 0-20% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, _label_date, 0.5, [0.2])

# Colour key listing all five groups, on a semi-transparent white background.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 3, [0.2, 0.2, 0.2, 0.2], width=60, bgcolor="rgba(255,255,255,0.7)")

# In-plot caption naming the metric and the smoothing.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Among Us Engagement </span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 11, 15), 3, [0.2], width=350)

# Figure title and subtitle, positioned with ref="paper".
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Among Us: Engagement</span>",
    "<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>There is no clear trend</b> both low and high scoring districts have high engagement with Among Us </span>" % palette_darkgrey,
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- There appears to be no clear trend in engagement with video games.

Next, we take a look at social media platforms like Instagram and Facebook:

In [None]:
# Show the product rows for the two social-media LP IDs charted in the following
# cells (13983 is used for the Facebook chart, 36859 for the Instagram chart).
_social_lp_ids = [13983, 36859]
products[products['LP ID'].isin(_social_lp_ids)]

In [None]:
# Facebook (LP ID 13983): mean engagement_index for each
# (ADJ_GR08_MATHEMATICS percentile group, date) pair, drawn as one smoothed
# line per percentile group.
_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
_metric = "engagement_index"

engagement_naep = (
    engagement_districts_products[
        engagement_districts_products['LP ID'].isin([13983])
    ]
    .groupby([_group_col, "time"])[_metric]
    .mean()
    .reset_index()
)

# Rolling window length, counted in rows of each per-group frame.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label.
engagement_high_naep = engagement_naep[engagement_naep[_group_col] == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[engagement_naep[_group_col] == "0 - 0.2["]
engagement_02_04 = engagement_naep[engagement_naep[_group_col] == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[engagement_naep[_group_col] == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[engagement_naep[_group_col] == '[0.6 - 0.8[']

# Traces are added in this order: top group (thick green), bottom group (red),
# then the three middle groups in greys.
_trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for _subset, _colour, _width, _label in _trace_specs:
    fig.add_trace(go.Scatter(
        x=_subset["time"],
        y=_subset[_metric].rolling(moving_average_window).mean(),
        mode='lines',
        line={"color": _colour, "width": _width},
        name=_label,
    ))

# Annotations are placed through annotation_helper (defined elsewhere in this notebook).
_label_date = datetime.date(2021, 1, 12)

# Label for the 80-100% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 55, [3], width=100)

# Label for the 0-20% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, _label_date, 50, [3])

# Colour key listing all five groups, on a semi-transparent white background.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 80, [4, 4, 4, 4], width=60, bgcolor="rgba(255,255,255,0.7)")

# In-plot caption naming the metric and the smoothing.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Facebook Engagement </span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 9, 15), 80, [3], width=350)

# Figure title and subtitle, positioned with ref="paper".
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'>Facebook: Engagement</span>",
    "<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>There is no clear trend</b> both low and high scoring districts have high engagement with Facebook </span>" % palette_darkgrey,
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- No clear trend.
- It looks like a pattern is emerging in which students aren't being that distracted.

We continue with Instagram.

In [None]:
# Instagram (LP ID 36859): mean engagement_index for each
# (ADJ_GR08_MATHEMATICS percentile group, date) pair, drawn as one smoothed
# line per percentile group.
_group_col = "ADJ_GR08_MATHEMATICS_percentile_groups"
_metric = "engagement_index"

engagement_naep = (
    engagement_districts_products[
        engagement_districts_products['LP ID'].isin([36859])
    ]
    .groupby([_group_col, "time"])[_metric]
    .mean()
    .reset_index()
)

# Rolling window length, counted in rows of each per-group frame.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One frame per percentile-group label.
engagement_high_naep = engagement_naep[engagement_naep[_group_col] == "]0.8 - 1.0"]
engagement_low_naep = engagement_naep[engagement_naep[_group_col] == "0 - 0.2["]
engagement_02_04 = engagement_naep[engagement_naep[_group_col] == '[0.2 - 0.4[']
engagement_04_06 = engagement_naep[engagement_naep[_group_col] == '[0.4 - 0.6[']
engagement_06_08 = engagement_naep[engagement_naep[_group_col] == '[0.6 - 0.8[']

# Traces are added in this order: top group (thick green), bottom group (red),
# then the three middle groups in greys.
_trace_specs = (
    (engagement_high_naep, palette_green, 3, '     80-100%'),
    (engagement_low_naep, palette_red, 1.5, '  0-20%'),
    (engagement_02_04, palette_grey4, 1.5, '  20-40%'),
    (engagement_04_06, palette_grey3, 1.5, '  40-60%'),
    (engagement_06_08, palette_grey2, 1.5, '  60-80%'),
)
for _subset, _colour, _width, _label in _trace_specs:
    fig.add_trace(go.Scatter(
        x=_subset["time"],
        y=_subset[_metric].rolling(moving_average_window).mean(),
        mode='lines',
        line={"color": _colour, "width": _width},
        name=_label,
    ))

# Annotations are placed through annotation_helper (defined elsewhere in this notebook).
_label_date = datetime.date(2021, 1, 12)

# Label for the 80-100% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%% </b>" % palette_green,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 600, [50], width=100)

# Label for the 0-20% line.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>NAEP Test Score </b>" % palette_red,
]
annotation_helper(fig, text, _label_date, 200, [50])

# Colour key listing all five groups, on a semi-transparent white background.
text = [
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>0-20%%</b>" % palette_red,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>20-40%%</b>" % palette_grey4,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>40-60%%</b>" % palette_grey3,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>60-80%%</b>" % palette_grey2,
    "<b style='color:%s; font-family:Tahoma; font-size:12px'>80-100%%</b>" % palette_green,
]
annotation_helper(fig, text, _label_date, 1000, [50, 50, 50, 50], width=60, bgcolor="rgba(255,255,255,0.7)")

# In-plot caption naming the metric and the smoothing.
text = [
    "<span style='color:%s; font-family:Tahoma; font-size:14px'>Average Instagram Engagement </span>" % palette_darkgrey,
    "<span style='color:%s; font-family:Tahoma; font-size:13px'>14 day moving average</span>" % palette_grey2,
]
annotation_helper(fig, text, datetime.date(2020, 9, 15), 800, [50], width=350)

# Figure title and subtitle, positioned with ref="paper".
# NOTE(review): the title text is " I nstagram: Engagement" (leading space and a
# space after the "I"), unlike the sibling charts. An earlier, commented-out
# variant of this annotation used "Instagram: Engagement" with a longer subtitle,
# so the spacing may be a deliberate workaround; it is kept as-is. Confirm.
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'> I nstagram: Engagement</span>",
    "<span style='font-size:13px; font-family:Helvetica'><b style='color:%s'>There is no clear trend</b>  </span>" % palette_darkgrey,
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- Here it looks like better-performing districts engage with Instagram more.

Since we have covered video games and two of the most popular social media sites, let's take a look at streaming services: YouTube, Netflix and Disney+.

In [None]:
# Display the catalogue rows for the three LP IDs analysed in the cells below.
products.loc[products['LP ID'].isin([90153, 61292, 36620])]

First, we start with YouTube.

In [None]:
# Mean engagement_index per (NAEP percentile group, time), restricted to
# LP ID 61292 -- the product this cell presents as YouTube.
youtube_rows = engagement_districts_products[
    engagement_districts_products['LP ID'].isin([61292])
]
engagement_naep = (
    youtube_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by the rolling mean of every line.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One line per percentile group, listed in drawing order:
# (group value in the data, line colour, line width, trace name).
# The top group is the only one drawn with a thicker line.
percentile_traces = [
    ("]0.8 - 1.0", palette_green, 3, '     80-100%'),
    ("0 - 0.2[", palette_red, 1.5, '  0-20%'),
    ('[0.2 - 0.4[', palette_grey4, 1.5, '  20-40%'),
    ('[0.4 - 0.6[', palette_grey3, 1.5, '  40-60%'),
    ('[0.6 - 0.8[', palette_grey2, 1.5, '  60-80%'),
]

for group_value, line_colour, line_width, trace_name in percentile_traces:
    in_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"] == group_value
    group_rows = engagement_naep[in_group]
    smoothed = group_rows["engagement_index"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name))


def _naep_label(colour, content):
    """Return *content* wrapped in the bold 12px Tahoma markup used for line labels."""
    return f"<b style='color:{colour}; font-family:Tahoma; font-size:12px'>{content}</b>"


# Label for the top-scoring group.
text = [
    _naep_label(palette_green, "80-100% "),
    _naep_label(palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, datetime.date(2021, 1, 12), 2000, [400], width=100)

# Label for the bottom-scoring group.
text = [
    _naep_label(palette_red, "0-20%"),
    _naep_label(palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, datetime.date(2021, 1, 12), 4000, [400])

# Colour key listing all five groups on a semi-transparent background.
legend_entries = [
    (palette_red, "0-20%"),
    (palette_grey4, "20-40%"),
    (palette_grey3, "40-60%"),
    (palette_grey2, "60-80%"),
    (palette_green, "80-100%"),
]
text = [_naep_label(colour, label) for colour, label in legend_entries]
annotation_helper(
    fig,
    text,
    datetime.date(2021, 1, 12),
    7000,
    [400] * 4,
    width=60,
    bgcolor="rgba(255,255,255,0.7)",
)

# In-plot caption naming the series and the smoothing applied.
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average YouTube Engagement </span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 8, 15), 5000, [400], width=350)

# Title annotation, positioned with ref="paper" rather than at a date/value.
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'> YouTube: Engagement</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_darkgrey}'>There is no clear trend</b> but it looks like poor performing districts spend a lot of time on youtube.  </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

The 60-80% group follows the bottom 20% group very closely.

Now let's take a look at the two video streaming services that offer episodes and movies, whereas YouTube is mostly short clips.

First, we work with Netflix:

In [None]:
# Mean engagement_index per (NAEP percentile group, time), restricted to
# LP ID 90153 -- the product this cell presents as Netflix.
netflix_rows = engagement_districts_products[
    engagement_districts_products['LP ID'].isin([90153])
]
engagement_naep = (
    netflix_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by the rolling mean of every line.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One line per percentile group, listed in drawing order:
# (group value in the data, line colour, line width, trace name).
# The top group is the only one drawn with a thicker line.
percentile_traces = [
    ("]0.8 - 1.0", palette_green, 3, '     80-100%'),
    ("0 - 0.2[", palette_red, 1.5, '  0-20%'),
    ('[0.2 - 0.4[', palette_grey4, 1.5, '  20-40%'),
    ('[0.4 - 0.6[', palette_grey3, 1.5, '  40-60%'),
    ('[0.6 - 0.8[', palette_grey2, 1.5, '  60-80%'),
]

for group_value, line_colour, line_width, trace_name in percentile_traces:
    in_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"] == group_value
    group_rows = engagement_naep[in_group]
    smoothed = group_rows["engagement_index"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name))


def _naep_label(colour, content):
    """Return *content* wrapped in the bold 12px Tahoma markup used for line labels."""
    return f"<b style='color:{colour}; font-family:Tahoma; font-size:12px'>{content}</b>"


# Label for the top-scoring group.
text = [
    _naep_label(palette_green, "80-100% "),
    _naep_label(palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 150, [20], width=100)

# Label for the bottom-scoring group.
text = [
    _naep_label(palette_red, "0-20%"),
    _naep_label(palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 50, [20])

# Colour key listing all five groups on a semi-transparent background.
legend_entries = [
    (palette_red, "0-20%"),
    (palette_grey4, "20-40%"),
    (palette_grey3, "40-60%"),
    (palette_grey2, "60-80%"),
    (palette_green, "80-100%"),
]
text = [_naep_label(colour, label) for colour, label in legend_entries]
annotation_helper(
    fig,
    text,
    datetime.date(2021, 1, 12),
    300,
    [20] * 4,
    width=60,
    bgcolor="rgba(255,255,255,0.7)",
)

# In-plot caption naming the series and the smoothing applied.
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Netflix Engagement </span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 300, [20], width=350)

# Title annotation, positioned with ref="paper" rather than at a date/value.
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'> Netflix: Engagement</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_darkgrey}'>There is no clear trend</b>  </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- Once again there is no exact trend: it seems that better-performing districts use Netflix more throughout the year, while the worse-performing districts use it at a level closer to the average.

Last but not least, we turn to Disney+.

In [None]:
# Mean engagement_index per (NAEP percentile group, time), restricted to
# LP ID 36620 -- the product this cell presents as Disney+.
disney_rows = engagement_districts_products[
    engagement_districts_products['LP ID'].isin([36620])
]
engagement_naep = (
    disney_rows
    .groupby(["ADJ_GR08_MATHEMATICS_percentile_groups", "time"])["engagement_index"]
    .mean()
    .reset_index()
)

# Number of consecutive rows averaged by the rolling mean of every line.
moving_average_window = 14

layout = {
    "margin": {"t": 150},
    "xaxis": {
        "showline": True,
        "linewidth": 1,
        "linecolor": palette_darkgrey,
        "dtick": "M1",
        "tickformat": "%b\n%Y",
    },
    "yaxis": {
        "showline": False,
        "showgrid": True,
        "gridwidth": 1,
        "gridcolor": '#ddd',
        "linecolor": palette_darkgrey,
    },
    "showlegend": False,
    "hovermode": "x unified",
    "width": 1000,
    "height": 600,
    "plot_bgcolor": "#fff",
    "hoverlabel": {"bgcolor": "white", "font_size": 12},
}

fig = go.Figure(layout=layout)

# One line per percentile group, listed in drawing order:
# (group value in the data, line colour, line width, trace name).
# The top group is the only one drawn with a thicker line.
percentile_traces = [
    ("]0.8 - 1.0", palette_green, 3, '     80-100%'),
    ("0 - 0.2[", palette_red, 1.5, '  0-20%'),
    ('[0.2 - 0.4[', palette_grey4, 1.5, '  20-40%'),
    ('[0.4 - 0.6[', palette_grey3, 1.5, '  40-60%'),
    ('[0.6 - 0.8[', palette_grey2, 1.5, '  60-80%'),
]

for group_value, line_colour, line_width, trace_name in percentile_traces:
    in_group = engagement_naep["ADJ_GR08_MATHEMATICS_percentile_groups"] == group_value
    group_rows = engagement_naep[in_group]
    smoothed = group_rows["engagement_index"].rolling(moving_average_window).mean()
    fig.add_trace(go.Scatter(
        x=group_rows["time"],
        y=smoothed,
        mode='lines',
        line=dict(color=line_colour, width=line_width),
        name=trace_name))


def _naep_label(colour, content):
    """Return *content* wrapped in the bold 12px Tahoma markup used for line labels."""
    return f"<b style='color:{colour}; font-family:Tahoma; font-size:12px'>{content}</b>"


# Label for the top-scoring group.
text = [
    _naep_label(palette_green, "80-100% "),
    _naep_label(palette_green, "NAEP Test Score "),
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 40, [5], width=100)

# Label for the bottom-scoring group.
text = [
    _naep_label(palette_red, "0-20%"),
    _naep_label(palette_red, "NAEP Test Score "),
]
annotation_helper(fig, text, datetime.date(2021, 2, 12), 20, [5])

# Colour key listing all five groups on a semi-transparent background.
legend_entries = [
    (palette_red, "0-20%"),
    (palette_grey4, "20-40%"),
    (palette_grey3, "40-60%"),
    (palette_grey2, "60-80%"),
    (palette_green, "80-100%"),
]
text = [_naep_label(colour, label) for colour, label in legend_entries]
annotation_helper(
    fig,
    text,
    datetime.date(2021, 1, 12),
    100,
    [5] * 4,
    width=60,
    bgcolor="rgba(255,255,255,0.7)",
)

# In-plot caption naming the series and the smoothing applied.
text = [
    f"<span style='color:{palette_darkgrey}; font-family:Tahoma; font-size:14px'>Average Disney Engagement </span>",
    f"<span style='color:{palette_grey2}; font-family:Tahoma; font-size:13px'>14 day moving average</span>",
]
annotation_helper(fig, text, datetime.date(2020, 4, 15), 100, [5], width=350)

# Title annotation, positioned with ref="paper" rather than at a date/value.
text = [
    "<span style='font-size:26px; font-family:Times New Roman;'> Disney: Engagement</span>",
    f"<span style='font-size:13px; font-family:Helvetica'><b style='color:{palette_darkgrey}'>There is no clear trend</b>  </span>",
]
annotation_helper(fig, text, 0.9, 1.375, [0.12, 0.055, 0.055], ref="paper", width=800)

fig.show()

- Nope. Students aren't being very distracted by Disney+

After looking at lots of ways in which students can be distracted while learning online, it doesn't seem like anything conclusive was found.

Hopefully this analysis shows that poorer-performing districts may not be dealing with problems where students are simply distracted by video games, social media or video streaming on their laptops. While it's possible that distractions exist in other facets of their lives, the computers they are using don't seem to indicate that they are distracted.

I hope you enjoyed this analysis and I expect some of the findings here to be of use to LearnPlatform and educators

# Next Steps:

- A fuller analysis without all of the demographic data being obfuscated would be nice
- Not sure if it is possible, but maybe LearnPlatform tracks individual students
- Segmenting activity by grade level would also lead to interesting results

# References:

1. NAEP information (https://en.wikipedia.org/wiki/National_Assessment_of_Educational_Progress)
2. Beautiful visualizations: spitfire2nd kaggle notebook (https://www.kaggle.com/spitfire2nd/the-learning-gap/notebook)