In [None]:
import calendar
import glob
import os
import warnings

warnings.filterwarnings('ignore')

import matplotlib as mpl
import matplotlib.pyplot as plt
import missingno
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# About LearnPlatform
<br>
<br>
<center>
<iframe width="600" height="337" src="https://www.youtube.com/embed/Fy19PNEXe1M" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
<br>
**LearnPlatform in 100 seconds** (YouTube, 2020, [Link](https://youtu.be/Fy19PNEXe1M))
</center>
<br>
Launched in 2014, LearnPlatform is home to an award-winning team of educators, researchers, technologists and thought leaders who build and deliver ground-breaking tools that increase the capacity of educators and their organizations to research, select and evaluate digital learning products that best meet the current needs of their students. 

*(From LearnPlatform, [About-Us](https://learnplatform.com/about-us))*

In [None]:
# The two end colours of the palette (the colorbar label below attributes them
# to the LearnPlatform logo).
CMAP = ["#69C9D6", "#8AC964"]
# Colour lookup keyed by 0 / 1 — presumably for a binary label; verify against its users.
LP_CMAP = {0: CMAP[0], 1: CMAP[1]}

# Continuous gradient between the two end colours; used as the colormap/palette
# of the heatmap and scatter plots further down.
cmap_learnplatform = LinearSegmentedColormap.from_list(
    'mycmap', [CMAP[0], "#66C9D6", "#7AC997", "#93C948", CMAP[1]]
)

# Not used by this cell; kept so any code that relies on the name keeps working.
cmap = mpl.cm.cool
norm = mpl.colors.Normalize(vmin=0, vmax=1)

# Preview the gradient as a stand-alone horizontal colorbar.
fig, ax = plt.subplots(figsize=(12, 2))
fig.subplots_adjust(bottom=0.5)

cb1 = fig.colorbar(
    mpl.cm.ScalarMappable(norm=norm, cmap=cmap_learnplatform),
    cax=ax,
    orientation='horizontal',
    ticks=[0, 1],
)
cb1.set_label('Colormap from LearnPlatform logo', size=16)
cb1.ax.set_xticklabels(['Low', 'High'], size=12)

# plt.show() instead of fig.show(): the latter only works with GUI backends and
# merely emits a warning on the inline notebook backend.
plt.show()

<br>
<br>
<img src="https://i.imgur.com/ckHd3Ko.png" width="700px">

# Pre-Processing

In [None]:
# Root folder of the LearnPlatform data set.
data_dir = "../input/learnplatform-covid19-impact-on-digital-learning"

products_info = pd.read_csv(os.path.join(data_dir, "products_info.csv"))

districts_info = pd.read_csv(os.path.join(data_dir, "districts_info.csv"))
# The district id attached to the engagement rows below comes from a file name
# (a string), so cast the key from int64 to str for the left join to match.
districts_info['district_id'] = districts_info['district_id'].astype(str)

path = os.path.join(data_dir, 'engagement_data')
# Sorted so the row order of the combined frame does not depend on the
# arbitrary order in which glob returns files.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    raise FileNotFoundError(f"No engagement CSV files found in {path!r}")

collect = []
for filename in all_files:
    temp_df = pd.read_csv(filename, index_col=None, header=0)
    # The file name (up to the first dot) is used as the district id. Taking
    # the basename keeps this independent of how deep `path` is nested.
    temp_df["district_id"] = os.path.basename(filename).split(".")[0]
    collect.append(temp_df)

engagement_data = pd.concat(collect, ignore_index=True)

# Enrich every engagement row with product and district attributes.
df = pd.merge(left=engagement_data, right=products_info, how='left', left_on='lp_id', right_on='LP ID', sort=False)
df = pd.merge(left=df, right=districts_info, how='left', on='district_id', sort=False)

# Free the inputs (including the per-file frames still referenced by the list).
del products_info, districts_info, engagement_data, collect, temp_df

df['time'] = pd.to_datetime(df['time'])
df['Year'] = df['time'].dt.year
df['Month'] = df['time'].dt.month
df['Week'] = df['time'].dt.isocalendar().week

# Rescaled engagement index; the plots below format this value as a percentage.
df['pct_engagement'] = df['engagement_index'] / 1000

# Two-level locale: 'Rural' vs. everything else ('Urban').
# A missing locale stays missing: without na=False the NaN returned by
# str.contains is truthy inside np.where and would be labelled 'Rural'.
is_rural = df['locale'].str.contains('Rural', na=False)
df['locale_2'] = pd.Series(np.where(is_rural, 'Rural', 'Urban'), index=df.index).where(df['locale'].notna())
del is_rural

# To save memory - delete non-useful columns
df = df.drop(columns=['lp_id', 'URL', 'engagement_index'])

# What is the picture of digital connectivity and engagement in 2020?

Internet Access Services: Status as of December 31, 2018
Data Dictionary for County-Level Data from FCC Form 477 (N=3,234)

* statename = State Name
* hhs = Households, in thousands (Census: 2014-2018 5-year ACS)
* ratio = residential / hhs
* Value -9999 = data withheld to maintain firm confidentiality

Data From [FCC (Federal Communications Commission)](https://www.fcc.gov/form-477-county-data-internet-access-services) 

In [None]:
county_connections = pd.read_csv("../input/fcc-form-477-data-as-of-december-31-2018/county_connections_dec_2018.csv")

# -9999 marks data withheld for confidentiality; keeping only positive ratios
# removes those rows (and any zero-ratio rows as well).
county_connections = county_connections[county_connections['ratio'] > 0]

# Household-weighted connectivity ratio per state:
#     sum(hhs * ratio) / sum(hhs)
# .assign() builds the helper column on a new frame instead of writing into a
# filtered slice (which triggers SettingWithCopyWarning).
state_totals = (
    county_connections
    .assign(hhs_times_ratio=lambda d: d['hhs'] * d['ratio'])
    .groupby('statename')[['hhs_times_ratio', 'hhs']]
    .sum()
)
connections_data = (
    (state_totals['hhs_times_ratio'] / state_totals['hhs'])
    .rename('county_connections_ratio(FCC)')
    .rename_axis('state')
    .reset_index()
)

# Drop a previously merged copy first so that re-running this cell does not
# create suffixed duplicate columns.
df = df.drop(columns=['county_connections_ratio(FCC)'], errors='ignore')
df = pd.merge(left=df, right=connections_data, how='left', on='state', sort=False) # add on main df

del county_connections, state_totals, connections_data #for memory saving

In [None]:
# Table keyed by (state, locale_2); presumably the source of the
# 'Connected%_of_Pop.' column used by later cells — verify against the CSV.
appendix2_by_state_locale = pd.read_csv(
    "../input/fcc1944a1pages5054-appendix2/FCC-19-44A1-pages-50-54_appendix2.csv"
)

# Left join keeps every engagement row; unmatched rows get NaN in the new columns.
merge_keys = ['state', 'locale_2']
df = df.merge(
    appendix2_by_state_locale,
    how='left',
    left_on=merge_keys,
    right_on=merge_keys,
    sort=False,
)

# Release the lookup table once it has been merged.
del appendix2_by_state_locale, merge_keys

In [None]:
# Row count and mean engagement per state.
conn_engage_data = (
    df.groupby(['state'])
      .agg(count=('pct_engagement', 'count'), mean=('pct_engagement', 'mean'))
      .reset_index()
)

# States ordered from highest to lowest mean engagement (heatmap column order).
High_Eng_List = conn_engage_data.sort_values(by='mean', ascending=False).state.values

# Heatmap data: mean engagement per locale (rows) and state (columns).
# pivot() takes keyword arguments; positional ones are rejected by pandas >= 2.0.
conn_engage_heat = (
    df.groupby(['locale', 'state'])
      .agg(count=('pct_engagement', 'count'), mean=('pct_engagement', 'mean'))
      .reset_index()
      .pivot(index="locale", columns="state", values="mean")
      .reindex(['City', 'Suburb', 'Town', 'Rural'])  # fixed locale order
)
conn_engage_heat = conn_engage_heat[High_Eng_List]  # states by engagement

# Cell annotations: truncated whole percentages, empty where there is no data.
# Masking on the original NaNs (not on the text "0") keeps a genuine 0% visible.
labels = (conn_engage_heat.fillna(0) * 100).astype('int64').astype('str') + "%"
labels = labels.where(conn_engage_heat.notna(), "")

fig, ax1 = plt.subplots(figsize=(20, 6))
fig.patch.set_facecolor('#EEEEEE')
fig.patch.set_alpha(1.0)

sns.heatmap(conn_engage_heat, vmin=0.07, vmax=0.31, linewidths=0,
            cmap=cmap_learnplatform, cbar=True, annot=labels, fmt='', ax=ax1)
ax1.patch.set_facecolor('#FFFFFF')
ax1.patch.set_alpha(1.0)

# NOTE(review): the colour range and its "Bottom20% / Median / Top20%" captions
# are hard-coded, not computed in this notebook — confirm they still match the data.
cbar = ax1.collections[0].colorbar
cbar.set_ticks([0.07, 0.14, 0.31])
cbar.set_ticklabels(['7% (Bottom20%)', '14% (Median)', '31% (Top20%)'])

plt.setp(ax1.get_xticklabels(), rotation=70, size=12)
plt.setp(ax1.get_yticklabels(), rotation=0, size=14)

ax1.set_xlabel("State")
ax1.set_ylabel("Locale")

ax1.set_title('Unit: Engagement%\nPeriod: 2020', fontsize=11, loc='right')
ax1.set_title('Average Engagement by States and Locale',
              fontsize=18,
              fontweight='heavy',
              loc='center',
              pad=30); # semi-colon hides the text repr before the graph output

Urban: City > Suburb > Town
Rural

In [None]:
def label_point(x, y, val, ax):
    """Write each entry of ``val`` just above-left of its (x, y) point on ``ax``.

    ``x``, ``y`` and ``val`` are Series sharing one index; they are aligned
    column-wise and one text label is drawn per row.
    """
    a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for i, point in a.iterrows():
        ax.text(point['x']-0.005, point['y']+0.01, str(point['val']))


def _scatter_engagement_panel(ax, data, x_col, xlabel, xlim, median, median_color,
                              median_text_xy, subtitle, title):
    """Draw one state-level scatter panel of mean engagement against ``x_col``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    data : DataFrame with the columns ``x_col``, ``'mean'`` and ``'state'``.
    x_col : name of the column plotted on the x axis.
    xlabel : x axis label.
    xlim : (low, high) x axis limits; the y axis is fixed to 0 - 0.90.
    median : y position of the dashed reference line (also shown in its caption).
    median_color : colour of the reference line and of its caption.
    median_text_xy : (x, y) data coordinates of the caption.
    subtitle, title : right-aligned small title and centred main title.
    """
    sns.scatterplot(data=data, x=x_col, y="mean", hue="mean",
                    palette=cmap_learnplatform, s=300, legend=False, ax=ax)
    ax.patch.set_facecolor('#FFFFFF')
    ax.patch.set_alpha(1.0)

    label_point(data[x_col], data["mean"], data["state"], ax)

    ax.set_xlim(*xlim)
    ax.set_ylim(0, 0.90)

    # A formatter follows the ticks when the limits change. Freezing label
    # strings with set_xticklabels()/set_yticklabels() before setting the
    # limits (as was done before) leaves labels that no longer match the ticks.
    ax.xaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1, decimals=0))
    ax.yaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1, decimals=0))

    ax.axhline(median, 0, 1, color=median_color, linestyle='--', linewidth=3)
    ax.text(median_text_xy[0], median_text_xy[1], 'Median\n({:.0%})'.format(median),
            fontsize=12, fontweight='bold', color=median_color, horizontalalignment='center')

    ax.set_xlabel(xlabel)
    ax.set_ylabel("Engagement% (mean)")

    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    for side in ('left', 'bottom'):
        ax.spines[side].set_visible(True)

    ax.set_title(subtitle, fontsize=9, loc='right')
    ax.set_title(title, fontsize=20, fontweight='heavy', loc='center', pad=10)


# Mean engagement per state and two-level locale. The state-level FCC ratio is
# part of the group key only so that it is carried into the result.
conn_engage_locale_2 = (
    df.groupby(['state', 'locale_2', 'county_connections_ratio(FCC)'])
      .agg(count=('pct_engagement', 'count'), mean=('pct_engagement', 'mean'))
      .reset_index()
)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
fig.patch.set_facecolor('#EEEEEE')
fig.patch.set_alpha(1.0)

# NOTE(review): the two "median" levels (13% / 31%) are hard-coded rather than
# computed from the data — confirm they are still correct.
_scatter_engagement_panel(
    ax1,
    conn_engage_locale_2[conn_engage_locale_2['locale_2'] == 'Urban'],
    x_col="county_connections_ratio(FCC)",
    xlabel="County Connections Ratio(FCC)",
    xlim=(0.75, 1.00),
    median=0.13, median_color='lightskyblue', median_text_xy=(0.982, 0.14),
    subtitle='City, Suburb, Town', title='Urban',
)
_scatter_engagement_panel(
    ax2,
    conn_engage_locale_2[conn_engage_locale_2['locale_2'] == 'Rural'],
    x_col="county_connections_ratio(FCC)",
    xlabel="County Connections Ratio(FCC)",
    xlim=(0.75, 1.00),
    median=0.31, median_color='limegreen', median_text_xy=(0.982, 0.26),
    subtitle='Rural only', title='Rural',
)

# Must run after the axes exist to have any effect.
fig.tight_layout();



In [None]:
def label_point(x, y, val, ax):
    """Write each entry of ``val`` just above-left of its (x, y) point on ``ax``.

    ``x``, ``y`` and ``val`` are Series sharing one index; they are aligned
    column-wise and one text label is drawn per row.
    """
    a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for i, point in a.iterrows():
        ax.text(point['x']-0.005, point['y']+0.01, str(point['val']))


def _scatter_engagement_panel(ax, data, x_col, xlabel, xlim, median, median_color,
                              median_text_xy, subtitle, title):
    """Draw one state-level scatter panel of mean engagement against ``x_col``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    data : DataFrame with the columns ``x_col``, ``'mean'`` and ``'state'``.
    x_col : name of the column plotted on the x axis.
    xlabel : x axis label.
    xlim : (low, high) x axis limits; the y axis is fixed to 0 - 0.90.
    median : y position of the dashed reference line (also shown in its caption).
    median_color : colour of the reference line and of its caption.
    median_text_xy : (x, y) data coordinates of the caption.
    subtitle, title : right-aligned small title and centred main title.
    """
    sns.scatterplot(data=data, x=x_col, y="mean", hue="mean",
                    palette=cmap_learnplatform, s=300, legend=False, ax=ax)
    ax.patch.set_facecolor('#FFFFFF')
    ax.patch.set_alpha(1.0)

    label_point(data[x_col], data["mean"], data["state"], ax)

    ax.set_xlim(*xlim)
    ax.set_ylim(0, 0.90)

    # A formatter follows the ticks when the limits change. Freezing label
    # strings with set_xticklabels()/set_yticklabels() before setting the
    # limits (as was done before) leaves labels that no longer match the ticks.
    ax.xaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1, decimals=0))
    ax.yaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1, decimals=0))

    ax.axhline(median, 0, 1, color=median_color, linestyle='--', linewidth=3)
    ax.text(median_text_xy[0], median_text_xy[1], 'Median\n({:.0%})'.format(median),
            fontsize=12, fontweight='bold', color=median_color, horizontalalignment='center')

    ax.set_xlabel(xlabel)
    ax.set_ylabel("Engagement% (mean)")

    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    for side in ('left', 'bottom'):
        ax.spines[side].set_visible(True)

    ax.set_title(subtitle, fontsize=9, loc='right')
    ax.set_title(title, fontsize=20, fontweight='heavy', loc='center', pad=10)


# Mean engagement per state and two-level locale. 'Connected%_of_Pop.' is part
# of the group key only so that it is carried into the result.
state_coverage_locale = (
    df.groupby(['state', 'locale_2', 'Connected%_of_Pop.'])
      .agg(count=('pct_engagement', 'count'), mean=('pct_engagement', 'mean'))
      .reset_index()
)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
fig.patch.set_facecolor('#EEEEEE')
fig.patch.set_alpha(1.0)

# NOTE(review): the two "median" levels (13% / 31%) are hard-coded rather than
# computed from the data — confirm they are still correct.
_scatter_engagement_panel(
    ax1,
    state_coverage_locale[state_coverage_locale['locale_2'] == 'Urban'],
    x_col="Connected%_of_Pop.",
    xlabel="Connected%_of_Pop.",
    xlim=(0.60, 1.00),
    median=0.13, median_color='lightskyblue', median_text_xy=(0.800, 0.14),
    subtitle='City, Suburb, Town', title='Urban',
)
# The Rural panel used to be labelled "County Connections Ratio(FCC)" although
# it plots 'Connected%_of_Pop.'; both panels now share the correct label.
_scatter_engagement_panel(
    ax2,
    state_coverage_locale[state_coverage_locale['locale_2'] == 'Rural'],
    x_col="Connected%_of_Pop.",
    xlabel="Connected%_of_Pop.",
    xlim=(0.60, 1.00),
    median=0.31, median_color='limegreen', median_text_xy=(0.800, 0.245),
    subtitle='Rural only', title='Rural',
)

# Must run after the axes exist to have any effect.
fig.tight_layout();


# Demographic

In [None]:
# County-level figures that are aggregated to state level as a
# population-weighted mean: sum(TotalPop * value) / sum(TotalPop).
# NOTE(review): every column is weighted by total population, as before; some
# (e.g. ChildPoverty or the occupation shares) may really be defined over a
# sub-population — confirm against the data dictionary.
CENSUS_WEIGHTED_COLS = [
    'Hispanic', 'White', 'Black', 'Native', 'Asian', 'Pacific',
    'IncomePerCap',
    'Poverty', 'ChildPoverty',
    'Professional', 'Service', 'Office', 'Construction', 'Production',
    'Drive', 'Carpool', 'Transit', 'Walk', 'OtherTransp', 'WorkAtHome', 'MeanCommute',
    'PrivateWork', 'PublicWork', 'SelfEmployed', 'FamilyWork', 'Unemployment',
]

# County-level head counts that are summed per state and then expressed as a
# percentage of the state's total population.
CENSUS_COUNT_COLS = ['Men', 'Women', 'VotingAgeCitizen', 'Employed']


def summarize_census_by_state(county_df):
    """Aggregate county-level census rows to one row per state.

    Parameters
    ----------
    county_df : DataFrame with a 'State' column, a 'TotalPop' column and the
        columns listed in CENSUS_WEIGHTED_COLS and CENSUS_COUNT_COLS.

    Returns
    -------
    DataFrame indexed by 'State'. 'TotalPop' is the state total, the weighted
    columns hold population-weighted means on their original scale, and the
    count columns hold percentages of 'TotalPop'.
    """
    weighted = county_df.copy()
    weighted[CENSUS_WEIGHTED_COLS] = weighted[CENSUS_WEIGHTED_COLS].mul(weighted['TotalPop'], axis=0)

    # numeric_only=True keeps string columns (e.g. the county name) from being
    # concatenated into the sums.
    state_df = weighted.groupby('State').sum(numeric_only=True)

    # Each column is weighted and normalised exactly once. 'Carpool' used to be
    # multiplied and divided twice, which weighted it by population squared.
    state_df[CENSUS_WEIGHTED_COLS] = state_df[CENSUS_WEIGHTED_COLS].div(state_df['TotalPop'], axis=0)
    state_df[CENSUS_COUNT_COLS] = state_df[CENSUS_COUNT_COLS].div(state_df['TotalPop'], axis=0) * 100
    return state_df


Census2017 = pd.read_csv("../input/us-census-demographic-data/acs2017_county_data.csv")
Census2017 = Census2017.drop(['CountyId', 'Income', 'IncomeErr', 'IncomePerCapErr'], axis=1)

# One row per state; this table is joined onto the per-district summary in the
# following cell rather than onto the full `df`.
Census2017 = summarize_census_by_state(Census2017)

In [None]:
# One row per district: mean engagement plus the state-level census figures.
district_df = (
    pd.merge(
        left=(
            df.groupby(['district_id', 'state', 'locale_2', 'Connected%_of_Pop.'])
              .agg(count=('pct_engagement', 'count'), mean=('pct_engagement', 'mean'))
              .reset_index()
        ),
        right=Census2017,
        how='left',
        left_on=['state'],
        right_on=['State'],
        sort=False,
    )
    .drop(['count'], axis=1)                    # the row count is not needed further on
    .rename(columns={'mean': 'Engagement%'})
)

# Binary target: 1 when a district's mean engagement is at or above the median
# over all districts, otherwise 0.
Engagment_median = district_df['Engagement%'].median()
district_df['Target'] = np.where(district_df['Engagement%'] >= Engagment_median, 1, 0)



In [None]:
covid19_data = pd.read_csv("../input/us-counties-covid-19-dataset/us-counties.csv")
covid19_data['date'] = pd.to_datetime(covid19_data['date'])
covid19_data['year'] = covid19_data['date'].dt.year
covid19_data = covid19_data[covid19_data["year"] == 2020]

# National total per day (sum over all counties), in date order.
daily_total = covid19_data.groupby('date')['cases'].sum().to_frame().reset_index()
daily_total['Week'] = daily_total['date'].dt.isocalendar().week

# NOTE(review): `cases` is assumed to be a running (cumulative) total — the
# diff() below only makes sense in that case; confirm against the data set docs.
# The weekly level is therefore the value on the last reported day of the week.
# Summing the running total over the days of a week (as was done before)
# inflates the week-over-week difference roughly seven-fold.
covid19_day_data = daily_total.groupby('Week')['cases'].last().to_frame()

# New cases per week; the first week has no predecessor, so its new cases are
# its running total.
covid19_day_data['new_cases'] = covid19_day_data['cases'].diff().fillna(covid19_day_data['cases'])

# ISO week 53 of 2020 is not a full 7-day week within the year; drop it if present.
covid19_day_data = covid19_day_data.drop(index=53, errors='ignore')

del daily_total

In [None]:
# Data for line graph: mean engagement per ISO week.
engage_trend = df.groupby('Week')['pct_engagement'].mean()

# Graph drawing
fig, ax1 = plt.subplots(figsize=(12, 5))

fig.patch.set_facecolor('#F3F7FF')
fig.patch.set_alpha(1.0)

# x / y must be passed by keyword (seaborn >= 0.12 rejects positional data).
sns.lineplot(x=engage_trend.index.astype(int), y=engage_trend.values, ax=ax1)

# A formatter keeps the percent labels in sync with the ticks; fixed label
# strings from set_yticklabels() would go stale if the ticks change.
ax1.yaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1, decimals=0))

ax1.patch.set_facecolor('#F3F7FF')
ax1.patch.set_alpha(1.0)

# Weekly new cases on a secondary y axis. Plain bars at the numeric week keep
# them aligned with the line; a categorical seaborn barplot would place them at
# positions 0..N-1 and overwrite the shared x tick labels.
ax2 = ax1.twinx()
ax2.bar(covid19_day_data.index.astype(int), covid19_day_data['new_cases'].astype(float),
        color="blue", alpha=0.3)  # translucent so the line stays readable

for axis in (ax1, ax2):
    axis.spines['top'].set_visible(False)

# Label each y axis explicitly: plt.ylabel() would target the twin (cases) axis.
ax1.set_xlabel("Week")
ax1.set_ylabel("Engagement% (mean)")
ax2.set_ylabel("New COVID-19 cases (weekly)")

# The data shown is for 2020 (the title previously said 2021).
ax1.set_title('(Unit) Engagement Index/1000\n(Engagement) Total page load events\n(Period) 2020\n', fontsize=9, loc='right')
ax1.set_title('Engagement Trend',
              fontsize=16,
              fontweight='heavy',
              loc='center',
              pad=30); # semi-colon hides the text repr before the graph output

To Be Developed