# LearnPlatform COVID-19 Impact on Digital Learning
${\large{ In \ this\ notebook\ you \ will \ go \ through\ an\ analysis\ of\ US\\ data \ about\ the \ impact\ of\ Covid\ on \ digital\ learning.\\ I \ hope\ you \ enjoy\ it \ as\ you\ scroll\ down,\ step\ by\ step,\\ to\ the\ end\ of\ this \ notebook. \\}}$

# Libraries

${\large{ In \ this\ section\ we\ import\ the\ necessary\ libraries. \\}}$

In [None]:
# For managing , processing , cleaning Data:
import pandas as pd
import numpy as np

# For open Data:
import os as os

# For managing data and search for specific informations:
import re

# For geographycal visualisation :
import folium
import geopandas as gpd
from folium.plugins import DualMap
from folium.plugins import TimestampedGeoJson

# For visualisation :
import matplotlib.pyplot as plt
import seaborn as sns 

# To generate some gradient colors :
from matplotlib.cm import YlGn, YlOrBr, OrRd

# legend Colormap:
import branca.colormap as cm

# For more interactive Visualisation :
import plotly.express as px
import plotly.graph_objects as go

# For Create features:
from geojson import Feature, Point, FeatureCollection

# To generate a word cloud picture for illustration purposes
from wordcloud import WordCloud, STOPWORDS

# To generate datetime objects:
import datetime

# Useful Functions

${\large{ In \ this\ section\ we\ write \ down \ some\ functions\ to\ reduce \ the \ amount\\ of\ coding. \\}}$


In [None]:
# I- converting to continuous data:

def continuous_data(columns, data=None):
    """Replace interval-valued text columns by the midpoint of each interval.

    Every cell of the listed columns must be a string containing two
    numbers (the interval bounds, e.g. ``"[0.2, 0.4["``).  The cell is
    replaced by the mean of those two numbers.  For ``'pp_total_raw'`` the
    bounds are truncated to integers before averaging.

    Parameters
    ----------
    columns : iterable of str
        Columns to convert, e.g. ``['pct_black/hispanic',
        'pct_free/reduced', 'county_connections_ratio', 'pp_total_raw']``.
    data : pandas.DataFrame, optional
        Frame to convert.  Defaults to the notebook-level frame ``d``.

    Returns
    -------
    pandas.DataFrame
        A copy of the input in which the listed columns hold numbers.

    Raises
    ------
    ValueError
        If a cell contains fewer than two numbers.
    """
    source = d if data is None else data
    # Explicit copy: the source frame keeps its raw strings, so the function
    # can be called again on the same input.
    converted = source.copy()
    number = re.compile(r"[-+]?\d*\.\d+|\d+")

    for column in columns:
        truncate = column == 'pp_total_raw'
        midpoints = []
        for cell in source[column]:
            found = number.findall(cell)
            if len(found) < 2:
                raise ValueError(
                    "expected two interval bounds in column %r, got %r"
                    % (column, cell)
                )
            bounds = [float(value) for value in found[:2]]
            if truncate:
                bounds = [int(bound) for bound in bounds]
            midpoints.append(sum(bounds) / len(bounds))
        converted[column] = midpoints
    return converted

# II- Top 10 products with higher engagement_index per district:

def Top_10():
    """Collect, for every district in ``d_infos_``, its top product per month.

    For each district the matching ``<district_id>.csv`` engagement file is
    read, rows with missing values are dropped, product names and main
    functions are attached from ``P_infos``, and only the row with the
    largest ``engagement_index`` of each month is kept.

    Returns
    -------
    pandas.DataFrame
        All districts stacked together, with ``(locale, state)`` tuples as
        the outer concatenation keys.
    """
    products = P_infos[['LP ID', 'Product Name', 'main_fun']].rename(
        columns={"LP ID": "lp_id"})
    kept_columns = ['time', 'lp_id', 'engagement_index', 'Product Name', 'main_fun']

    keys = []
    monthly_tops = []
    for ID, locale, state in zip(d_infos_.district_id, d_infos_.locale, d_infos_.state):
        engagement = pd.read_csv(path + '/engagement_data/' + str(ID) + '.csv').dropna()
        engagement['time'] = pd.to_datetime(engagement['time'])
        engagement = pd.merge(engagement, products, on='lp_id')

        # one group per month; keep the single most engaging row of each
        by_month = engagement[kept_columns].groupby([pd.Grouper(key='time', freq='1M')])
        best = by_month.apply(lambda month: month.nlargest(1, 'engagement_index'))
        best.insert(5, "locale", locale)
        best.insert(6, "state", state)

        keys.append((locale, state))
        monthly_tops.append(best.dropna())

    # concatenate all engagement data:
    return pd.concat(monthly_tops, keys=keys)

# III-  Map a color for each values of 'pp_total_raw' from
# the districts informations data :

def color_P_T_R_function(x, locale):
    """Map a ``pp_total_raw`` value to a hex colour of the YlGn colormap.

    The value is scaled between the smallest and largest ``pp_total_raw``
    found in ``All`` for the given locale.

    Parameters
    ----------
    x : number
        The ``pp_total_raw`` value to colour.
    locale : str
        First-level key of ``All`` (e.g. ``'City'``, ``'Rural'``).

    Returns
    -------
    str
        Colour formatted as ``"#rrggbb"``.

    Raises
    ------
    KeyError
        If ``locale`` is not a key of ``All``.
    """
    values = All.loc[locale]['pp_total_raw']
    data_min = values.min()
    data_max = values.max()
    span = data_max - data_min
    # A locale with a single distinct value has no range to scale over;
    # use the low end of the colormap instead of dividing by zero.
    y = (x - data_min) / span if span else 0.0
    color_tuple = tuple(int(channel) for channel in YlGn(y, bytes=True)[:3])
    return "#%02x%02x%02x" % color_tuple

# IV- Geo-Visualisation of the 'PP_Total_raw' :

def PP_Total_raw_map(Locales): # Locales = ['City', 'Suburb', 'Town', 'Rural']
    """Build a folium map with one ``pp_total_raw`` layer per locale.

    Parameters
    ----------
    Locales : iterable of str
        First-level keys of ``All`` to draw, one GeoJson layer each.

    Returns
    -------
    folium.Map
        Map holding the locale layers, a layer for states missing from
        ``All``, a colour legend, two extra tile layers and a layer control.
    """
    # Initialization :
    Map = folium.Map([40, -102], zoom_start = 3)

    # Create colormap legend :
    Mx = All.pp_total_raw.max()
    Mn = All.pp_total_raw.min()
    colormap = cm.LinearColormap(colors=['yellow', 'green'],
                                 index=[Mn, Mx],vmin=Mn, vmax=Mx,
                                 caption='Per-pupil total expenditure per locale')
    # add legend to map :
    Map.add_child(colormap)

    tooltip_style = ("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")

    for L in Locales:
        folium.features.GeoJson(
            name="Click for"+ L ,
            data=All.loc[L],
            # Bind the current locale as a default argument.  A plain closure
            # looks `L` up when it is called, which can be after the loop has
            # moved on, so every layer would be coloured with the last locale.
            style_function=lambda feature, L=L: {
                'fillColor': color_P_T_R_function(feature['properties']['pp_total_raw'], L),
                'color' : 'black',
                'weight' : 1,
                'fillOpacity' : .9},
            show=True,
            tooltip=folium.features.GeoJsonTooltip(
                fields=['locale','state','district_id','pct_black/hispanic',
                        'pct_free/reduced','county_connections_ratio',
                        'pp_total_raw'],
                aliases=['Locale','State','ID', 'B/H (%)', 'F/R (%)', 'C.C.R',
                         'PP.T.R'],
                style=tooltip_style
            )
        ).add_to(Map)

    # States that do not appear in `All` get a white layer and a
    # "no information" tooltip:
    folium.features.GeoJson(
        name="State with no informations.",
        data=geoJSON_df[~geoJSON_df['state'].isin(All['state'])],
        style_function=lambda feature: {
            'fillColor': '#ffffff',
            'color' : 'black',
            'weight' : 1},
        show=True,
        tooltip=folium.features.GeoJsonTooltip(
            fields=['state'],
            aliases=["<b>State </b><br><i>No information to display </i>."],
            style=tooltip_style
        )
    ).add_to(Map)

    folium.TileLayer('cartodbdark_matter',name="dark mode",control=True).add_to(Map)
    folium.TileLayer('cartodbpositron',name="light mode",control=True).add_to(Map)

    folium.LayerControl(collapsed=True).add_to(Map)

    return Map

# V- Chart for top products visualisation:

def Top_Visual():
    """Show a grouped bar chart of the top monthly engagement per locale.

    For each locale the rows of ``Top_districts`` are grouped by month and
    the row with the largest ``engagement_index`` is kept.  Each bar sits on
    the month of its own row, so a locale with a missing month cannot shift
    the remaining bars onto wrong labels.

    Returns
    -------
    None
        The result of ``fig.show()``.
    """
    # Month labels for 2020; used to pin the category order of the x axis.
    month_labels = [stamp.strftime("%b %Y")
                    for stamp in pd.date_range(start='2020-01-01', end='2020-12-31', freq= "M")]

    full_names = {'LC': 'Learning & Curriculum',
                  'CM': 'Classroom Management',
                  'SDO': 'School & District Operations'}

    fig = go.Figure()

    Locale = ['City', 'Town', 'Suburb', 'Rural']
    colors = ['firebrick', 'steelblue', 'indianred', 'lightsalmon']
    for locale, c in zip(Locale, colors) :

        Data = pd.DataFrame(Top_districts.loc[locale].groupby([pd.Grouper(key='time', freq='1M')],
                                            as_index = False).apply(
            lambda x: x.nlargest(1, 'engagement_index'))
                        )
        # Expand the abbreviations as literal text, in the original order.
        for short, full in full_names.items():
            Data['main_fun'] = Data['main_fun'].str.replace(short, full, regex=False)

        fig.add_trace(go.Bar(
            x=Data['time'].dt.strftime("%b %Y"),
            y=Data["engagement_index"],
            name=locale,
            marker_color=c,
            hovertext="<b>Product:</b><i>" + Data["Product Name"] +"</i>.<br>"
            +"<b>State:</b><i>"+ Data["state"] + "</i>.<br>"
            +"<b>Essential Function:</b><i>"+ Data["main_fun"] + "</i>."
        ))

    fig.update_layout(
        title='Total page-load events per one thousand students per product',
        xaxis_tickfont_size=14,
        xaxis_tickangle=-45,
        # keep the months in calendar order whichever trace is added first
        xaxis_categoryorder='array',
        xaxis_categoryarray=month_labels,
        yaxis=dict(
            # `titlefont_size` is deprecated; use the nested title font
            title=dict(text='Engagement Index', font=dict(size=16)),
            tickfont_size=14,
        ),
        barmode='group'
    )

    return fig.show()

# VI- Create the features for the time-stamped maps:

def Create_feature(kind):
    """Build one GeoJSON point feature per row of ``Group_df``.

    Parameters
    ----------
    kind : str
        Suffix selecting the style columns: ``'radius/<kind>'`` and
        ``'fill color/<kind>'`` are read from each row.

    Returns
    -------
    list
        ``Feature`` objects placed at the state's coordinates from
        ``Lon_lat``, stamped with the first element of the row index.

    Raises
    ------
    KeyError
        If a state of ``Group_df`` has no entry in ``Lon_lat``.
    """
    # Build the state -> (long, lat) lookup once instead of filtering
    # `Lon_lat` twice per row.  `setdefault` keeps the first match.
    coordinates = {}
    for state, lon, lat in zip(Lon_lat['state'], Lon_lat['long'], Lon_lat['lat']):
        coordinates.setdefault(state, (lon, lat))

    radius_data = 'radius/' + kind
    fill_color_data = 'fill color/' + kind

    TmpFeature = []
    # Each index is a tuple: element 0 is the time, element 1 the state.
    for index, row in Group_df.iterrows():
        long, lat = coordinates[index[1]]
        TmpFeature.append(
            Feature(
                geometry=Point((long, lat)),
                properties={
                    'time': str(pd.to_datetime(index[0])),
                    'style': {'color' : ''},
                    'icon': 'circle',
                    'iconstyle': {
                        'fillColor': row[fill_color_data],
                        'fillOpacity': 0.8,
                        # NOTE(review): this is the string 'false', not a
                        # boolean - confirm the stroke is really disabled.
                        'stroke': 'false',
                        'radius': row[radius_data]
                    }
                }
            ))

    return TmpFeature

# VII- Dual map of Covid's cases & deaths:

def _add_covid_layers(target_map, kind, label):
    """Add animated markers, state outlines and a fullscreen button to a pane."""
    TimestampedGeoJson({
                            'type': 'FeatureCollection',
                            'features': Create_feature(kind),
                            },
                      period = 'P1M',
                      auto_play = True).add_to(target_map)

    folium.features.GeoJson(
        # the layer name must be a label, not the data itself
        name='States',
        data=state_geo,
        show=True,
        tooltip=folium.features.GeoJsonTooltip(
            fields=['state'],
            aliases=['State'],
            style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
        )
    ).add_to(target_map)

    folium.plugins.Fullscreen(position='topright',
                              title='Full Screen of ' + label + ' Data',
                              title_cancel='Exit Full Screen of ' + label + ' Data',
                              force_separate_button=False).add_to(target_map)


def Dual_map(cases = 'cases', deaths = 'deaths'):
    """Build a side-by-side map: deaths on the left, cases on the right.

    Parameters
    ----------
    cases : str
        ``kind`` passed to ``Create_feature`` for the right-hand pane.
    deaths : str
        ``kind`` passed to ``Create_feature`` for the left-hand pane.

    Returns
    -------
    folium.plugins.DualMap
        Both panes carry time-stamped markers, the ``state_geo`` outlines
        with a state tooltip, and a fullscreen button.
    """
    dualmap = DualMap(location = [40,-102],
                      zoom_start = 3,
                      layout = 'horizontal'
                     )

    _add_covid_layers(dualmap.m1, deaths, 'Deaths')
    # The cases pane now gets its own "Exit Full Screen of Cases Data" label.
    _add_covid_layers(dualmap.m2, cases, 'Cases')

    return dualmap

# VIII - Products and Engagement Index
def Prod_vs_engagement():
    """Show a grouped bar chart of engagement index per product and month.

    One bar trace is added for every distinct ``'Product Name'`` of
    ``Data_prod``, with the month of each row on the x axis and the state
    and essential function in the hover text.

    Returns
    -------
    None
        The result of ``fig.show()``.
    """
    full_names = {'LC': 'Learning & Curriculum',
                  'CM': 'Classroom Management',
                  'SDO': 'School & District Operations'}

    fig = go.Figure()
    for p in Data_prod['Product Name'].unique() :

        # Copy the selection so that editing `main_fun` below never writes
        # through to (or warns about) `Data_prod`.
        Data = Data_prod[Data_prod['Product Name'] == p].copy()

        Date = Data['time'].dt.strftime("%b %Y").tolist()

        # Expand the abbreviations as literal text, in the original order.
        for short, full in full_names.items():
            Data['main_fun'] = Data['main_fun'].str.replace(short, full, regex=False)

        fig.add_trace(go.Bar(
            x=Date,
            y=Data['engagement_index'],
            name = p,
            hovertext=
            "<b>State:</b><i>" + Data['state'] +"</i>.<br>"
            + "<b>Essential Function:</b><i>" + Data['main_fun'] +"</i>.<br>",
            width = 0.2
                            )
                     )
    fig.update_layout(
        title='Total page-load events per one thousand students per product <br> during 2020',
        xaxis_tickfont_size=14,
        xaxis_tickangle=-45,
        yaxis=dict(
            # `titlefont_size` is deprecated; use the nested title font
            title=dict(text='Engagement Index', font=dict(size=16)),
            tickfont_size=14,
        ),
        barmode='group',
        bargap = 0.2
    )
    return fig.show()

# Handling the data (District informations, Engagement & Products Data):

# I- Extract all the Districts ID with an engagement data:

$\large{ \underline{\textbf{Note:}}\ that\ not\ all\ engagement\ data\ are\ mentioned\\ in\ the\ file:\texttt{districts_info.csv} }$

${ \large{ \underline{\textbf{S}}ince\ the\ file\ name\ of\ each\ \texttt{####.csv}\ in\ \texttt{engagement_data}\ \\ stands\ for\ the\ \textbf{ID}\ of\ the\ district,\ we'll\ extract\ them\ for\ future \\ use\ (linking\ \texttt{districts_info.csv}\ with\ the\ corresponding \\ engagement\_data).  }}$

In [None]:
# Data engagement:

# file name == district_id:

path = "../input/learnplatform-covid19-impact-on-digital-learning"

# Each engagement file is named "<district_id>.csv".  Collect the ids in a
# list first: DataFrame.append was removed in pandas 2.0 and copied the whole
# frame on every call.  Files without a ".csv" suffix are ignored.
district_ids = [
    int(name.split('.')[0])
    for root, dirc, files in os.walk(path + '/engagement_data')
    for name in files
    if name.endswith('.csv')
]
f_n = pd.DataFrame({'district_id': pd.Series(district_ids, dtype='int64')})
# the amount of engagement data we have:
print(len(f_n.index))
f_n.head()

${ \large{ \underline{\textbf{W}}e\ have\ \textbf{233}\ \texttt{####.csv} \ files\ of\ \texttt{engagement_data}.\\ \underline{\textbf{F}}ortunately\ we'll\ not\ use\ them\ all,\ \underline{\textbf{j}}ust\ the\ files\ whose\ name\ matches\ the\ \textbf{ID}\\ of\ a\ district\ in\ \texttt{districts_info.csv}.  }}$

# II- Districts informations data:

## II-A. Description of the data:

| Name | Description |
| :--- | :----------- |
| district_id | The unique identifier of the school district |
| state | The state where the district resides in |
| locale | NCES locale classification that categorizes U.S. territory into four types of areas: City, Suburban, Town, and Rural.|
| pct_black/hispanic | Percentage of students in the districts identified as Black or Hispanic based on 2018-19 NCES data |
| pct_free/reduced | Percentage of students in the districts eligible for free or reduced-price lunch based on 2018-19 NCES data |
| county_connections_ratio | `ratio` (residential fixed high-speed connections over 200 kbps in at least one direction/households) based on the county level data from FCC From 477 (December 2018 version).|
| pp_total_raw | Per-pupil total expenditure (sum of local and federal expenditure) from Edunomics Lab's National Education Resource Database on Schools (NERD$) project.|

In [None]:
# Load the district table and keep only rows with a known locale.
d_infos = pd.read_csv(path + '/districts_info.csv').loc[
    lambda frame: frame['locale'].isin(['Suburb', 'City', 'Rural', 'Town'])
]

d_infos.head()

## II-B. Districts informations data that have engagement data:

In [None]:
# Keep the districts that have an engagement file, drop incomplete rows and
# renumber.  A single `isin` filter replaces the row-by-row
# DataFrame.append loop (removed in pandas 2.0); rows now follow the order
# of `d_infos` rather than the order in which the files were listed.
d = d_infos[d_infos['district_id'].isin(f_n['district_id'])]
d = d.dropna().reset_index(drop=True)
d.tail()

${ \large{ \underline{\textbf{T}}he\ values\ in\ \texttt{districts_info.csv}\ are\ not\ numeric\ but\ intervals.\ \underline{\textbf{S}}o\ we'll\\ convert\ them\ by\ taking\ the\ middle\ value\ of\ each\ interval.}}$

In [None]:
# All columns of `d` from the fourth onward are handed to continuous_data.
columns = d.columns[3:]

# Preview the converted table.
continuous_data(columns).head()

## II-C. Group by locale and "max pp_total_raw" :

In [None]:
# One row per (locale, state).  The maximum is taken column by column, so
# the values of a row may come from different districts of the group.
d_infos_ = (
    continuous_data(columns)
    .groupby(['locale', 'state'], as_index=False)
    .max()
)

d_infos_.head() # this is the final data we will proceed with

## II-D. Statistical description:

In [None]:
# Summary statistics of `d_infos_`.
d_infos_.describe()

# III- Products informations :

## III-A. Description of the data:

| Name | Description |
| :--- | :----------- |
| LP ID| The unique identifier of the product |
| URL | Web Link to the specific product |
| Product Name | Name of the specific product |
| Provider/Company Name | Name of the product provider |
| Sector(s) | Sector of education where the product is used |
| Primary Essential Function | The basic function of the product. There are two layers of labels here. Products are first labeled as one of these three categories: LC = Learning & Curriculum, CM = Classroom Management, and SDO = School & District Operations. Each of these categories have multiple sub-categories with which the products were labeled. |

## III-B. Cleaning Data:

In [None]:
P_infos = (
    pd.read_csv(path + '/products_info.csv')
    # keep the first row of every URL, then discard rows with "nan" values
    .drop_duplicates(subset=['URL'], keep='first')
    .dropna()
    .reset_index(drop=True)
)

P_infos.head()

## III-C. Slicing the Primary Essential Function feature of the data to "main", "sec" & "others":

In [None]:
# create columns named : 
# "main_fun" = which should be: LC, CM or SDO
# "sec_fun" & "other" = which should be sub-categories with
# which the products were labeled.

main_fun = []
sec_fun = []
other = []

for fun in P_infos['Primary Essential Function'] :
    # Strip each part: splitting "LC - Sub - Other" on "-" alone leaves the
    # surrounding blanks attached ("LC ", " Sub ", " Other").
    parts = [part.strip() for part in fun.split("-")]
    main_fun.append(parts[0])
    # A label without any "-" has no sub-category; avoid an IndexError.
    sec_fun.append(parts[1] if len(parts) >= 2 else "no sub func")
    other.append(parts[-1] if len(parts) >= 3 else "no other sub func")

P_infos['main_fun'] = main_fun
P_infos['sec_fun'] = sec_fun
P_infos['other'] = other

P_infos.head()

## III-D. Grouping by the features 'main_fun','sec_fun' & 'Sector(s)': 

In [None]:
# Number of products for every (main function, sub function, sector) triple.
P_infos_g = (
    P_infos
    .groupby(['main_fun', 'sec_fun', 'Sector(s)'], as_index=False)['LP ID']
    .count()
)
P_infos_g.tail()

${ \large{ \underline{\textbf{T}}his\ last\ data\ \texttt{P_infos_g}\ is\ helpful\ in\ figuring\ out\ the\ target\ sectors\\ of\ these\ \underline{\textbf{p}}roducts\ and\ their\ most\ common\ Primary\ Essential\ Function.}}$

# IV- Engagement Data:

## IV-A. Description of the data:

| Name | Description |
| :--- | :----------- |
| time | date in "YYYY-MM-DD" |
| lp_id | The unique identifier of the product |
| pct_access | Percentage of students in the district have at least one page-load event of a given product and on a given day |
| engagement_index | Total page-load events per one thousand students of a given product and on a given day |

## IV-B. Top districts with a higher engagement index products per month :

In [None]:
# Build the table returned by Top_10() and preview its first rows.
Top_districts = Top_10()
Top_districts.head()

## IV-C. Top products with a higher engagement index per month for a districts:

In [None]:
# For every (product name, time) pair keep the single row of Top_districts
# with the largest engagement_index.
Data_prod = pd.DataFrame(Top_districts.groupby(['Product Name',
                                                Top_districts.time]).apply(
        lambda x: x.nlargest(1, 'engagement_index')))
Data_prod.head()  

# V- Covid-19 Data:

${ \large{ \underline{\textbf{I}}n\ this\ section\ w'll\ use\ the\ Covid-19\ data\ from}}$ [this source](https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv).

In [None]:
url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
covid_df = pd.read_csv(url, parse_dates=['date']).drop(columns=['fips'])

# keep the states that appear in the district data and renumber the rows
covid_df = covid_df.loc[covid_df['state'].isin(d_infos_.state)].reset_index(drop=True)
# keep the dates of 2020 only
covid_df = covid_df.loc[
    covid_df['date'].isin(pd.date_range(start='2020-01-01', end='2020-12-31'))
]

covid_df.tail()

${ \large{ \underline{\textbf{Processing}}\ Covid\ data.\ We'll\ associate\ a\ geometry\ with\ each\ state\ and\ explore\ the\ coordinates\ of\ the\ US\ states.\ For\ the\ resources\ check\ this:\  }}$ [Geometry US Data](http://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json)

[State Coordinates](https://www.kaggle.com/delendaanouarakacha/longitude-latitude-world-usa?select=longitude_latitude_world_usa.csv).

${ \large{ \underline{\textbf{States Coordinates}} }}$

In [None]:
# Data for Longitude and Latitude :

# Read the coordinates, keep the US-state columns, drop rows without a state
# name and give the columns short names.
Lon_lat = (
    pd.read_csv('../input/longitude-latitude-world-usa/longitude_latitude_world_usa.csv')
    [['usa_state_code', 'usa_state_longitude', 'usa_state_latitude', 'usa_state']]
    .dropna(subset=['usa_state'])
    .reset_index(drop=True)
    .rename(columns={"usa_state_longitude": "long",
                     "usa_state_latitude": "lat",
                     "usa_state": "state",
                     "usa_state_code": "code"})
)
# Only the states present in the district data are needed.
Lon_lat = Lon_lat.loc[Lon_lat['state'].isin(d_infos_.state)].reset_index(drop=True)
Lon_lat.head()

${ \large{ \underline{\textbf{States Geometries}} }}$

In [None]:
# We import the geoJSON file. 
# Location of the geoJSON file describing the US states.
url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data"
state_geo = url + "/us-states.json"

# Read the file, keep the name and geometry, and call the name column 'state'.
geoJSON_df = gpd.read_file(state_geo)[['name', 'geometry']].rename(
    columns={'name': 'state'})
geoJSON_df.tail()

${ \large{ \underline{\textbf{Grouped Data}}:\ Customize\ the\ color\ of\ cases \ and\ deaths\ values\\ and\ add\ a\ radius\ features.}}$

In [None]:
# Geometries of the states that appear in the district data, renumbered:
state_geo = geoJSON_df.loc[geoJSON_df['state'].isin(d_infos_.state)].reset_index(drop=True)

# Attach the Covid rows to each state's geometry:
Merge = pd.merge(state_geo, covid_df, on='state')

# One row per (month, state): summed cases and deaths, first geometry.
Group_df = Merge.groupby(
    by=[pd.Grouper(key='date', freq='M'), 'state']
).agg({'cases': 'sum', 'deaths': 'sum', 'geometry': 'first'})
# Customize:
def color_function(x, kind):
    """Map a value to a hex colour, scaled over a column of ``Group_df``.

    Parameters
    ----------
    x : number
        Value to colour.
    kind : str
        ``'cases'`` (YlOrBr colormap) or ``'deaths'`` (OrRd colormap); also
        the ``Group_df`` column whose min and max define the scale.

    Returns
    -------
    str
        Colour formatted as ``"#rrggbb"``.

    Raises
    ------
    ValueError
        If ``kind`` is neither ``'cases'`` nor ``'deaths'``.
    """
    palettes = {'cases': YlOrBr, 'deaths': OrRd}
    if kind not in palettes:
        raise ValueError("kind must be 'cases' or 'deaths', got %r" % (kind,))
    data_min = Group_df[kind].min()
    data_max = Group_df[kind].max()
    span = data_max - data_min
    # A constant column has no range; use the low end of the colormap.
    y = (x - data_min) / span if span else 0.0
    color_tuple = tuple(int(channel) for channel in palettes[kind](y, bytes=True)[:3])
    return "#%02x%02x%02x" % color_tuple
def radius_function(x, kind):
    """Return a marker radius: a base size plus the value scaled to 0..1.

    Parameters
    ----------
    x : number
        Value to convert.
    kind : str
        ``'cases'`` (base radius 10) or ``'deaths'`` (base radius 4); also
        the ``Group_df`` column whose min and max define the scale.

    Returns
    -------
    float
        Base radius plus ``(x - min) / (max - min)``.

    Raises
    ------
    ValueError
        If ``kind`` is neither ``'cases'`` nor ``'deaths'``.
    """
    base_radius = {'cases': 10, 'deaths': 4}
    if kind not in base_radius:
        raise ValueError("kind must be 'cases' or 'deaths', got %r" % (kind,))
    data_min = Group_df[kind].min()
    data_max = Group_df[kind].max()
    span = data_max - data_min
    # A constant column has no range; fall back to the base radius.
    scaled = (x - data_min) / span if span else 0.0
    return scaled + base_radius[kind]
# Style columns read by Create_feature.  The deaths columns are derived from
# the 'deaths' values; they were previously fed the 'cases' values.
Group_df['fill color/cases'] = [color_function(X, 'cases') for X in Group_df['cases']]
Group_df['fill color/deaths'] = [color_function(X, 'deaths') for X in Group_df['deaths']]
Group_df['radius/cases'] = [radius_function(X, 'cases') for X in Group_df['cases']]
Group_df['radius/deaths'] = [radius_function(X, 'deaths') for X in Group_df['deaths']]

Group_df.head()

# Visualization :

# I- Geographical Visualization:

# A. Districts and their information:

${\large{ \underline{\textbf{A}}bout\ the \ data\ used\ here, \ specifically\ the\ \texttt{geojson}\ one\ check:\ }}$[this](http://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json).

In [None]:
# District rows split by locale:
city_df, suburb_df, rural_df, town_df = (
    d_infos_.loc[d_infos_['locale'] == name]
    for name in ('City', 'Suburb', 'Rural', 'Town')
)

# State geometries restricted to the states of each locale:
geo_city, geo_rural, geo_suburb, geo_town = (
    geoJSON_df.loc[geoJSON_df['state'].isin(frame['state'])]
    for frame in (city_df, rural_df, suburb_df, town_df)
)

# Join every locale table with its geometries and stack the results under
# the locale name:
All = pd.concat(
    [pd.merge(geo, info, on='state')
     for geo, info in ((geo_rural, rural_df),
                       (geo_city, city_df),
                       (geo_town, town_df),
                       (geo_suburb, suburb_df))],
    keys=['Rural', 'City', 'Town', 'Suburb'],
)

${\large{ \underline{\textbf{I}}n\ the\ Map:\\ \textbf{B/H}\ : Percentage\ of\ students\ in\ the\ districts\ identified\ as\ Black\ or\\ Hispanic.\\ \textbf{F/R}:\ Percentage\ of\ students\ in\ the\ districts\ eligible\ for\ free\ or\ reduced-price\ lunch.\\ \textbf{C.C.R}:\ Residential\ fixed\ high-speed\ connections\ over\ 200\ kbps\ in\ at\\ least\ one\ direction/households.\\ \textbf{PP.T.R}:\ Per-pupil\ total\ expenditure.
} }$

In [None]:
# Locales to draw, one map layer each.
Locales = ['City', 'Suburb', 'Town', 'Rural']
PP_Total_raw_map(Locales)

# B. Covid-19 evolution of cases and deaths:

${\large{ \underline{\textbf{Note}}:\\ \hspace{0.5 cm} The\ right\ map\ for\ Cases\ Data.\ And\ the\ left\ one\ for\ Deaths\ Data.}}$

In [None]:
# Render the side-by-side map with the default 'cases' and 'deaths' kinds.
Dual_map()

# II- Chart Visualization :

# Districts Data:

# A. Per-pupil total expenditure per locale :

In [None]:
sns.set_theme(style="darkgrid")

Locale = ['Town', 'City', 'Rural', 'Suburb']

fig, axes = plt.subplots(ncols=2, nrows=2, sharey=True, figsize=(10, 9))

# One panel per locale, all sharing the y axis.
for ax, locale in zip(axes.flat, Locale):
    data = d_infos_[d_infos_['locale'] == locale]

    sns.barplot(data=data, x="state", y="pp_total_raw", ax=ax)
    ax.set_title(locale)
    ax.set_xticklabels(data['state'], rotation=60)
    ax.set_ylabel("")
    ax.set_xlabel("")

# Shared title, spacing and hand-placed axis captions:
fig.suptitle("Per-pupil total expenditure per locale")
fig.subplots_adjust(hspace=0.4, wspace=0.2)
for x_pos, y_pos in ((0.3, 0.01), (0.75, 0.0)):
    fig.text(x_pos, y_pos, 'state', ha='center')
for y_pos in (0.25, 0.75):
    fig.text(0.04, y_pos, 'pp_total_raw', va='center', rotation='vertical')
plt.show()

# B. Percentage of students identified as Black or Hispanic:

In [None]:
Locale = ['Town', 'City', 'Rural', 'Suburb']

fig, axes = plt.subplots(ncols=2, nrows=2, sharey=True, figsize=(10, 9))

# One panel per locale, all sharing the y axis.
for ax, locale in zip(axes.flat, Locale):
    data = d_infos_[d_infos_['locale'] == locale]

    sns.barplot(data=data, x="state", y="pct_black/hispanic", ax=ax)
    ax.set_title(locale)
    ax.set_xticklabels(data['state'], rotation=60)
    ax.set_ylabel("")
    ax.set_xlabel("")

# Shared title, spacing and hand-placed axis captions:
fig.suptitle("Percentage of students in the districts identified as Black or Hispanic")
fig.subplots_adjust(hspace=0.4, wspace=0.2)
for x_pos, y_pos in ((0.3, 0.01), (0.75, 0.0)):
    fig.text(x_pos, y_pos, 'state', ha='center')
for y_pos in (0.25, 0.75):
    fig.text(0.04, y_pos, 'Percentage B/H (0~1)', va='center', rotation='vertical')
plt.show()

# C. Percentage of students in the districts eligible for free or reduced-price lunch:

In [None]:
Locale = ['Town', 'City', 'Rural', 'Suburb']

fig, axes = plt.subplots(ncols=2, nrows=2, sharey=True, figsize=(10, 9))

# One panel per locale, all sharing the y axis.
for ax, locale in zip(axes.flat, Locale):
    data = d_infos_[d_infos_['locale'] == locale]

    sns.barplot(data=data, x="state", y="pct_free/reduced", ax=ax)
    ax.set_title(locale)
    ax.set_xticklabels(data['state'], rotation=60)
    ax.set_ylabel("")
    ax.set_xlabel("")

# Shared title, spacing and hand-placed axis captions:
fig.suptitle("Percentage of students in the districts eligible for free or reduced-price Lunch.")
fig.subplots_adjust(hspace=0.4, wspace=0.2)
for x_pos, y_pos in ((0.3, 0.01), (0.75, 0.0)):
    fig.text(x_pos, y_pos, 'state', ha='center')
for y_pos in (0.25, 0.75):
    fig.text(0.04, y_pos, 'Percentage F/R (0~1)', va='center', rotation='vertical')
plt.show()

# Products Data:

# A. Sectors as target and The Dominant Main Functionality:

In [None]:
# Figure 1: product counts of P_infos_g against the sector.
plt.figure(1)
sns.swarmplot(data=P_infos_g, x='Sector(s)', y='LP ID')
plt.xticks(rotation=-45)
plt.ylabel("Product's Count")
plt.title("The Target Sectors of the products.")
plt.show()

# Figure 2: number of P_infos_g rows per main function.
plt.figure(2)
sns.countplot(data=P_infos_g, x='main_fun')
plt.xticks(rotation=90)
plt.title("The Dominant Main Function  of the products.")
plt.show()



# B. Product and Engagement Index:

In [None]:
# Correlation between pct_access and engagement_index, one value per
# district listed in d_infos_.
engagement_dir = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data/'

Correlations = []
for ID in d_infos_["district_id"]:
    df = pd.read_csv(engagement_dir + str(ID) + '.csv',
                     usecols=["pct_access", "engagement_index"])
    Correlations.append(df['pct_access'].corr(df['engagement_index']))

${ \large{ \underline{\textbf{Why}}\ the\ engagement\ index\ and\ not\ the\ percentage\ of\ access?\\  \underline{\textbf{Because}}\ the\ engagement\ index\ is\ only\ weakly\ correlated\ with\ the\ access\ percentage.}}$

In [None]:
# Report the mean of the per-district correlations.
print("The Correlation between The Engagement Index and the Access Percentage is:" ,np.mean(Correlations))    

In [None]:
# Draw the per-product engagement bar chart.
Prod_vs_engagement()

# Engagement Data (Per Districts and Locales):

In [None]:
# Draw the per-locale top-engagement bar chart.
Top_Visual()

# Thank you for reading this far.

In [None]:
# Corpus for the closing picture: fixed keywords plus a random draw of four
# states and ten product names.  The earlier stray `np.random.choice` call
# was dropped because its result was discarded, and the misspelled keywords
# ('Pendamic', 'LEarning Platforme') were corrected since they are rendered.
words = ['Learning', 'Covid', 'Pandemic', 'USA', 'Learning Platform',
         " ".join(np.random.choice(covid_df['state'], 4)) + " ",
         " ".join(np.random.choice(Data_prod['Product Name'], 10)) + " "]
words = " ".join(words) + " "
wordcloud = WordCloud(width = 800, height = 800,
                background_color ='black',
                min_font_size = 9).generate(words)
plt.figure(figsize = (8, 8), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

plt.show()