# COVID-19 LearnPlatform Analysis
![](http://images.pexels.com/photos/4260325/pexels-photo-4260325.jpeg?cs=srgb&dl=pexels-august-de-richelieu-4260325.jpg&fm=jpg)

**Import Libraries**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob # read all files in engagement data
import seaborn as sns # visualization
import matplotlib.pyplot as plt # plotting
import geopandas # plotting maps
from geopy.geocoders import Nominatim
import folium as fl
from folium.plugins import HeatMap


# Reached only if every import above succeeded.
print('Setup complete')

**Import product information data**

Includes data about platforms

In [None]:
# Load products_info.csv and preview its first ten rows.
products_csv_path = "../input/learnplatform-covid19-impact-on-digital-learning/products_info.csv"
products_df = pd.read_csv(products_csv_path)
products_df.head(n=10)

**Import districts information data**

Includes data about school districts

In [None]:
# Load districts_info.csv and preview its first ten rows.
districts_csv_path = "../input/learnplatform-covid19-impact-on-digital-learning/districts_info.csv"
districts_df = pd.read_csv(districts_csv_path)
districts_df.head(n=10)

**Import engagement data**


In [None]:
# Load every per-district engagement CSV and stack them into one DataFrame,
# tagging each row with the district id taken from its file name.
from pathlib import Path

path = '../input/learnplatform-covid19-impact-on-digital-learning/engagement_data'
# glob returns matches in arbitrary order; sort so the row order is reproducible.
all_files = sorted(glob.glob(path + "/*.csv"))

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    # The district id is the file name up to its first dot. Read it from the
    # final path component rather than a fixed "/"-split position, which
    # breaks as soon as `path` has a different depth or separator.
    district_id = Path(filename).name.split(".")[0]
    df["district_id"] = district_id
    li.append(df)

# ignore_index=True renumbers the rows 0..n-1 across all files.
engagement_df = pd.concat(li, ignore_index=True)
# pd.to_datetime returns a new Series, so the result has to be assigned back;
# otherwise the column is never converted. Values are parsed from their string
# form so that both "YYYY-MM-DD" and "YYYYMMDD" are understood, and anything
# unparseable becomes NaT instead of being silently left as-is.
# NOTE(review): the on-disk date format is not visible here - confirm.
engagement_df['time'] = pd.to_datetime(
    engagement_df['time'].astype(str), errors='coerce'
)
engagement_df.head()

**State distribution**

In [None]:
# Horizontal bar chart of how many districts fall in each state,
# ordered from the most to the least frequent state.
state_order = districts_df['state'].value_counts().index
plt.figure(figsize=(16, 10))
sns.set_theme(font="Serif")
sns.countplot(
    data=districts_df,
    y='state',
    order=state_order,
    palette="viridis",
)
plt.title("State distribution", size=20)
plt.show()

**Locale distribution**

- geoplotlib
- https://observablehq.com/@d3/zoom-to-bounding-box
- https://observablehq.com/@d3/donut-chart

In [None]:
# Give districts with a missing locale their own category so they are
# still counted, then chart the number of districts per locale.
districts_df["locale"] = districts_df["locale"].fillna('Not specified')
locale_order = districts_df["locale"].value_counts().index
plt.figure(figsize=(16, 10))
sns.set_theme(font="Serif")
sns.countplot(
    data=districts_df,
    y='locale',
    order=locale_order,
    palette="viridis",
)
plt.title("Location distribution", size=20)
plt.show()

**Geoplot**

In [None]:
# Read the state shapefile (cb_2018_us_state_500k) into `states`.
states_shapefile = '../input/usa-shapefile-census-2018/cb_2018_us_state_500k.shp'
states = geopandas.read_file(states_shapefile)
# Not the last expression of the cell, so its value is not displayed.
type(states)

# Preview the first rows.
states.head()







In [None]:
# One row per distinct value of the districts' `state` column.
unique_states = districts_df['state'].unique()
locations = pd.DataFrame({"Name": unique_states})

In [None]:
# Geocode every state name in `locations` to a latitude/longitude pair.
# `lat` and `lon` end up with exactly one entry per row of `locations`,
# using NaN where no coordinates could be determined.
from geopy.extra.rate_limiter import RateLimiter

geol = Nominatim(user_agent="app")
# The public Nominatim server allows at most one request per second; the
# limiter spaces the calls out and retries transient failures, returning
# None if a lookup still fails.
geocode = RateLimiter(geol.geocode, min_delay_seconds=1)
lat = []
lon = []

for name in locations['Name']:
    if pd.isna(name):
        # A missing state must not be sent to the service as a query.
        location = None
    else:
        # Restrict matches to the US: bare names such as "Georgia" are
        # otherwise ambiguous and can resolve to places in other countries.
        location = geocode(name, country_codes="us")
    if location is None:
        lat.append(np.nan)
        lon.append(np.nan)
    else:
        lat.append(location.latitude)
        lon.append(location.longitude)

In [None]:
# Attach the geocoded coordinates as new columns; `lat` and `lon` must hold
# one value per row of `locations`, in the same order.
locations['lat'], locations['lon'] = lat, lon

In [None]:
# Number of districts per state as a two-column frame: Name, count.
Rest_locations = (
    districts_df['state']
    .value_counts()
    .rename_axis('Name')
    .reset_index(name='count')
)

# Left-join the coordinates onto the counts by state name, then drop every
# row that still contains a missing value (e.g. a state that was not geocoded).
final_loc = Rest_locations.merge(locations, how="left", on='Name').dropna()

def generateBaseMap(default_location=(37.0902, -95.7129), default_zoom_start=4.5):
    """Create an empty folium map to add layers to.

    Args:
        default_location: ``(latitude, longitude)`` pair the map is centred
            on. The default is an immutable tuple rather than a list, so the
            shared default object can never be mutated between calls.
        default_zoom_start: Initial zoom level passed to ``fl.Map``.

    Returns:
        The newly created ``fl.Map``.
    """
    return fl.Map(location=default_location, zoom_start=default_zoom_start)

# Build the base map and overlay a heat map of the per-state district counts.
basemap = generateBaseMap()

# Columns are passed in the order lat, lon, count.
heat_points = final_loc[['lat', 'lon', 'count']]
# NOTE(review): `zoom` does not appear among HeatMap's documented parameters
# (`max_zoom` does) - confirm which one was intended.
heat_layer = HeatMap(heat_points, zoom=20, radius=20)
heat_layer.add_to(basemap)

# Last expression of the cell, so the notebook renders the map.
basemap
