In [90]:
import pandas as pd
import numpy as np
try: 
    import ipyleaflet
    from plotly.offline import plot, iplot, init_notebook_mode
    import plotly.graph_objs as go
    import cufflinks as cf
    import geocoder
    import folium
    from folium import plugins
    import geopandas as gpd
except: 
    !pip install geocoder
    !pip install ipyleaflet
    !pip install plotly
    !pip install cufflinks
    !pip install folium
    !pip install geopandas
import os
proj_dir = os.environ['HOME'] + '/dev/Projects/COVID/'
out_dir = os.environ['HOME'] + '/dev/Projects/COVID/Output/'
data_dir = os.environ['HOME'] + '/dev/Projects/COVID/Data/'
data_dir
init_notebook_mode(connected=True)
cf.go_offline
cf.set_config_file(theme='henanigans')

In [91]:
# ---------------------------------------------------------------------------
# County-level view for a single state: raw cumulative cases, cases per
# `scale` residents, and day-over-day percent change for the top counties.
# ---------------------------------------------------------------------------
state = 'New York'   # state whose counties are plotted
top_x = 5            # number of counties to show in each chart
scale = 10000        # population unit for the normalized series

# County case counts. 'New York City' is renamed so that it can join against
# a county called 'New York' in the population table below.
# NOTE(review): this presumably attributes the whole 'New York City' row to
# the single population entry named 'New York' - confirm that this is the
# intended denominator for `norm_cases`.
df_counties = pd.read_csv(data_dir+'covid-19-data/us-counties.csv')
df_counties = df_counties.replace('New York City','New York')

# Population estimates. STATE/COUNTY are read as strings, so the
# "not a state-level row" test must compare against the string '000' (the
# same code the state-level cell uses), not the integer 0.
df_counties_populations = pd.read_csv(
    data_dir+'co-est2019-alldata.csv',
    encoding="ISO-8859-1",
    dtype={'STATE': str,'COUNTY':str},
)[['STATE','COUNTY','STNAME','CTYNAME','POPESTIMATE2019']]
df_counties_populations = df_counties_populations[
    (df_counties_populations['COUNTY']!='000')
    & (df_counties_populations['STNAME'] != df_counties_populations['CTYNAME'])
]

# County shapes; the centroid of each geometry supplies the latitude/longitude
# used by the map cells further down.
df_counties_points = gpd.read_file(data_dir+'UScounties.shp')
df_counties_points['centroid'] = df_counties_points['geometry'].centroid
df_counties_points['latitude'] = df_counties_points['centroid'].apply(lambda p: p.y)
df_counties_points['longitude'] = df_counties_points['centroid'].apply(lambda p: p.x)

# Attach geometry to populations (state name + county code), then strip the
# ' County' suffix so county names line up with the case data.
df_counties_populations = df_counties_populations.merge(
    df_counties_points,
    left_on=['STNAME','COUNTY'],
    right_on=['STATE_NAME','CNTY_FIPS'],
)
df_counties_populations['CTYNAME'] = df_counties_populations['CTYNAME'].str.replace(' County','')

# Attach population + coordinates to every case row and normalize.
df_counties = df_counties.merge(
    df_counties_populations,
    left_on=['county','state'],
    right_on=['CTYNAME','STNAME'],
)
df_counties = df_counties.drop(['STATE','COUNTY','STNAME','CTYNAME'],axis=1)
df_counties['norm_cases'] = df_counties['cases']/(df_counties['POPESTIMATE2019']/scale)

# Date x county tables for the selected state.
df_counties_pivot = df_counties[df_counties['state']==state]
df_counties_pivot = df_counties_pivot[df_counties_pivot['county']!='Unknown']
df_counties_raw = df_counties_pivot.pivot(index='date',columns='county',values='cases')
df_counties_norm = df_counties_pivot.pivot(index='date',columns='county',values='norm_cases')
df_counties_chg = df_counties_raw.pct_change()*100

# Top `top_x` counties by peak value (previously hard-coded to 5 even though
# the chart title reports `top_x`).
counties_raw = df_counties_raw.max().sort_values().tail(top_x).index
counties_norm = df_counties_norm.max().sort_values().tail(top_x).index

fig = df_counties_raw[counties_raw].iplot(
    asFigure=True,kind='scatter',
    title='COVID Cases by County in '+state,
    xTitle='Date',yTitle='# of Cases')
fig2 = df_counties_chg[counties_raw].iplot(
    asFigure=True,kind='bar',
    title='COVID Percent Change of Top '+ str(top_x)+ ' Counties in '+state,
    xTitle='Date',yTitle='Daily % Change of Cases')
fig3 = df_counties_norm[counties_norm].iplot(
    asFigure=True,kind='scatter',
    title='COVID Cases by County in ' +state+ ' per '+str(scale) + ' people',
    xTitle='Date',yTitle='# of Cases per '+str(scale)+ ' people')

# Save each chart as an HTML file under out_dir ...
plot(fig,filename=out_dir+'county.html')
plot(fig3,filename=out_dir+'county_norm.html')
plot(fig2,filename=out_dir+'county_percent_chg.html')

# ... and render them inline in the notebook.
iplot(fig)
iplot(fig3)
iplot(fig2)

In [92]:
# ---------------------------------------------------------------------------
# State-level view: raw cumulative cases, cases per `scale` residents, and
# day-over-day percent change for a hand-picked list of states.
# ---------------------------------------------------------------------------
scale = 1000000
states= ['Hawaii','New York','California','Connecticut', 'Washington']

# State-level population rows are the ones with county code '000' whose
# county name equals the state name.
df_populations = pd.read_csv(
    data_dir+'co-est2019-alldata.csv',
    encoding="ISO-8859-1",
    dtype={'STATE': str,'COUNTY':str},
)[['STATE','COUNTY','STNAME','CTYNAME','POPESTIMATE2019']]
df_states_populations = df_populations[
    (df_populations['COUNTY']=='000')
    & (df_populations['STNAME'] == df_populations['CTYNAME'])
]

# Join case counts with population and normalize.
df_states = pd.read_csv(data_dir+'covid-19-data/us-states.csv')
df_states = df_states.merge(df_states_populations,left_on=['state'],right_on=['STNAME'])
df_states['norm_cases'] = df_states['cases']/(df_states['POPESTIMATE2019']/scale)

# Date x state tables.
df_states_pivot = df_states.pivot(index='date',columns='state',values='cases')
df_states_chg = df_states_pivot.pct_change()*100
df_states_norm = df_states.pivot(index='date',columns='state',values='norm_cases')

# fig  -> raw cases, fig2 -> normalized cases, fig3 -> percent change.
fig = df_states_pivot[states].iplot(
    asFigure=True,kind='scatter',
    title='COVID Cases By State',
    xTitle='Date',yTitle='# of Cases')
fig2 = df_states_norm[states].iplot(
    asFigure=True,kind='scatter',
    title='COVID Cases in each state per '+str(scale) + ' people',
    xTitle='Date',yTitle='# of Cases/'+str(scale))
fig3 = df_states_chg[states].iplot(
    asFigure=True,kind='bar',
    title='COVID Percent Change by State',
    xTitle='Date',yTitle='Daily % Change of Cases')

# Save each figure under the file name that matches its content. The
# normalized and percent-change figures were previously written to each
# other's file.
plot(fig,filename=out_dir+'state.html')
plot(fig2,filename=out_dir+'state_norm.html')
plot(fig3,filename=out_dir+'state_percent_chg.html')

iplot(fig)
iplot(fig2)
iplot(fig3)

In [93]:
# ---------------------------------------------------------------------------
# Animated map of daily new cases per county (one circle per county per day).
# ---------------------------------------------------------------------------
df_counties = df_counties[df_counties['county']!='Unknown']

# One column per (county, state, latitude, longitude); differencing the
# cumulative counts along the date index gives daily new cases.
df_counties_all_pivot = pd.pivot_table(
    df_counties,
    index='date',
    columns=['county','state','latitude','longitude'],
    values='cases',
)
df_counties_diff = df_counties_all_pivot.diff()
df_counties_diff_stacked = df_counties_diff.reset_index().melt(id_vars='date')
df_counties_diff_stacked['value'] = df_counties_diff_stacked['value'].replace(0,np.nan)

# Only days with a positive increase become map features. Rows with NaN
# (zero change, or the first date, which has no previous day) used to be
# emitted as "nan new cases" markers with a NaN radius, and negative
# differences produced a negative radius.
new_case_rows = df_counties_diff_stacked[df_counties_diff_stacked['value'] > 0]

# Marker radius is half the daily increase, capped at 50.
points = [
    {
        'time': row['date'],
        'popup': str(row['value']) + ' new cases in ' + str(row['county'])+ ', ' + str(row['state']),
        'coordinates': [row['longitude'],row['latitude']],
        'value': min(row['value']/2, 50),
    }
    for row in new_case_rows.to_dict('records')
]

# GeoJSON point features in the shape expected by TimestampedGeoJson.
# NOTE(review): 'stroke' is the *string* 'false', which JavaScript treats as
# truthy - confirm whether a boolean was intended.
features = [
    {
        'type':'Feature',
        'geometry': {
            'type':'Point',
            'coordinates':point['coordinates']
        },
        'properties': {
            'time': point['time'],
            'popup': point['popup'],
            'id':'cases',
            'icon':'circle',
            'iconstyle': {
                'fillColor':'red',
                'fillOpacity':0.6,
                'stroke':'false',
                'radius': point['value']
            }
        }
    } for point in points
]

# Base map centred on the contiguous US. The former `titles=` keyword was
# dropped: it is not a folium.Map parameter (it looks like a typo next to
# `tiles=`) and had no visible effect.
m = folium.Map(
    location=[39.8282,-98.5795],
    tiles = 'cartodbpositron',
    zoom_start = 4
)

plugins.TimestampedGeoJson(
    {
        'type': 'FeatureCollection',
        'features': features
    },
    period='P1D',
    add_last_point=False,
    auto_play=False,
    loop=False,
    max_speed=1,
    loop_button=True,
    date_options='YYYY/MM/DD',
    transition_time=200,
    time_slider_drag_update=True,
    duration='PT1H'
).add_to(m)

<folium.plugins.timestamped_geo_json.TimestampedGeoJson at 0x7f03c4be9358>

In [94]:
# ---------------------------------------------------------------------------
# Daily new deaths per county, added as a second animated layer on the map
# `m` created by the cases cell.
# ---------------------------------------------------------------------------
df_counties = df_counties[df_counties['county']!='Unknown']

# One column per (county, state, latitude, longitude); differencing the
# cumulative counts along the date index gives daily new deaths.
df_counties_all_deaths_pivot = pd.pivot_table(
    df_counties,
    index='date',
    columns=['county','state','latitude','longitude'],
    values='deaths',
)
df_counties_deaths_diff = df_counties_all_deaths_pivot.diff()
df_counties_deaths_diff_stacked = df_counties_deaths_diff.reset_index().melt(id_vars='date')
df_counties_deaths_diff_stacked['value'] = df_counties_deaths_diff_stacked['value'].replace(0,np.nan)

# Only days with a positive increase become map features. Rows with NaN
# (zero change, or the first date, which has no previous day) used to be
# emitted as "nan new deaths" markers with a NaN radius, and negative
# differences produced a negative radius.
new_death_rows = df_counties_deaths_diff_stacked[df_counties_deaths_diff_stacked['value'] > 0]

# Marker radius equals the daily increase, capped at 50.
points = [
    {
        'time': row['date'],
        'popup': str(row['value']) + ' new deaths in ' + str(row['county'])+ ', ' + str(row['state']),
        'coordinates': [row['longitude'],row['latitude']],
        'value': min(row['value'], 50),
    }
    for row in new_death_rows.to_dict('records')
]

# GeoJSON point features in the shape expected by TimestampedGeoJson.
# NOTE(review): 'stroke' is the *string* 'false', which JavaScript treats as
# truthy - confirm whether a boolean was intended.
features = [
    {
        'type':'Feature',
        'geometry': {
            'type':'Point',
            'coordinates':point['coordinates']
        },
        'properties': {
            'time': point['time'],
            'popup': point['popup'],
            'id':'deaths',
            'icon':'circle',
            'iconstyle': {
                'fillColor':'purple',
                'fillOpacity':0.6,
                'stroke':'false',
                'radius': point['value']
            }
        }
    } for point in points
]

plugins.TimestampedGeoJson(
    {
        'type': 'FeatureCollection',
        'features': features
    },
    period='P1D',
    add_last_point=False,
    auto_play=False,
    loop=False,
    max_speed=1,
    loop_button=True,
    date_options='YYYY/MM/DD',
    transition_time=200,
    time_slider_drag_update=True,
    duration='PT1H'
).add_to(m)



<folium.plugins.timestamped_geo_json.TimestampedGeoJson at 0x7f03c4ce92e8>

In [95]:
from folium import IFrame

# Placeholder iframe content. Within this cell the iframe is only constructed;
# it is not attached to the map.
# NOTE(review): confirm whether a later cell uses `iframe`, or whether this is
# leftover scaffolding.
text = 'your text here'
iframe = IFrame(text, width=700, height=450)

# Write the map (with whatever layers earlier cells added to `m`) to disk.
map_path = out_dir + 'covid_daily_map.html'
m.save(map_path)

In [96]:
# Load the state shapefile from the data directory.
df_states_points = gpd.read_file(f'{data_dir}s_11au16.shp')

# Only the last expression of a cell is rendered, so the first preview below
# is evaluated but not shown; the county frame is what the cell displays.
df_states_points.head()
df_counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths,POPESTIMATE2019,NAME,STATE_NAME,STATE_FIPS,CNTY_FIPS,FIPS,geometry,centroid,latitude,longitude,norm_cases
0,2020-01-21,Snohomish,Washington,53061.0,1,0,822083,Snohomish,Washington,53,61,53061,"POLYGON ((-121.11924 47.77552, -122.39449 47.7...",POINT (-121.69716 48.04257),48.042567,-121.697158,0.012164
1,2020-01-22,Snohomish,Washington,53061.0,1,0,822083,Snohomish,Washington,53,61,53061,"POLYGON ((-121.11924 47.77552, -122.39449 47.7...",POINT (-121.69716 48.04257),48.042567,-121.697158,0.012164
2,2020-01-23,Snohomish,Washington,53061.0,1,0,822083,Snohomish,Washington,53,61,53061,"POLYGON ((-121.11924 47.77552, -122.39449 47.7...",POINT (-121.69716 48.04257),48.042567,-121.697158,0.012164
3,2020-01-24,Snohomish,Washington,53061.0,1,0,822083,Snohomish,Washington,53,61,53061,"POLYGON ((-121.11924 47.77552, -122.39449 47.7...",POINT (-121.69716 48.04257),48.042567,-121.697158,0.012164
4,2020-01-25,Snohomish,Washington,53061.0,1,0,822083,Snohomish,Washington,53,61,53061,"POLYGON ((-121.11924 47.77552, -122.39449 47.7...",POINT (-121.69716 48.04257),48.042567,-121.697158,0.012164
