In [16]:
import json
import os
import urllib
import urllib.request

import folium
import geopandas as gpd
import joblib
import pandas as pd

In [17]:
# Root of the local COVID-19 data checkout.  Set the COVID19_REPO_DIR
# environment variable to point at another checkout; the default keeps the
# location this notebook was originally written against.
COVID19_REPO_DIR = os.environ.get(
    'COVID19_REPO_DIR',
    '/Users/Berto/Projects/COVID-19_Tracker/COVID-19',
)
CSSE_DATA_DIR = os.path.join(COVID19_REPO_DIR, 'csse_covid_19_data')

# Directory of per-day report files and directory of time-series tables.
DAILY_DATA_DIR = os.path.join(CSSE_DATA_DIR, 'csse_covid_19_daily_reports')
TIME_DATA_DIR = os.path.join(CSSE_DATA_DIR, 'csse_covid_19_time_series')

# One time-series CSV per metric.
CONFIRMED = os.path.join(TIME_DATA_DIR, "time_series_19-covid-Confirmed.csv")
DEATHS = os.path.join(TIME_DATA_DIR, "time_series_19-covid-Deaths.csv")
RECOVERED = os.path.join(TIME_DATA_DIR, "time_series_19-covid-Recovered.csv")

In [18]:
# Sorted list of the report files in DAILY_DATA_DIR.
# Select by extension rather than removing '.gitignore' and 'README.md' by
# name: list.remove() raises ValueError as soon as either file is absent.
# NOTE(review): assumes every daily report is a .csv file -- confirm.
# NOTE(review): a plain string sort is chronological only while all file
# names share one year (presumably MM-DD-YYYY.csv) -- confirm.
date_list = sorted(
    name for name in os.listdir(DAILY_DATA_DIR) if name.endswith('.csv')
)

In [19]:
# Load the three time-series tables, one per metric.
confirmed_df, deaths_df, recovered_df = (
    pd.read_csv(csv_path) for csv_path in (CONFIRMED, DEATHS, RECOVERED)
)

In [20]:
# Reshape each table to long form: the location columns are kept as
# identifiers, every other column header moves into 'date', and the cell
# values land in a column named after the metric.
location_cols = ['Province/State', 'Country/Region', 'Lat', 'Long']

confirmed_df = pd.melt(confirmed_df, id_vars=location_cols,
                       var_name='date', value_name='confirmed')
deaths_df = pd.melt(deaths_df, id_vars=location_cols,
                    var_name='date', value_name='deaths')
recovered_df = pd.melt(recovered_df, id_vars=location_cols,
                       var_name='date', value_name='recovered')

In [21]:
# Combine the three long tables into one frame with a confirmed / deaths /
# recovered column per (location, date) row.  Inner joins keep only the rows
# present in all three tables.
merge_keys = ['Province/State', 'Country/Region', 'Lat', 'Long', 'date']

df = (
    confirmed_df
    .merge(deaths_df, on=merge_keys, how='inner')
    .merge(recovered_df, on=merge_keys, how='inner')
)

In [22]:
# The source labels the country as 'US'; spell it out as 'United States'.
# The country-code lookup later in this notebook joins on this column.
df['Country/Region'] = df['Country/Region'].replace({'US': 'United States'})

In [23]:
# Snapshot the combined long table to disk.  The row index is written as the
# first column (pandas' default, spelled out here).
df.to_csv('covid-19.csv', index=True)

## Add ISO2 and ISO3 country codes so that Folium can plot the data

In [24]:
# Download the ISO2-code -> country-name table.
link = "http://country.io/names.json"
# Use the response as a context manager so the connection is closed even if
# reading or decoding fails.
with urllib.request.urlopen(link) as f:
    country_json = f.read().decode("utf-8")

country_ISO2 = json.loads(country_json)
country_ISO2_df = pd.DataFrame(list(country_ISO2.items()),
                               columns=['ISO2 Code', 'Country/Region'])

# Attach the ISO2 code by country name.
# NOTE(review): the inner join silently drops every row whose
# 'Country/Region' has no exact match in the lookup table -- confirm that
# this loss is intended.
df = pd.merge(df, country_ISO2_df, on='Country/Region', how='inner')
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed,deaths,recovered,ISO2 Code
0,,Thailand,15.0,101.0,1/22/20,2,0,0,TH
1,,Thailand,15.0,101.0,1/23/20,3,0,0,TH
2,,Thailand,15.0,101.0,1/24/20,5,0,0,TH
3,,Thailand,15.0,101.0,1/25/20,7,0,0,TH
4,,Thailand,15.0,101.0,1/26/20,8,0,2,TH


In [25]:
# Download the ISO2-code -> ISO3-code table.
link = "http://country.io/iso3.json"
# Use the response as a context manager so the connection is closed even if
# reading or decoding fails.
with urllib.request.urlopen(link) as f:
    country_json = f.read().decode("utf-8")

country_ISO3 = json.loads(country_json)
country_ISO3_df = pd.DataFrame(list(country_ISO3.items()),
                               columns=['ISO2 Code', 'ISO3 Code'])

# Attach the ISO3 code via the ISO2 code already present on each row.
# NOTE(review): the inner join drops rows whose ISO2 code is missing from
# the lookup table -- confirm that this is intended.
df = pd.merge(df, country_ISO3_df, on='ISO2 Code', how='inner')
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed,deaths,recovered,ISO2 Code,ISO3 Code
0,,Thailand,15.0,101.0,1/22/20,2,0,0,TH,THA
1,,Thailand,15.0,101.0,1/23/20,3,0,0,TH,THA
2,,Thailand,15.0,101.0,1/24/20,5,0,0,TH,THA
3,,Thailand,15.0,101.0,1/25/20,7,0,0,TH,THA
4,,Thailand,15.0,101.0,1/26/20,8,0,2,TH,THA


# Choropleth Map

## GeoJsons

In [26]:
# Base URL of the example data files in the folium repository.
url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'

# Boundary files used by the choropleth maps below.
# Alternative country source (not used):
#   https://github.com/datasets/geo-countries/tree/master/data/countries.geojson
country_geo = url + '/world-countries.json'
state_geo = url + '/us-states.json'

In [31]:
# Download the world-countries GeoJSON; the context manager closes the
# connection once the body has been read.
with urllib.request.urlopen(country_geo) as f:
    country_json = f.read().decode("utf-8")

country_codes = json.loads(country_json)

# Build one (code, name) row per GeoJSON feature.  Iterating the top-level
# dict's items() would only yield its own keys (e.g. 'type', 'features'),
# not the countries.
# NOTE(review): assumes every feature carries an 'id' and a
# properties['name'] entry -- confirm against the downloaded file.
country_codes_df = pd.DataFrame(
    [(feature['id'], feature['properties']['name'])
     for feature in country_codes['features']],
    columns=['Country Code', 'Country/Region'],
)

In [38]:
# Convert the month/day/two-digit-year strings to timestamps, then display
# the most recent date present in the data.
parsed_dates = pd.to_datetime(df['date'], format="%m/%d/%y")
df['date'] = parsed_dates
parsed_dates.max()

Timestamp('2020-03-14 00:00:00')

In [29]:
# World choropleth of confirmed cases, keyed by ISO3 country code.

# Choropleth needs exactly one value per key, but df holds a row for every
# date (and province) of each country.  Keep the latest date and total the
# provinces so each ISO3 code appears once.
# NOTE(review): assumes 'date' is already parsed to datetime and that
# 'confirmed' is a cumulative count -- confirm.
latest_date = df['date'].max()
confirmed_by_country = (
    df.loc[df['date'] == latest_date]
    .groupby('ISO3 Code', as_index=False)['confirmed']
    .sum()
)

# The original cell left `tiles=` without a value (a SyntaxError); use the
# same tile set as the US map in this notebook.
m = folium.Map(location=[48, -102],
               tiles="OpenStreetMap",
               zoom_start=4,
               min_zoom=4)

folium.Choropleth(
    geo_data=country_geo,
    name='choropleth',
    data=confirmed_by_country,
    columns=['ISO3 Code', 'confirmed'],
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Confirmed Cases'
).add_to(m)

folium.LayerControl().add_to(m)

m

In [44]:
# US choropleth of confirmed cases, keyed by state name.

# Choropleth needs exactly one value per key, but df holds one row per state
# per date.  Keep only the latest date and total any duplicate entries.
# NOTE(review): assumes 'date' is already parsed to datetime and that
# 'confirmed' is a cumulative count -- confirm.
us_rows = df.loc[df['Country/Region'] == 'United States']
confirmed_by_state = (
    us_rows.loc[us_rows['date'] == us_rows['date'].max()]
    .groupby('Province/State', as_index=False)['confirmed']
    .sum()
)

m = folium.Map(location=[48, -102],
               tiles="OpenStreetMap",
               zoom_start=4,
               min_zoom=2)

# 'Province/State' holds full state names, so join on the feature's name
# property; 'feature.id' in the states file is the two-letter abbreviation
# and would match none of the rows.
# Entries that are not states (e.g. 'Diamond Princess') have no matching
# feature and are simply not drawn.
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=confirmed_by_state,
    columns=['Province/State', 'confirmed'],
    key_on='feature.properties.name',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Confirmed Cases'
).add_to(m)

folium.LayerControl().add_to(m)

m

In [49]:
# Preview the first 40 rows recorded for the United States.
is_us_row = df['Country/Region'].eq('United States')
df[is_us_row].head(40)

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,confirmed,deaths,recovered,ISO2 Code,ISO3 Code
5512,Washington,United States,47.4009,-121.4905,2020-01-22,0,0,0,US,USA
5513,New York,United States,42.1657,-74.9481,2020-01-22,0,0,0,US,USA
5514,California,United States,36.1162,-119.6816,2020-01-22,0,0,0,US,USA
5515,Massachusetts,United States,42.2302,-71.5301,2020-01-22,0,0,0,US,USA
5516,Diamond Princess,United States,35.4437,139.638,2020-01-22,0,0,0,US,USA
5517,Grand Princess,United States,37.6489,-122.6655,2020-01-22,0,0,0,US,USA
5518,Georgia,United States,33.0406,-83.6431,2020-01-22,0,0,0,US,USA
5519,Colorado,United States,39.0598,-105.3111,2020-01-22,0,0,0,US,USA
5520,Florida,United States,27.7663,-81.6868,2020-01-22,0,0,0,US,USA
5521,New Jersey,United States,40.2989,-74.521,2020-01-22,0,0,0,US,USA
