In [2]:
import os
import sys

from IPython.display import IFrame
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import numpy as np
import missingno as msno
import seaborn as sns
import matplotlib.patches as patches
import warnings
import networkx as nx
import osmnx as ox
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors

import urllib
import zipfile
import glob

# US COVID-19 Case Data Visualisation
## Procedure 1: US States and Counties GeoDataFrame (EPSG: 2163)

In [69]:
# Load the US state boundary shapefile and reproject it, in one chained step,
# to the US National Atlas Equal Area projection (EPSG: 2163).
us_states_gdf = (
    gpd.read_file('Data/input/us_state/cb_2018_us_state_5m.shp')
    .to_crs(epsg=2163)
)

In [9]:
# Load the US county boundary shapefile, add a 'FULLFP' column made of
# 'STATEFP' followed by 'COUNTYFP', then reproject to the US National Atlas
# Equal Area projection (EPSG: 2163).
us_counties_gdf = (
    gpd.read_file('Data/input/us_counties/cb_2018_us_county_5m.shp')
    .assign(FULLFP=lambda counties: counties['STATEFP'] + counties['COUNTYFP'])
    .to_crs(epsg=2163)
)

In [4]:
# Load the US state name / FIPS / postcode lookup table.
# The ' fips' header is addressed with its leading space, and the column is
# read as object so the codes are not turned into numbers.
fips_csv_dir = 'Data/input/us_state_fips/us-state-ansi-fips.csv'
fips_df = pd.read_csv(
    filepath_or_buffer=fips_csv_dir,
    dtype={' fips': object},
)

In [5]:
# Give the lookup columns clean names: ' fips' loses its leading space and
# ' stusps' becomes 'postcode'.
fips_df = fips_df.rename(
    columns={' fips': 'fips', ' stusps': 'postcode'},
)

In [10]:
# Attribute join: attach the state lookup in `fips_df` to each county row by
# matching the county's 'STATEFP' against the lookup's 'fips'.
# A left join keeps every row of `us_counties_gdf`.
merged_df = pd.merge(
    us_counties_gdf,
    fips_df,
    how='left',
    left_on='STATEFP',
    right_on='fips',
)

In [11]:
# The join key 'fips' is no longer needed once the lookup has been merged in.
merged_df = merged_df.drop(columns=['fips'])

In [63]:
# Make us_counties_gdf a GeoDataFrame of merged_df, using its 'geometry'
# column as the active geometry.
# The CRS is passed as an authority string: the {'init': 'epsg:2163'} dict
# form is deprecated by pyproj and emits a FutureWarning on every call.
us_counties_gdf = gpd.GeoDataFrame(
    merged_df,
    geometry='geometry',
    crs='EPSG:2163',
)

  return _prepare_from_string(" ".join(pjargs))


## Procedure 2: US COVID-19 Cases/Deaths/Testing DataFrame
### [New York Times US Counties Data](https://github.com/nytimes/covid-19-data)

In [125]:
# Download the New York Times county-level COVID-19 data.
# 'fips' is read as object so the codes are not turned into numbers.
nytimes_counties_df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv',
    dtype={'fips': object},
)

In [126]:
# Parse the 'date' column into datetime values (it is read from the CSV as object)
nytimes_counties_df['date'] = nytimes_counties_df['date'].pipe(pd.to_datetime)

In [127]:
# Preview the first five rows of the NYTimes county data
nytimes_counties_df.head(n=5)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061,1,0
1,2020-01-22,Snohomish,Washington,53061,1,0
2,2020-01-23,Snohomish,Washington,53061,1,0
3,2020-01-24,Cook,Illinois,17031,1,0
4,2020-01-24,Snohomish,Washington,53061,1,0


In [133]:
# Disabled cell: nytimes_counties_plot_df is not created while this stays commented out.
# NOTE(review): a plain `plot_df = nytimes_counties_df` assignment makes both names
# refer to the same DataFrame, so an in-place set_index would also change
# nytimes_counties_df. set_index() without inplace returns a new frame instead:
# nytimes_counties_plot_df = nytimes_counties_df.set_index('date')

### [COVID Tracking Project Latest US & States Current Data](https://covidtracking.com/api)

In [15]:
# Fetch the COVID Tracking Project's current US-wide COVID-19 figures
covtrack_us_now_df = pd.read_json(
    path_or_buf='https://covidtracking.com/api/v1/us/current.json',
)

In [16]:
# Fetch the COVID Tracking Project's current per-state COVID-19 figures
covtrack_states_now_df = pd.read_json(
    path_or_buf='https://covidtracking.com/api/v1/states/current.json',
)

### [COVID Tracking Project US Accumulative Data](https://covidtracking.com/api)

In [17]:
# Fetch the COVID Tracking Project's daily US-wide COVID-19 history.
# 'fips' is requested as object dtype.
covtrack_us_hist_df = pd.read_json(
    path_or_buf='https://covidtracking.com/api/v1/us/daily.json',
    dtype={'fips': object},
)

In [20]:
# Create a 'datetime' column from the integer 'date' column.
# After astype(str) the values are compact digit strings with no '-'
# separators (assumed YYYYMMDD, e.g. '20200503' - TODO confirm against the
# feed), so the format must be '%Y%m%d'; '%Y-%m-%d' does not describe that
# text and is rejected by strict parsing.
# pd.to_datetime already returns datetime64 values, so no second conversion
# is needed.
covtrack_us_hist_df['datetime'] = pd.to_datetime(
    covtrack_us_hist_df['date'].astype(str),
    format='%Y%m%d',
)

In [29]:
# Disabled cell: covtrack_us_hist_plot_df is not created while this stays commented out.
# If re-enabled, set_index() without inplace returns a new frame indexed by
# 'datetime' and avoids modifying a column slice in place:
# covtrack_us_hist_plot_df = covtrack_us_hist_df[['datetime', 'positive', 'death', 'total']].set_index('datetime')

### [COVID Tracking Project States Accumulative Data](https://covidtracking.com/api)

In [21]:
# Fetch the COVID Tracking Project's daily per-state COVID-19 history.
# 'fips' is requested as object dtype; it is later used as a merge key
# against the 'STATEFP' column of the state boundaries.
covtrack_states_hist_df = pd.read_json(
    path_or_buf='https://covidtracking.com/api/v1/states/daily.json',
    dtype={'fips': object},
)

In [22]:
# Create a 'datetime' column from the integer 'date' column.
# After astype(str) the values are compact digit strings such as '20200503'
# with no '-' separators, so the format must be '%Y%m%d'; '%Y-%m-%d' does not
# describe that text and is rejected by strict parsing.
# pd.to_datetime already returns datetime64 values, so no second conversion
# is needed.
covtrack_states_hist_df['datetime'] = pd.to_datetime(
    covtrack_states_hist_df['date'].astype(str),
    format='%Y%m%d',
)

In [78]:
# Disabled cell: covtrack_states_hist_plot_df is not created while this stays commented out.
# If re-enabled, set_index() without inplace returns a new frame indexed by
# 'datetime' and avoids modifying a column slice in place:
# covtrack_states_hist_plot_df = covtrack_states_hist_df[['datetime', 'state', 'fips', 'positive', 'death', 'total']].set_index('datetime')

## Procedure 3: Merge US COVID-19 DataFrame with GeoDataFrame 
### `nytimes_counties_df`

In [114]:
# Attach county geometry and state postcode to every NYTimes county record by
# matching the record's 'fips' to the boundary table's 'FULLFP'.
# A left join keeps every NYTimes row, including any without a matching county.
us_cov19_counties_df = nytimes_counties_df.merge(
    us_counties_gdf[['FULLFP', 'STATEFP', 'COUNTYFP', 'geometry', 'postcode']],
    how='left',
    left_on='fips',
    right_on='FULLFP',
)

In [115]:
# Wrap the merged county table as a GeoDataFrame, using its 'geometry' column.
# The CRS is passed as an authority string: the {'init': 'epsg:2163'} dict
# form is deprecated by pyproj and emits a FutureWarning on every call.
us_cov19_counties_gdf = gpd.GeoDataFrame(
    us_cov19_counties_df,
    geometry='geometry',
    crs='EPSG:2163',
)

  return _prepare_from_string(" ".join(pjargs))


In [116]:
# 'FULLFP' was only the join key; the same code remains available in 'fips'.
us_cov19_counties_gdf = us_cov19_counties_gdf.drop(columns=['FULLFP'])

In [135]:
# Preview the first five rows of the merged county GeoDataFrame
us_cov19_counties_gdf.head(n=5)

Unnamed: 0,county,state,fips,cases,deaths,STATEFP,COUNTYFP,geometry,postcode
0,Snohomish,Washington,53061,1,0,53,61,"MULTIPOLYGON (((-1634037.628 566970.701, -1633...",WA
1,Snohomish,Washington,53061,1,0,53,61,"MULTIPOLYGON (((-1634037.628 566970.701, -1633...",WA
2,Snohomish,Washington,53061,1,0,53,61,"MULTIPOLYGON (((-1634037.628 566970.701, -1633...",WA
3,Cook,Illinois,17031,1,0,17,31,"POLYGON ((965008.651 -256879.590, 966661.997 -...",IL
4,Snohomish,Washington,53061,1,0,53,61,"MULTIPOLYGON (((-1634037.628 566970.701, -1633...",WA


In [134]:
# Disabled cell: us_cov19_counties_plot_gdf is not created while this stays commented out.
# NOTE(review): the earlier draft put the column names in single brackets, which is
# looked up as one tuple key rather than a list of columns. Selecting with a list
# (double brackets) and using set_index() without inplace gives the intended frame:
# us_cov19_counties_plot_gdf = us_cov19_counties_gdf[['date', 'county', 'state', 'postcode', 'fips', 'STATEFP', 'COUNTYFP', 'cases', 'deaths', 'geometry']].set_index('date')

### `covtrack_states_hist_df`

In [87]:
# Attach state name and geometry to every COVID Tracking Project state record
# by matching the record's 'fips' to the boundary table's 'STATEFP'.
# A left join keeps every row of covtrack_states_hist_df.
us_cov19_states_df = covtrack_states_hist_df.merge(
    us_states_gdf[['STATEFP', 'NAME', 'geometry']],
    how='left',
    left_on='fips',
    right_on='STATEFP',
)

In [88]:
# Wrap the merged state table as a GeoDataFrame, using its 'geometry' column.
# The CRS is passed as an authority string: the {'init': 'epsg:2163'} dict
# form is deprecated by pyproj and emits a FutureWarning on every call.
us_cov19_states_gdf = gpd.GeoDataFrame(
    us_cov19_states_df,
    geometry='geometry',
    crs='EPSG:2163',
)

  return _prepare_from_string(" ".join(pjargs))


In [89]:
# 'STATEFP' was only the join key; the same code remains available in 'fips'.
us_cov19_states_gdf = us_cov19_states_gdf.drop(columns=['STATEFP'])

In [96]:
# Create the GeoDataFrame used for plotting charts, indexed by 'datetime'.
# This definition must be live: the next cell reads us_cov19_states_plot_gdf,
# which would otherwise be undefined on a fresh kernel.
# set_index() without inplace returns a new frame, so us_cov19_states_gdf is
# left unchanged and no column slice is modified in place.
us_cov19_states_plot_gdf = us_cov19_states_gdf[
    ['datetime', 'date', 'NAME', 'state', 'fips', 'positive', 'death', 'total', 'geometry']
].set_index('datetime')

In [97]:
# Preview the first five rows of the state-level plotting GeoDataFrame
us_cov19_states_plot_gdf.head(n=5)

Unnamed: 0_level_0,date,NAME,state,fips,positive,death,total,geometry
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-05-03,20200503,Alaska,AK,2,368.0,9.0,21578.0,"MULTIPOLYGON (((-4288629.978 3426273.416, -428..."
2020-05-03,20200503,Alabama,AL,1,7725.0,290.0,92500.0,"MULTIPOLYGON (((1150023.442 -1526367.945, 1150..."
2020-05-03,20200503,Arkansas,AR,5,3431.0,76.0,52890.0,"POLYGON ((482000.986 -928930.836, 487242.080 -..."
2020-05-03,20200503,American Samoa,AS,60,0.0,0.0,57.0,"MULTIPOLYGON (((-7800567.112 -3751033.523, -78..."
2020-05-03,20200503,Arizona,AZ,4,8640.0,362.0,81119.0,"POLYGON ((-1386135.563 -1256471.688, -1386636...."


# South Korea COVID-19 Case Data Visualisation
## Procedure 1: South Korea Provinces and Special/Metropolitan Cities GeoDataFrame (EPSG: 4326)

In [23]:
# Load the Natural Earth large-scale cultural boundaries
# (Admin 1 - States and Provinces) shapefile.
ne_admin_1_states_gdf = gpd.read_file(
    'Data/input/ne_admin_1_states_provinces/ne_10m_admin_1_states_provinces.shp'
)

In [24]:
# Keep only the rows whose 'admin' value is 'South Korea'
# (provinces and special/metropolitan cities).
south_korea_gdf = ne_admin_1_states_gdf.loc[
    ne_admin_1_states_gdf['admin'].eq('South Korea')
]

In [25]:
# Keep only the columns that are needed
south_korea_gdf = south_korea_gdf[
    [
        'geonunit', 'gu_a3', 'gn_name', 'name_de', 'fips',
        'latitude', 'longitude', 'geometry',
    ]
]

## Procedure 2: South Korea COVID-19 Cases/Deaths/Testing DataFrame
### [South Korea Accumulative Data](https://github.com/katkim0307/COVID-19_Response_Comparison/blob/master/Data/input/covid_19_south_korea_full_xls.xlsx)

In [26]:
# Read the 'covid_19_daily_country' sheet of the South Korea COVID-19
# workbook and replace every missing value with 0.
kor_cov19_df = pd.read_excel(
    'Data/input/covid_19_south_korea_full_xls.xlsx',
    sheet_name='covid_19_daily_country',
).fillna(0)

In [31]:
# Build the country-level frame used for plotting charts: select the
# confirmed / death / test columns and use 'Date' as the index.
kor_cov19_plot_df = kor_cov19_df[
    [
        'Date',
        'Confirm_New', 'Confirm_Tot',
        'Death_New', 'Death_Tot',
        'Test_New', 'Test_Tot', 'Test_Curr',
    ]
].set_index('Date')

### [South Korea Provinces and Special/Metropolitan Cities Data](https://github.com/katkim0307/COVID-19_Response_Comparison/blob/master/Data/input/covid_19_south_korea_full_xls.xlsx)

In [27]:
# Read the 'covid_19_daily_province' sheet of the South Korea COVID-19 workbook
kor_cov19_province_df = pd.read_excel(
    io='Data/input/covid_19_south_korea_full_xls.xlsx',
    sheet_name='covid_19_daily_province',
)


In [32]:
# Build the province-level frame used for plotting charts: select the
# province, confirmed / death / test columns and use 'Date' as the index.
kor_cov19_province_plot_df = kor_cov19_province_df[
    [
        'Date', 'Province',
        'Confirm_New', 'Confirm_Tot',
        'Death_New', 'Death_Tot',
        'Test_New', 'Test_Tot', 'Test_Curr',
    ]
].set_index('Date')

## Procedure 3: Merge South Korea COVID-19 DataFrame with GeoDataFrame 