# COVID-19 Open Research Dataset Challenge (CORD-19)
  
https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge

This notebook is specifically inspired by the following notebook:  
  
https://www.kaggle.com/tarunkr/covid-19-case-study-analysis-viz-comparisons

## Import Libraries and Datasets

In [1]:
# Installs
# Uncomment and run once if these third-party packages (imported in the next
# cell) are missing from the kernel's environment.
#!pip install pycountry_convert
#!pip install folium
#!pip install calmap

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker 
import pycountry_convert as pc
import folium
import branca
from datetime import datetime, timedelta,date
from scipy.interpolate import make_interp_spline, BSpline
import plotly.express as px
import json, requests
import calmap

from keras.layers import Input, Dense, Activation, LeakyReLU, Dropout
from keras import models
from keras.optimizers import RMSprop, Adam

Using TensorFlow backend.


In [3]:
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [4]:
# Fetch the datasets over HTTP from the CSSEGISandData/COVID-19 GitHub repository.

# Global time series of confirmed cases and of deaths (master branch).
df_confirmed = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
)
df_deaths = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
)

# Deprecated: the recovered time series is no longer loaded.
# df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv')

# Per-country table (web-data branch).
df_covid19 = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv"
)

# cases_time.csv table (web-data branch); Last_Update is parsed as dates on load.
df_table = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_time.csv",
    parse_dates=['Last_Update'],
)

In [5]:
# Preview the first five rows of the per-country table.
df_covid19.head(n=5)

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
0,Afghanistan,2020-10-06 16:23:44,33.93911,67.709953,39486.0,1467.0,32977.0,5042.0,101.432527,,,3.715241,4,AFG
1,Albania,2020-10-06 16:23:44,41.1533,20.1683,14568.0,403.0,8965.0,5200.0,506.220029,,,2.766337,8,ALB
2,Algeria,2020-10-06 16:23:44,28.0339,1.6596,52270.0,1768.0,36672.0,13830.0,119.198989,,,3.382437,12,DZA
3,Andorra,2020-10-06 16:23:44,42.5063,1.5218,2370.0,53.0,1615.0,702.0,3067.36556,,,2.236287,20,AND
4,Angola,2020-10-06 16:23:44,-11.2027,17.8739,5530.0,199.0,2591.0,2740.0,16.825762,,,3.598553,24,AGO


In [6]:
# Dimensions of the per-country table as (rows, columns).
df_covid19.shape

(188, 14)

### Preprocessing

Rename the columns of the dataframes to a more usable format.

In [7]:
# Give the two time-series frames the same short column names.
df_confirmed = df_confirmed.rename(
    {"Province/State": "state", "Country/Region": "country"}, axis="columns"
)
df_deaths = df_deaths.rename(
    {"Province/State": "state", "Country/Region": "country"}, axis="columns"
)

# The per-country table only needs its country column renamed.
df_covid19 = df_covid19.rename({"Country_Region": "country"}, axis="columns")

# Display the confirmed-cases frame with its new column names.
df_confirmed

Unnamed: 0,state,country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,9/26/20,9/27/20,9/28/20,9/29/20,9/30/20,10/1/20,10/2/20,10/3/20,10/4/20,10/5/20
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,39192,39227,39233,39254,39268,39285,39290,39297,39341,39422
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,13153,13259,13391,13518,13649,13806,13965,14117,14266,14410
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,50914,51067,51213,51368,51530,51690,51847,51995,52136,52270
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,1836,1836,1966,1966,2050,2050,2110,2110,2110,2370
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,4672,4718,4797,4905,4972,5114,5211,5370,5402,5530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,38253,38703,39121,39541,39899,40322,40766,41078,41498,41957
262,,Western Sahara,24.215500,-12.885800,0,0,0,0,0,0,...,10,10,10,10,10,10,10,10,10,10
263,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,2030,2030,2031,2031,2034,2039,2040,2041,2041,2041
264,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,14612,14641,14660,14715,14759,14802,14830,14974,15052,15089


In [13]:
# Change country names to the form expected by the pycountry_convert library.
# Each frame is updated in place through a boolean-mask .loc assignment.

## USA
df_confirmed.loc[df_confirmed['country'] == "US", "country"] = "USA"
# These two lines must assign with `=`. A comparison (`==`) here only builds a
# throwaway boolean Series and leaves "US" unchanged in the frame.
df_deaths.loc[df_deaths['country'] == "US", "country"] = "USA"
df_covid19.loc[df_covid19['country'] == "US", "country"] = "USA"
# df_table still uses its as-loaded column name, Country_Region.
df_table.loc[df_table['Country_Region'] == "US", "Country_Region"] = "USA"

# TODO: the sections below are placeholders; no renames are applied for them yet.

## South Korea

## Taiwan

## Democratic Republic of the Congo

## Côte d'Ivoire

## Réunion

## Republic of the Congo

## Bahamas

## Gambia

# Display the confirmed-cases frame after the renames.
df_confirmed

Unnamed: 0,state,country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,9/26/20,9/27/20,9/28/20,9/29/20,9/30/20,10/1/20,10/2/20,10/3/20,10/4/20,10/5/20
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,39192,39227,39233,39254,39268,39285,39290,39297,39341,39422
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,13153,13259,13391,13518,13649,13806,13965,14117,14266,14410
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,50914,51067,51213,51368,51530,51690,51847,51995,52136,52270
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,1836,1836,1966,1966,2050,2050,2110,2110,2110,2370
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,4672,4718,4797,4905,4972,5114,5211,5370,5402,5530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,38253,38703,39121,39541,39899,40322,40766,41078,41498,41957
262,,Western Sahara,24.215500,-12.885800,0,0,0,0,0,0,...,10,10,10,10,10,10,10,10,10,10
263,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,2030,2030,2031,2031,2034,2039,2040,2041,2041,2041
264,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,14612,14641,14660,14715,14759,14802,14830,14974,15052,15089
