In [3]:
import pandas as pd
import numpy as np
from datetime import timedelta
import datetime as dt
np.seterr(divide='ignore', invalid='ignore')
import plotly.express as px
import plotly.graph_objects as go

In [5]:
#Create reference to CSV file
csv_path = "Resources/covid_19_data.csv"

#Impor the CSV into a pandas DataFrame
covid_19 = pd.read_csv(csv_path, parse_dates=["ObservationDate"]) 

In [12]:
covid_19 = covid_19.replace('Mainland China', 'China')

#Rename Columns
covid_19 = covid_19.rename(columns={"ObservationDate": "Date"})

covid_19 = covid_19.loc[:,['Date', 'Province/State', 'Country/Region', 'Confirmed', 'Deaths', 'Recovered']]

covid_19 = covid_19[['Province/State', 'Country/Region', 'Date', 'Confirmed', 'Deaths', 'Recovered']]

covid_19

Unnamed: 0,Province/State,Country/Region,Date,Confirmed,Deaths,Recovered
0,Anhui,China,2020-01-22,1.0,0.0,0.0
1,Beijing,China,2020-01-22,14.0,0.0,0.0
2,Chongqing,China,2020-01-22,6.0,0.0,0.0
3,Fujian,China,2020-01-22,1.0,0.0,0.0
4,Gansu,China,2020-01-22,0.0,0.0,0.0
...,...,...,...,...,...,...
11336,,Uzbekistan,2020-03-23,46.0,0.0,0.0
11337,,Venezuela,2020-03-23,77.0,0.0,15.0
11338,,Vietnam,2020-03-23,123.0,0.0,17.0
11339,,Zambia,2020-03-23,3.0,0.0,0.0


In [13]:
df = covid_19
                
df

Unnamed: 0,Province/State,Country/Region,Date,Confirmed,Deaths,Recovered
0,Anhui,China,2020-01-22,1.0,0.0,0.0
1,Beijing,China,2020-01-22,14.0,0.0,0.0
2,Chongqing,China,2020-01-22,6.0,0.0,0.0
3,Fujian,China,2020-01-22,1.0,0.0,0.0
4,Gansu,China,2020-01-22,0.0,0.0,0.0
...,...,...,...,...,...,...
11336,,Uzbekistan,2020-03-23,46.0,0.0,0.0
11337,,Venezuela,2020-03-23,77.0,0.0,15.0
11338,,Vietnam,2020-03-23,123.0,0.0,17.0
11339,,Zambia,2020-03-23,3.0,0.0,0.0


## Infected Countries Ordered by Date Since 1st Confirmed Case

In [14]:
#Create Dataset that lists all countries
dfww = df.loc[:,['Country/Region', 'Date', 'Confirmed', 'Deaths', 'Recovered']]

#Create DataFrame of days with Confirmed Cases
df1 = dfww[dfww['Confirmed']>0]
df1 = df1.reset_index(drop=True)

#Find start date of infections in YYY-DD-MM format
start_day = df1.groupby(by='Country/Region').agg('min').reset_index(drop=False)
start_day = start_day.loc[:,['Country/Region', 'Date']]

#Find todays date in YYYY-DD-MM format
today = dt.datetime.today()

#Days since first infection date
days_difference = []
days_difference = (today - start_day['Date']).dt.days
days_difference

#Merge on index
start_day = start_day.merge(days_difference, left_index=True, right_index=True)
start_day = start_day.rename(columns={'Country/Region': 'Country/Region', 
                                      'Date_x': 'Start Date', 
                                      'Date_y': 'Days Since 1st Case'})

start_day['Weeks Since 1st Case'] = round(start_day['Days Since 1st Case'] / 7, 2)

start_day.sort_values(by='Days Since 1st Case', ascending=False)\
.reset_index(drop=True).style.background_gradient(cmap='Reds')
#start_day

Unnamed: 0,Country/Region,Start Date,Days Since 1st Case,Weeks Since 1st Case
0,Japan,2020-01-22 00:00:00,62,8.86
1,Macau,2020-01-22 00:00:00,62,8.86
2,South Korea,2020-01-22 00:00:00,62,8.86
3,China,2020-01-22 00:00:00,62,8.86
4,US,2020-01-22 00:00:00,62,8.86
5,Taiwan,2020-01-22 00:00:00,62,8.86
6,Thailand,2020-01-22 00:00:00,62,8.86
7,Vietnam,2020-01-23 00:00:00,61,8.71
8,Hong Kong,2020-01-23 00:00:00,61,8.71
9,Singapore,2020-01-23 00:00:00,61,8.71


## Confirmed Cases desc. by Country

In [15]:
#Merge datasets together so age metrics follow global analysis
dfww = dfww.groupby(by='Country/Region').agg('max').reset_index(drop=False)

dfww = pd.merge(dfww, start_day, on='Country/Region')

In [16]:
#Add Column for "Active" = 'Confirmed' - 'Deaths' - 'Recovered'

active = dfww['Confirmed'] - dfww['Deaths'] - dfww['Recovered']

dfww['Active'] = active

dfww = dfww[['Country/Region', 'Date', 
             'Confirmed', 'Deaths', 
             'Recovered', 'Active',
             'Start Date', 'Days Since 1st Case', 
             'Weeks Since 1st Case'
            ]]

dfww

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,Active,Start Date,Days Since 1st Case,Weeks Since 1st Case
0,Azerbaijan,2020-02-28,1.0,0.0,0.0,1.0,2020-02-28,25,3.57
1,"('St. Martin',)",2020-03-10,2.0,0.0,0.0,2.0,2020-03-10,14,2.00
2,Afghanistan,2020-03-23,40.0,1.0,1.0,38.0,2020-02-24,29,4.14
3,Albania,2020-03-23,104.0,4.0,2.0,98.0,2020-03-09,15,2.14
4,Algeria,2020-03-23,230.0,17.0,65.0,148.0,2020-02-25,28,4.00
...,...,...,...,...,...,...,...,...,...
198,Venezuela,2020-03-23,77.0,0.0,15.0,62.0,2020-03-14,10,1.43
199,Vietnam,2020-03-23,123.0,0.0,17.0,106.0,2020-01-23,61,8.71
200,Zambia,2020-03-23,3.0,0.0,0.0,3.0,2020-03-18,6,0.86
201,Zimbabwe,2020-03-23,3.0,1.0,0.0,2.0,2020-03-20,4,0.57


## Confirmed Cases by Country

In [17]:
dfww_confirmed = dfww.sort_values(by='Confirmed', ascending=False).reset_index(drop=True)

dfww_confirmed.style.background_gradient(cmap='YlOrRd')

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,Active,Start Date,Days Since 1st Case,Weeks Since 1st Case
0,China,2020-03-23 00:00:00,67800.0,3153.0,59882.0,4765.0,2020-01-22 00:00:00,62,8.86
1,Italy,2020-03-23 00:00:00,63927.0,6077.0,7432.0,50418.0,2020-01-31 00:00:00,53,7.57
2,Spain,2020-03-23 00:00:00,35136.0,2311.0,3355.0,29470.0,2020-02-01 00:00:00,52,7.43
3,Germany,2020-03-23 00:00:00,29056.0,123.0,453.0,28480.0,2020-01-28 00:00:00,56,8.0
4,Iran,2020-03-23 00:00:00,23049.0,1812.0,8376.0,12861.0,2020-02-19 00:00:00,34,4.86
5,France,2020-03-23 00:00:00,19874.0,860.0,2200.0,16814.0,2020-01-24 00:00:00,60,8.57
6,US,2020-03-23 00:00:00,15793.0,117.0,178.0,15498.0,2020-01-22 00:00:00,62,8.86
7,South Korea,2020-03-23 00:00:00,8961.0,111.0,3166.0,5684.0,2020-01-22 00:00:00,62,8.86
8,Switzerland,2020-03-23 00:00:00,8795.0,120.0,131.0,8544.0,2020-02-25 00:00:00,28,4.0
9,UK,2020-03-23 00:00:00,6650.0,335.0,135.0,6180.0,2020-01-31 00:00:00,53,7.57


## Confirmed Deaths by Country

In [18]:
dfww_deaths = dfww.sort_values(by='Deaths', ascending=False).reset_index(drop=True)

dfww_deaths.style.background_gradient(cmap='Reds')

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,Active,Start Date,Days Since 1st Case,Weeks Since 1st Case
0,Italy,2020-03-23 00:00:00,63927.0,6077.0,7432.0,50418.0,2020-01-31 00:00:00,53,7.57
1,China,2020-03-23 00:00:00,67800.0,3153.0,59882.0,4765.0,2020-01-22 00:00:00,62,8.86
2,Spain,2020-03-23 00:00:00,35136.0,2311.0,3355.0,29470.0,2020-02-01 00:00:00,52,7.43
3,Iran,2020-03-23 00:00:00,23049.0,1812.0,8376.0,12861.0,2020-02-19 00:00:00,34,4.86
4,France,2020-03-23 00:00:00,19874.0,860.0,2200.0,16814.0,2020-01-24 00:00:00,60,8.57
5,UK,2020-03-23 00:00:00,6650.0,335.0,135.0,6180.0,2020-01-31 00:00:00,53,7.57
6,Netherlands,2020-03-23 00:00:00,4749.0,213.0,2.0,4534.0,2020-02-27 00:00:00,26,3.71
7,Germany,2020-03-23 00:00:00,29056.0,123.0,453.0,28480.0,2020-01-28 00:00:00,56,8.0
8,Switzerland,2020-03-23 00:00:00,8795.0,120.0,131.0,8544.0,2020-02-25 00:00:00,28,4.0
9,US,2020-03-23 00:00:00,15793.0,117.0,178.0,15498.0,2020-01-22 00:00:00,62,8.86


## Recovered Cases by Country

In [19]:
dfww_recovered = dfww.sort_values(by='Recovered', ascending=False).reset_index(drop=True)

dfww_recovered.style.background_gradient(cmap='Greens')

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,Active,Start Date,Days Since 1st Case,Weeks Since 1st Case
0,China,2020-03-23 00:00:00,67800.0,3153.0,59882.0,4765.0,2020-01-22 00:00:00,62,8.86
1,Iran,2020-03-23 00:00:00,23049.0,1812.0,8376.0,12861.0,2020-02-19 00:00:00,34,4.86
2,Italy,2020-03-23 00:00:00,63927.0,6077.0,7432.0,50418.0,2020-01-31 00:00:00,53,7.57
3,Spain,2020-03-23 00:00:00,35136.0,2311.0,3355.0,29470.0,2020-02-01 00:00:00,52,7.43
4,South Korea,2020-03-23 00:00:00,8961.0,111.0,3166.0,5684.0,2020-01-22 00:00:00,62,8.86
5,France,2020-03-23 00:00:00,19874.0,860.0,2200.0,16814.0,2020-01-24 00:00:00,60,8.57
6,Others,2020-03-23 00:00:00,712.0,8.0,567.0,137.0,2020-02-07 00:00:00,46,6.57
7,Germany,2020-03-23 00:00:00,29056.0,123.0,453.0,28480.0,2020-01-28 00:00:00,56,8.0
8,Belgium,2020-03-23 00:00:00,3743.0,88.0,401.0,3254.0,2020-02-04 00:00:00,49,7.0
9,Japan,2020-03-23 00:00:00,1128.0,42.0,235.0,851.0,2020-01-22 00:00:00,62,8.86


## Active Cases by Country

In [118]:
dfww_active = dfww.sort_values(by='Active', ascending=False).reset_index(drop=True)

dfww_active.style.background_gradient(cmap='YlOrRd')

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,Active,Start Date,Days Since 1st Case,Weeks Since 1st Case
0,Italy,2020-03-22 00:00:00,59138,5476,7024,46638,2020-01-31 00:00:00,52,7.43
1,Germany,2020-03-22 00:00:00,24873,94,266,24513,2020-01-27 00:00:00,56,8.0
2,Spain,2020-03-22 00:00:00,28768,1772,2575,24421,2020-02-01 00:00:00,51,7.29
3,US,2020-03-22 00:00:00,15793,117,6,15670,2020-02-24 00:00:00,28,4.0
4,France,2020-03-22 00:00:00,16018,674,2200,13144,2020-01-24 00:00:00,59,8.43
5,Iran,2020-03-22 00:00:00,21638,1685,7931,12022,2020-02-19 00:00:00,33,4.71
6,Switzerland,2020-03-22 00:00:00,7245,98,131,7016,2020-02-25 00:00:00,27,3.86
7,South Korea,2020-03-22 00:00:00,8897,104,2909,5884,2020-01-22 00:00:00,61,8.71
8,United Kingdom,2020-03-22 00:00:00,5683,281,65,5337,2020-01-31 00:00:00,52,7.43
9,China,2020-03-22 00:00:00,67800,3144,59433,5223,2020-01-22 00:00:00,61,8.71


## Recovery Percent by Country (Recovered / Confirmed Cases)

In [119]:
dfww_recovered['Recovered Percent'] = round((dfww_recovered['Recovered'] / dfww_recovered['Confirmed']) * 100, 2)

dfww_recovered = dfww_recovered[['Country/Region', 'Date', 
                                 'Confirmed', 'Deaths', 'Recovered', 
                                 'Recovered Percent', 'Start Date', 
                                 'Days Since 1st Case', 'Weeks Since 1st Case']]

dfww_recovered.sort_values(by='Recovered Percent', ascending=False)\
.reset_index(drop=True).style.background_gradient(cmap='Greens')

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,Recovered Percent,Start Date,Days Since 1st Case,Weeks Since 1st Case
0,China,2020-03-22 00:00:00,67800,3144,59433,87.66,2020-01-22 00:00:00,61,8.71
1,Nepal,2020-03-22 00:00:00,2,0,1,50.0,2020-01-25 00:00:00,58,8.29
2,Cruise Ship,2020-03-22 00:00:00,712,8,325,45.65,2020-02-07 00:00:00,45,6.43
3,Bahrain,2020-03-22 00:00:00,332,2,149,44.88,2020-02-24 00:00:00,28,4.0
4,Iran,2020-03-22 00:00:00,21638,1685,7931,36.65,2020-02-19 00:00:00,33,4.71
5,Ethiopia,2020-03-22 00:00:00,11,0,4,36.36,2020-03-13 00:00:00,10,1.43
6,South Korea,2020-03-22 00:00:00,8897,104,2909,32.7,2020-01-22 00:00:00,61,8.71
7,Algeria,2020-03-22 00:00:00,201,17,65,32.34,2020-02-25 00:00:00,27,3.86
8,Singapore,2020-03-22 00:00:00,455,2,144,31.65,2020-01-23 00:00:00,60,8.57
9,Oman,2020-03-22 00:00:00,55,0,17,30.91,2020-02-24 00:00:00,28,4.0


# Country Deep Dive

In [120]:
italy = df.loc[df["Country/Region"] == "Italy", :]
italy = italy.reset_index(drop=True)


#Create DataFrame of days with Confirmed Cases
temp = italy[italy['Confirmed']>0]
temp = temp.reset_index(drop=True)

#Find start date of infections in YYY-DD-MM format
italy_start = temp['Date'].reset_index(drop=True)[0]

#Find todays date in YYYY-DD-MM format
today = dt.datetime.today()

#Days since first infection date
difference = today - italy_start
difference = difference.days
difference

#Find Daily increase in Confirmed, Deaths, and Recovered
it_confirmed_pct_change = round(italy['Confirmed'].pct_change() * 100, 2)
it_death_change = round(italy['Deaths'].pct_change() * 100, 2)
it_recovery_change = round(italy['Recovered'].pct_change() * 100, 2)

#Find Daily Percentage Increase in Confirmed, Deaths, and Recovered
it_confirmed_perday = italy['Confirmed'].diff()
it_death_perday = italy['Deaths'].diff()
it_recovered_perday = italy['Recovered'].diff()


italy_percent_change = pd.DataFrame

italy_percent = pd.DataFrame({
                   "New Confirmed Cases Per Day": it_confirmed_perday,
                   "Confirmed Percent Change" : it_confirmed_pct_change,
                   "New Confirmed Deaths Per Day": it_death_perday,
                   "Death Percent Change" : it_death_change,
                   "New Recovered Cases Per Day": it_recovered_perday,
                   "Recovery Percent Change" : it_recovery_change,
                    })
    
italy_percent.fillna(value=0)

#Merge DataSets together into 1 df
italy_df = italy.merge(italy_percent, left_index=True, right_index=True)
italy_df = italy_df.fillna(value=0)

In [121]:
italy_df

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,New Confirmed Cases Per Day,Confirmed Percent Change,New Confirmed Deaths Per Day,Death Percent Change,New Recovered Cases Per Day,Recovery Percent Change
0,0,Italy,43.0,12.0,2020-01-22,0,0,0,0.0,0.00,0.0,0.00,0.0,0.00
1,0,Italy,43.0,12.0,2020-01-23,0,0,0,0.0,0.00,0.0,0.00,0.0,0.00
2,0,Italy,43.0,12.0,2020-01-24,0,0,0,0.0,0.00,0.0,0.00,0.0,0.00
3,0,Italy,43.0,12.0,2020-01-25,0,0,0,0.0,0.00,0.0,0.00,0.0,0.00
4,0,Italy,43.0,12.0,2020-01-26,0,0,0,0.0,0.00,0.0,0.00,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,0,Italy,43.0,12.0,2020-03-18,35713,2978,4025,4207.0,13.35,475.0,18.98,1084.0,36.86
57,0,Italy,43.0,12.0,2020-03-19,41035,3405,4440,5322.0,14.90,427.0,14.34,415.0,10.31
58,0,Italy,43.0,12.0,2020-03-20,47021,4032,4440,5986.0,14.59,627.0,18.41,0.0,0.00
59,0,Italy,43.0,12.0,2020-03-21,53578,4825,6072,6557.0,13.94,793.0,19.67,1632.0,36.76


## Death Percent Change

In [122]:
fig = px.bar(italy_df, x='Date', y='Death Percent Change',
             hover_data=['Confirmed', 'Deaths', 'Recovered'], color='Confirmed Percent Change',
             height=400)
fig.show()