# Explore Covid-19 development for a specific country/region
* [Import Data](#1)
* [Select a specific country/region](#2)
* [Development of Confirmed Cases](#3)
* [Development of Deaths](#4)
* [Ratios](#5)
* [Other Evaluations](#6)
* [Comparison with another country/region](#7)

In [None]:
# packages

# standard
import numpy as np
import pandas as pd

# plots
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

<a id='1'></a>
# Import Data

In [None]:
# read the confirmed-cases table from the Kaggle input folder
cases_csv = '../input/covid19-data-from-john-hopkins-university/CONVENIENT_global_confirmed_cases.csv'
df_cases = pd.read_csv(cases_csv)
# preview the last rows of the table
df_cases.tail()

In [None]:
# read the deaths table from the Kaggle input folder
deaths_csv = '../input/covid19-data-from-john-hopkins-university/CONVENIENT_global_deaths.csv'
df_deaths = pd.read_csv(deaths_csv)
# preview the last rows of the table
df_deaths.tail()

In [None]:
# show every column name (these are the values usable as country/region selection)
print(df_cases.columns.tolist())

In [None]:
# number of observations: all rows except the first one
# (the data frames built further down use rows 1..n and skip row 0)
n = len(df_cases) - 1
print('Number of observations:', n)

In [None]:
# default figure size (width, height) passed to plt.subplots(figsize=...)
psx, psy = 12, 6

<a id='2'></a>
# Select a specific country/region

In [None]:
# Country/region to analyse; the value is used as a column name of
# df_cases and df_deaths, so it must match one of the listed columns exactly.
country_select = 'Germany'

# Alternative selections (uncomment exactly one to use it):
#
# country_select = 'US'
# country_select = 'Italy'
# country_select = 'Spain'
# country_select = 'France.10'

#### Note: For countries that are split into sub-regions the selection is a bit trickier: you either have to pick the right column (e.g. 'France.10' for France) or explicitly aggregate over several columns (e.g. for Australia or Canada).

In [None]:
# assemble the data frame for the selected country from rows 1..n (row 0 is skipped)
row_window = slice(1, n + 1)
col_dates = df_cases['Country/Region'][row_window]
col_cases_inc = pd.to_numeric(df_cases[country_select][row_window])
col_deaths_inc = pd.to_numeric(df_deaths[country_select][row_window])
df = pd.DataFrame({
    'Date': pd.to_datetime(col_dates),
    'Cases_Inc': col_cases_inc,
    'Deaths_Inc': col_deaths_inc,
})
# the source values are increments, so running totals are their cumulative sums
df['Cases'] = df['Cases_Inc'].cumsum()
df['Deaths'] = df['Deaths_Inc'].cumsum()

In [None]:
# derived columns
# case fatality rate: accumulated deaths divided by accumulated cases
df['CFR'] = df['Deaths'].div(df['Cases'])
# relative change of each running total versus the previous row
df['Cases_RelChange'] = df['Cases'].pct_change()
df['Deaths_RelChange'] = df['Deaths'].pct_change()

In [None]:
# add day of week
def get_weekday(i_date):
    """Return the full weekday name of `i_date`, formatted via strftime('%A')."""
    weekday_name = i_date.strftime('%A')
    return weekday_name

df['Weekday'] = [get_weekday(day) for day in df.Date]

In [None]:
# display the last 14 rows, i.e. the two most recent weeks
df.tail(n=14)

<a id='3'></a>
# Development of Confirmed Cases

In [None]:
# line chart of accumulated confirmed cases over time
fig, ax = plt.subplots(figsize=(psx, psy))
ax.plot(df['Date'], df['Cases'], color='darkgreen')
ax.set_title(f'Cases - {country_select}')
ax.grid()
plt.show()

In [None]:
# scatter chart of the daily case increments over time
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Cases_Inc'], color='darkgreen')
ax.set_title(f'Cases Incremental - {country_select}')
ax.set_xlabel('Date')
ax.set_ylabel('Count')
ax.grid()
plt.show()

In [None]:
# plot incremental development - last seven days only
# Select the final seven rows positionally, so this cell does not depend on
# the global row count n or on the index labels of df.
last7_cases = df.tail(7)
mean_inc_7 = np.round(last7_cases['Cases_Inc'].mean(), 2)
fig, ax = plt.subplots(figsize=(psx,psy))
ax.scatter(last7_cases['Date'], last7_cases['Cases_Inc'], color='darkgreen')
# dashed horizontal line at the 7-day mean, from the first to the last plotted date
plt.hlines(mean_inc_7,
           xmin=last7_cases['Date'].iloc[0], xmax=last7_cases['Date'].iloc[-1],
           color='darkgreen', linestyles='dashed')
plt.title('Cases Incremental (last 7 days only) [mean = ' + str(mean_inc_7) + '] - ' + country_select)
plt.xlabel('Date')
plt.ylabel('Count')
plt.grid()
plt.show()

In [None]:
# change of accumulated cases versus seven rows earlier
# (a 7-day window removes weekday volatilities)
df['Cases_Diff7'] = df['Cases'].diff(7)

fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Cases_Diff7'], color='darkgreen')
ax.set_title(f'Cases - Development vs. previous week - {country_select}')
ax.set_xlabel('Date')
ax.set_ylabel('Increase (over 7 days)')
ax.grid()
plt.show()

In [None]:
# same 7-row difference, now applied to the daily increments
# (again to remove weekday volatilities)
df['Cases_Inc_Diff7'] = df['Cases_Inc'].diff(7)

fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Cases_Inc_Diff7'], color='darkgreen')
ax.set_title(f'Incremental Cases - Development vs. previous week - {country_select}')
ax.set_xlabel('Date')
ax.set_ylabel('Increase (over 7 days)')
ax.grid()
plt.show()

In [None]:
# scatter chart of the day-to-day relative change of accumulated cases
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Cases_RelChange'], color='darkgreen')
ax.set_title(f'Cases Relative Change - {country_select}')
# y axis limited to [0, 1], i.e. nothing above a 100% increase is shown
ax.set_ylim(0, 1)
ax.set_xlabel('Date')
ax.set_ylabel('Relative Change')
ax.grid()
plt.show()

In [None]:
# same chart, zoomed in to relative changes between 0 and 10%
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Cases_RelChange'], color='darkgreen')
ax.set_title(f'Cases Relative Change / Zoom - {country_select}')
ax.set_ylim(0, 0.1)
ax.set_xlabel('Date')
ax.set_ylabel('Relative Change')
ax.grid()
plt.show()

In [None]:
# show changes of last 7 days
# Select the final seven rows positionally, so this cell does not depend on
# the global row count n or on the index labels of df.
last7_cases_rel = df.tail(7)
recent_changes_cases = last7_cases_rel['Cases_RelChange']
m_recent_changes_cases = np.round(recent_changes_cases.mean(),4)

fig, ax = plt.subplots(figsize=(psx,psy))
ax.scatter(last7_cases_rel['Date'], recent_changes_cases, color='darkgreen')
# dashed horizontal line at the 7-day mean, from the first to the last plotted date
plt.hlines(m_recent_changes_cases,
           xmin=last7_cases_rel['Date'].iloc[0], xmax=last7_cases_rel['Date'].iloc[-1],
           color='darkgreen', linestyles='dashed')
plt.title('Cases - Relative change last seven days - mean = ' + str(m_recent_changes_cases) + ' - ' + country_select)
plt.xlabel('Date')
plt.ylabel('Relative Change')
plt.grid()
plt.show()

## Impact of weekday

In [None]:
# distribution of the daily case increments, one violin per weekday
fig, ax = plt.subplots(figsize=(psx, psy))
sns.violinplot(data=df, x='Weekday', y='Cases_Inc')
ax.grid()
plt.show()

In [None]:
# evaluate impact of weekday; logarithmic plot
# The increments are shifted by 1 before taking log10 so that days with
# zero new cases map to 0 instead of -inf.
fig, ax = plt.subplots(figsize=(psx,psy))
sns.violinplot(x=df.Weekday, y=np.log10(1+df.Cases_Inc))
# label states the quantity that is actually plotted (including the +1 shift)
plt.ylabel('log10(1 + Cases_Inc)')
plt.grid()
plt.show()

In [None]:
# evaluate average per weekday
# One row per weekday with the mean and median of the daily case increments.
# The aggregations are given as string aliases: passing np.mean / np.median
# to agg() is deprecated in recent pandas versions and emits a FutureWarning.
df_means = df.groupby('Weekday', as_index=False).agg(
    mean_cases_inc = pd.NamedAgg(column='Cases_Inc', aggfunc='mean'),
    median_cases_inc = pd.NamedAgg(column='Cases_Inc', aggfunc='median'))

In [None]:
# relate each weekday mean to the mean over all days
cases_inc_overall_mean = df['Cases_Inc'].mean()
print('Incremental Cases - overall average: ', np.round(cases_inc_overall_mean, 2))

# factor = overall mean / weekday mean
df_means['Factor_to_overall_mean'] = df_means['mean_cases_inc'].rdiv(cases_inc_overall_mean)
df_means

<a id='4'></a>
# Development of Deaths

In [None]:
# line chart of accumulated deaths over time
fig, ax = plt.subplots(figsize=(psx, psy))
ax.plot(df['Date'], df['Deaths'], color='darkred')
ax.set_title(f'Deaths - {country_select}')
ax.set_xlabel('Date')
ax.set_ylabel('Count')
ax.grid()
plt.show()

In [None]:
# scatter chart of the daily death increments over time
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Deaths_Inc'], color='darkred')
ax.set_title(f'Deaths Incremental - {country_select}')
ax.set_xlabel('Date')
ax.set_ylabel('Count')
ax.grid()
plt.show()

In [None]:
# plot incremental development - last seven days only
# Select the final seven rows positionally, so this cell does not depend on
# the global row count n or on the index labels of df.
last7_deaths = df.tail(7)
mean_deaths_inc_7 = np.round(last7_deaths['Deaths_Inc'].mean(), 2)
fig, ax = plt.subplots(figsize=(psx,psy))
ax.scatter(last7_deaths['Date'], last7_deaths['Deaths_Inc'], color='darkred')
# dashed horizontal line at the 7-day mean, from the first to the last plotted date
plt.hlines(mean_deaths_inc_7,
           xmin=last7_deaths['Date'].iloc[0], xmax=last7_deaths['Date'].iloc[-1],
           color='darkred', linestyles='dashed')
plt.title('Deaths Incremental (last 7 days only) [mean = ' + str(mean_deaths_inc_7) + '] - ' + country_select)
plt.xlabel('Date')
plt.ylabel('Count')
plt.grid()
plt.show()

In [None]:
# scatter chart of the day-to-day relative change of accumulated deaths
# (full y range, no axis limits applied)
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Deaths_RelChange'], color='darkred')
ax.set_title(f'Deaths Relative Change - {country_select}')
ax.set_xlabel('Date')
ax.set_ylabel('Relative Change')
ax.grid()
plt.show()

In [None]:
# same chart, zoomed in to relative changes between 0 and 5%
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['Deaths_RelChange'], color='darkred')
ax.set_title(f'Deaths Relative Change [Zoom] - {country_select}')
ax.set_ylim(0, 0.05)
ax.set_xlabel('Date')
ax.set_ylabel('Relative Change')
ax.grid()
plt.show()

In [None]:
# show changes of last 7 days
# Select the final seven rows positionally, so this cell does not depend on
# the global row count n or on the index labels of df.
last7_deaths_rel = df.tail(7)
recent_changes_deaths = last7_deaths_rel['Deaths_RelChange']
m_recent_changes_deaths = np.round(recent_changes_deaths.mean(),4)

fig, ax = plt.subplots(figsize=(psx,psy))
ax.scatter(last7_deaths_rel['Date'], recent_changes_deaths, color='darkred')
# dashed horizontal line at the 7-day mean, from the first to the last plotted date
plt.hlines(m_recent_changes_deaths,
           xmin=last7_deaths_rel['Date'].iloc[0], xmax=last7_deaths_rel['Date'].iloc[-1],
           color='darkred', linestyles='dashed')
plt.title('Deaths - Relative change last seven days - mean = ' + str(m_recent_changes_deaths) + ' - ' + country_select)
plt.xlabel('Date')
plt.ylabel('Relative Change')
plt.grid()
plt.show()

<a id='5'></a>
# Ratios

In [None]:
# scatter chart of the case fatality rate (accumulated deaths / accumulated cases) over time
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Date'], df['CFR'])
ax.set_title(f'Case Fatality Rate - {country_select}')
ax.set_xlabel('Date')
ax.set_ylabel('Case Fatality Rate')
ax.grid()
plt.show()

<a id='6'></a>
# Other Evaluations

In [None]:
# daily case increment (y) against accumulated cases (x)
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Cases'], df['Cases_Inc'])
ax.set_title(f'Incremental Cases vs Cases - {country_select}')
ax.set_xlabel('Cases')
ax.set_ylabel('Incremental Cases')
ax.grid()
plt.show()

In [None]:
# accumulated deaths (y) against accumulated cases (x)
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Cases'], df['Deaths'])
ax.set_title(f'Deaths vs Cases - {country_select}')
ax.set_xlabel('Cases')
ax.set_ylabel('Deaths')
ax.grid()
plt.show()

In [None]:
# daily death increment (y) against daily case increment (x);
# marker area is proportional to the accumulated number of cases
bubble_sizes = 0.0002 * df['Cases']
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Cases_Inc'], df['Deaths_Inc'], s=bubble_sizes, alpha=0.2)
ax.set_title(f'Incremental Deaths vs Incremental Cases - {country_select}')
ax.set_xlabel('Incremental Cases')
ax.set_ylabel('Incremental Deaths')
ax.grid()
plt.show()

#### Interactive version:

In [None]:
# interactive plotly version of the previous chart:
# marker size ~ accumulated cases, marker color ~ accumulated deaths
fig = px.scatter(
    df,
    x='Cases_Inc',
    y='Deaths_Inc',
    color='Deaths',
    size='Cases',
    hover_name='Date',
    opacity=0.5,
)
layout_settings = dict(
    title=f'Incremental Deaths vs Incremental Cases - {country_select}',
    xaxis_title='Incremental Cases',
    yaxis_title='Incremental Deaths',
)
fig.update_layout(**layout_settings)
# fig.update_traces(marker_coloraxis=None) # hide colorbar
fig.show()

<a id='7'></a>
# Comparison with another country/region

In [None]:
# Second country/region, compared against country_select below; the value is
# used as a column name of df_cases and df_deaths.
country_compare = 'Belgium'

In [None]:
# assemble the data frame for the comparison country from rows 1..n (row 0 is skipped)
comp_rows = slice(1, n + 1)
df_comp = pd.DataFrame({
    'Date': pd.to_datetime(df_cases['Country/Region'][comp_rows]),
    'Cases_Inc': pd.to_numeric(df_cases[country_compare][comp_rows]),
    'Deaths_Inc': pd.to_numeric(df_deaths[country_compare][comp_rows]),
})
# the source values are increments, so running totals are their cumulative sums
df_comp['Cases'] = df_comp['Cases_Inc'].cumsum()
df_comp['Deaths'] = df_comp['Deaths_Inc'].cumsum()

# derived columns
# case fatality rate: accumulated deaths divided by accumulated cases
df_comp['CFR'] = df_comp['Deaths'].div(df_comp['Cases'])
# relative change of each running total versus the previous row
df_comp['Cases_RelChange'] = df_comp['Cases'].pct_change()
df_comp['Deaths_RelChange'] = df_comp['Deaths'].pct_change()

In [None]:
# display the last 7 rows of the comparison country
df_comp.tail(n=7)

In [None]:
# plot development of cases for both countries
fig, ax = plt.subplots(figsize=(psx,psy))
ax.plot(df.Date, df.Cases, color='darkgreen', label=country_select)
# each series is plotted against the dates of its own data frame
ax.plot(df_comp.Date, df_comp.Cases, color='darkgrey', label=country_compare)
plt.title('Cases - ' + country_select + ' vs ' + country_compare)
# legend tells the two lines apart
ax.legend()
plt.grid()
plt.show()

In [None]:
# plot incremental development of cases for both countries
fig, ax = plt.subplots(figsize=(psx,psy))
ax.scatter(df.Date, df.Cases_Inc, color='darkgreen', label=country_select)
ax.scatter(df_comp.Date, df_comp.Cases_Inc, color='darkgrey', label=country_compare)
plt.title('Cases Incremental - ' + country_select + ' vs ' + country_compare)
plt.xlabel('Date')
plt.ylabel('Count')
# legend tells the two point clouds apart
ax.legend()
plt.grid()
plt.show()

In [None]:
# plot development of deaths for both countries
fig, ax = plt.subplots(figsize=(psx,psy))
ax.plot(df.Date, df.Deaths, color='darkred', label=country_select)
# each series is plotted against the dates of its own data frame
ax.plot(df_comp.Date, df_comp.Deaths, color='darkgrey', label=country_compare)
plt.title('Deaths - ' + country_select + ' vs ' + country_compare)
# legend tells the two lines apart
ax.legend()
plt.grid()
plt.show()

In [None]:
# plot development of case fatality rate (= deaths / cases) for both countries
fig, ax = plt.subplots(figsize=(psx,psy))
ax.scatter(df.Date, df.CFR, label=country_select)
ax.scatter(df_comp.Date, df_comp.CFR, color='darkgrey', label=country_compare)
plt.title('Case Fatality Rate - ' + country_select + ' vs ' + country_compare)
plt.xlabel('Date')
plt.ylabel('Case Fatality Rate')
# legend tells the two point clouds apart
ax.legend()
plt.grid()
plt.show()

In [None]:
# accumulated cases of the comparison country (y) against the selected country (x)
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Cases'], df_comp['Cases'], color='darkgreen')
ax.set_title(f'Cases vs Cases - {country_select} vs {country_compare}')
ax.set_xlabel(f'Count {country_select}')
ax.set_ylabel(f'Count {country_compare}')
ax.grid()
plt.show()

In [None]:
# daily case increments of the comparison country (y) against the selected country (x)
fig, ax = plt.subplots(figsize=(psx, psy))
ax.scatter(df['Cases_Inc'], df_comp['Cases_Inc'], color='darkgreen')
ax.set_title(f'Incremental Cases vs Incremental Cases - {country_select} vs {country_compare}')
ax.set_xlabel(f'Incremental Cases {country_select}')
ax.set_ylabel(f'Incremental Cases {country_compare}')
ax.grid()
plt.show()

In [None]:
# Combined frame: the selected country's data plus the comparison country's
# totals and increments in extra *_Comp columns.
# A copy is taken so that these columns are not silently added to df itself
# (a plain `df_temp = df` would only create a second name for the same object).
df_temp = df.copy()
df_temp['Cases_Comp'] = df_comp.Cases
df_temp['Cases_Inc_Comp'] = df_comp.Cases_Inc
df_temp['Deaths_Comp'] = df_comp.Deaths
df_temp['Deaths_Inc_Comp'] = df_comp.Deaths_Inc

In [None]:
# interactive comparison of the daily case increments of both countries:
# marker size ~ accumulated cases of the selected country,
# marker color ~ accumulated cases of the comparison country
fig = px.scatter(
    df_temp,
    x='Cases_Inc',
    y='Cases_Inc_Comp',
    color='Cases_Comp',
    size='Cases',
    hover_name='Date',
    opacity=0.5,
)
cases_layout = dict(
    title=f'Incremental Cases vs Incremental Cases - {country_select} vs {country_compare}',
    xaxis_title=f'Incremental Cases {country_select}',
    yaxis_title=f'Incremental Cases {country_compare}',
)
fig.update_layout(**cases_layout)
# fig.update_traces(marker_coloraxis=None) # hide colorbar
fig.show()

In [None]:
# interactive comparison of the daily death increments of both countries:
# marker size ~ accumulated deaths of the selected country,
# marker color ~ accumulated deaths of the comparison country
fig = px.scatter(
    df_temp,
    x='Deaths_Inc',
    y='Deaths_Inc_Comp',
    color='Deaths_Comp',
    size='Deaths',
    hover_name='Date',
    opacity=0.5,
)
deaths_layout = dict(
    title=f'Incremental Deaths vs Incremental Deaths - {country_select} vs {country_compare}',
    xaxis_title=f'Incremental Deaths {country_select}',
    yaxis_title=f'Incremental Deaths {country_compare}',
)
fig.update_layout(**deaths_layout)
# fig.update_traces(marker_coloraxis=None) # hide colorbar
fig.show()

In [None]:
# correlation matrices (Pearson and Spearman) of the daily increments of both countries
selected_features = ['Cases_Inc','Cases_Inc_Comp','Deaths_Inc','Deaths_Inc_Comp']
increments_view = df_temp[selected_features]
corr_pearson = increments_view.corr(method='pearson')
corr_spearman = increments_view.corr(method='spearman')

In [None]:
# annotated heatmap of the Pearson correlation matrix, color scale fixed to [-1, +1]
pearson_heatmap_opts = dict(annot=True, cmap='RdYlGn', vmin=-1, vmax=+1)
sns.heatmap(corr_pearson, **pearson_heatmap_opts)
plt.title('Pearson correlation')
plt.show()

In [None]:
# annotated heatmap of the Spearman correlation matrix, color scale fixed to [-1, +1]
spearman_heatmap_opts = dict(annot=True, cmap='RdYlGn', vmin=-1, vmax=+1)
sns.heatmap(corr_spearman, **spearman_heatmap_opts)
plt.title('Spearman correlation')
plt.show()