# Compare Country Death Trajectories

Inspired by:
- https://github.com/pratapvardhan/notebooks/blob/master/covid19/covid19-compare-country-death-trajectories.ipynb

In [1]:
#hide
import pandas as pd
import altair as alt
from IPython.display import HTML

In [2]:
#hide
# Source: JHU CSSE COVID-19 cumulative deaths time series. The
# raw.githubusercontent.com endpoint is read here (the github.com/.../blob/...
# page URL for the same file was tried by the original author and abandoned).
url = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/'
       'csse_covid_19_time_series/time_series_19-covid-Deaths.csv')

# Load the wide table and normalise the one country label the notebook renames.
df = pd.read_csv(url).assign(**{
    'Country/Region': lambda raw: raw['Country/Region'].replace({'Korea, South': 'South Korea'}),
})

# Every column that is not location metadata is a per-day column
# (labels look like '1/22/20').
dt_cols = df.columns[
    ~df.columns.isin(['Province/State', 'Country/Region', 'Lat', 'Long'])
]

In [3]:
#hide
# Collapse provinces into one row per country, then reshape wide -> long so that
# each row is (Country, Date, value).
# NOTE: the value column is called 'Confirmed Cases' throughout this notebook,
# but the file loaded above is the *Deaths* series, so it holds cumulative deaths.
dff = (
    df.groupby('Country/Region')[dt_cols].sum()
    # Name both axes up front so the stacked levels come out already labelled.
    .rename_axis(index='Country', columns='Date')
    .stack()
    .reset_index(name='Confirmed Cases')
    # Column labels such as '1/22/20' become real timestamps.
    .assign(Date=lambda long: pd.to_datetime(long['Date'], format='%m/%d/%y'))
)

In [4]:
# Sanity check: the Date column should now hold parsed datetime64 values.
dff['Date']

0      2020-01-22
1      2020-01-23
2      2020-01-24
3      2020-01-25
4      2020-01-26
          ...    
8985   2020-03-15
8986   2020-03-16
8987   2020-03-17
8988   2020-03-18
8989   2020-03-19
Name: Date, Length: 8990, dtype: datetime64[ns]

In [5]:
# Sanity check: column dtypes of the long-format frame.
dff.dtypes

Country                    object
Date               datetime64[ns]
Confirmed Cases             int64
dtype: object

In [6]:
# Show the raw per-day column labels (strings in m/d/yy form) taken from the CSV header.
dt_cols

Index(['1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       '1/28/20', '1/29/20', '1/30/20', '1/31/20', '2/1/20', '2/2/20',
       '2/3/20', '2/4/20', '2/5/20', '2/6/20', '2/7/20', '2/8/20', '2/9/20',
       '2/10/20', '2/11/20', '2/12/20', '2/13/20', '2/14/20', '2/15/20',
       '2/16/20', '2/17/20', '2/18/20', '2/19/20', '2/20/20', '2/21/20',
       '2/22/20', '2/23/20', '2/24/20', '2/25/20', '2/26/20', '2/27/20',
       '2/28/20', '2/29/20', '3/1/20', '3/2/20', '3/3/20', '3/4/20', '3/5/20',
       '3/6/20', '3/7/20', '3/8/20', '3/9/20', '3/10/20', '3/11/20', '3/12/20',
       '3/13/20', '3/14/20', '3/15/20', '3/16/20', '3/17/20', '3/18/20',
       '3/19/20'],
      dtype='object')

In [7]:
#hide
# Minimum value a country must have on the last reported date to be charted.
MIN_CASES = 10

# LAST_DATE is the label (a 'm/d/yy' string) of the most recent date column that
# actually contains data. Start from the newest column and walk backwards,
# because the newest column is sometimes entirely empty/zero.
LAST_DATE = dt_cols[-1]
for c in dt_cols[::-1]:
    if not df[c].fillna(0).eq(0).all():
        LAST_DATE = c
        break

# Countries (excluding China) with at least MIN_CASES on LAST_DATE, ordered from
# the highest value to the lowest. LAST_DATE stays a raw column label; it is
# parsed here with the same explicit format used for dff['Date'] instead of
# relying on pandas' implicit string-to-timestamp coercion inside .eq().
countries = (
    dff[dff['Date'].eq(pd.to_datetime(LAST_DATE, format='%m/%d/%y'))
        & dff['Confirmed Cases'].ge(MIN_CASES)
        & dff['Country'].ne('China')]
    .sort_values(by='Confirmed Cases', ascending=False)['Country']
    .values
)

In [8]:
#hide
# Threshold that defines "day 0" for each country, and the derived column name.
SINCE_CASES_NUM = 10
COL_X = f'Days since {SINCE_CASES_NUM}th death'

# Work on a copy restricted to the selected countries.
dff2 = dff[dff['Country'].isin(countries)].copy()

# For every row, the first date on which that row's country reached the
# threshold. Only rows at/above the threshold take part in the min(), so a
# country that never reaches it maps to NaT instead of silently being assigned
# its first date (which is what idxmax over an all-False flag would return).
days_since = dff2['Country'].map(
    dff2[dff2['Confirmed Cases'].ge(SINCE_CASES_NUM)].groupby('Country')['Date'].min()
)
dff2[COL_X] = (dff2['Date'] - days_since).dt.days

# Keep only rows on or after day 0. Rows with NaT (never reached the threshold)
# compare False and are dropped here; the cast restores an integer column in
# case NaN had forced it to float.
dff2 = (
    dff2[dff2[COL_X].ge(0)]
    .assign(**{COL_X: lambda kept: kept[COL_X].astype('int64')})
)

In [9]:
# Inspect the filtered frame, including the new days-since-threshold column.
dff2

Unnamed: 0,Country,Date,Confirmed Cases,Days since 10th death
925,Belgium,2020-03-17,10,0
926,Belgium,2020-03-18,14,1
927,Belgium,2020-03-19,21,2
3119,France,2020-03-07,11,0
3120,France,2020-03-08,19,1
...,...,...,...,...
8695,United Kingdom,2020-03-15,21,1
8696,United Kingdom,2020-03-16,56,2
8697,United Kingdom,2020-03-17,56,3
8698,United Kingdom,2020-03-18,72,4


In [16]:
#hide
def get_country_colors(x):
    """Return the colour string used to draw country ``x``.

    Countries with an explicit entry in the palette get that colour; any
    other value falls back to the neutral grey '#C1B7AD'.
    """
    fallback = '#C1B7AD'
    palette = dict([
        ('Italy', 'black'),
        ('Iran', '#A1BA59'),
        ('South Korea', '#A1BA59'),
        ('Spain', '#E45756'),
        ('Germany', '#9D755D'),
        ('France', '#2495D3'),
        ('US', '#9D755D'),
        ('Switzerland', '#9D755D'),
        ('Norway', '#C1B7AD'),
        ('United Kingdom', '#C1B7AD'),
        ('Netherlands', '#F58518'),
        ('Sweden', '#C1B7AD'),
        ('Belgium', '#C1B7AD'),
        ('Denmark', '#C1B7AD'),
        ('Austria', '#C1B7AD'),
        ('Japan', '#9467bd'),
    ])
    if x in palette:
        return palette[x]
    return fallback

In [17]:
#hide_input
# Countries that make_since_chart pre-selects by default.
baseline_countries = [
    'Italy', 'South Korea', 'Japan', 'Spain',
    'France', 'United Kingdom', 'Netherlands', 'Germany',
]

# Latest date present in the filtered data.
max_date = dff2['Date'].max()

# Explicit colour scale: one domain entry per country present in dff2 (in order
# of first appearance), paired position-by-position with its colour.
color_domain = dff2['Country'].unique().tolist()
color_range = [get_country_colors(country) for country in color_domain]

In [18]:
# Re-display the frame that feeds the chart below.
dff2

Unnamed: 0,Country,Date,Confirmed Cases,Days since 10th death
925,Belgium,2020-03-17,10,0
926,Belgium,2020-03-18,14,1
927,Belgium,2020-03-19,21,2
3119,France,2020-03-07,11,0
3120,France,2020-03-08,19,1
...,...,...,...,...
8695,United Kingdom,2020-03-15,21,1
8696,United Kingdom,2020-03-16,56,2
8697,United Kingdom,2020-03-17,56,3
8698,United Kingdom,2020-03-18,72,4


In [19]:
def make_since_chart(highlight_countries=None, baseline_countries=baseline_countries):
    """Build the layered Altair chart comparing per-country trajectories.

    The chart is drawn from the notebook-level ``dff2`` frame and has four layers:

    * a dashed black reference line ``SINCE_CASES_NUM * 1.33 ** day``;
    * a '33% Daily Growth' text label at the last day of that reference line;
    * one line (with point markers) per country, on a log-scaled y axis titled
      'Cumulative Deaths', coloured via ``color_domain`` / ``color_range``;
    * a bold country-name label on rows whose Date is at or after ``max_date``.

    A legend-bound multi-selection controls opacity: selected countries are
    drawn at opacity 1, all others at 0.05.

    Parameters
    ----------
    highlight_countries : iterable of str, optional
        Extra countries to pre-select. ``None`` (the default) means none.
    baseline_countries : iterable of str
        Countries pre-selected in addition to ``highlight_countries``.
        Defaults to the notebook-level ``baseline_countries`` list.

    Returns
    -------
    The combined Altair chart with its title set.
    """
    # Copy both inputs into fresh lists: avoids a shared mutable default and
    # lets callers pass any iterable (tuple, array, ...), not only lists.
    extra = [] if highlight_countries is None else list(highlight_countries)
    preselected = extra + list(baseline_countries)

    selection = alt.selection_multi(fields=['Country'], bind='legend',
                                    init=[{'Country': x} for x in preselected])

    base = alt.Chart(dff2, width=550).encode(
        x=f'{COL_X}:Q',
        y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log'), axis=alt.Axis(title='Cumulative Deaths')),
        color=alt.Color('Country:N', scale=alt.Scale(domain=color_domain, range=color_range)),
        tooltip=list(dff2),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05))
    )

    # Cast to a built-in int so range() and the filter expression below receive
    # a plain Python number rather than a NumPy scalar.
    max_day = int(dff2[COL_X].max())

    # Reference curve: start at the threshold and grow 33% per day.
    ref = pd.DataFrame([[x, SINCE_CASES_NUM*1.33**x] for x in range(max_day+1)], columns=[COL_X, 'Confirmed Cases'])
    base_ref = alt.Chart(ref).encode(x=f'{COL_X}:Q', y='Confirmed Cases:Q')

    return (
        base_ref.mark_line(color='black', opacity=.5, strokeDash=[3,3]) +
        # Label the reference line only at its final point.
        base_ref.transform_filter(
            alt.datum[COL_X] >= max_day
        ).mark_text(dy=-6, align='right', fontSize=10, text='33% Daily Growth') +
        base.mark_line(point=True).add_selection(selection) +
        # Country-name labels only on the most recent rows; max_date is
        # converted to epoch milliseconds for the comparison.
        base.transform_filter(
            alt.datum['Date'] >= int(max_date.timestamp() * 1000)
        ).mark_text(dy=-8, align='right', fontWeight='bold').encode(text='Country:N')
    ).properties(
        title="Compare countries death trajectories"
    )

Click a country in the legend (Shift+click to select several) to filter the visualization.

In [20]:
#hide_input
# Small footer showing the date of the latest non-empty data column.
# LAST_DATE is a 'm/d/yy' column label, so it is parsed with that explicit format.
# The element is closed with </small> (it was previously left unclosed).
HTML(f'<small class="float-right">Last Updated on {pd.to_datetime(LAST_DATE, format="%m/%d/%y").strftime("%B, %d %Y")}</small>')

In [21]:
#hide_input
# Build the chart with the default arguments and display it as the cell's output.
chart = make_since_chart()
chart