In [32]:
# Load the latest summary data

import pandas as pd
import numpy as np
import altair as alt

# Read the raw data (fetched over the network each time this cell runs)
url = 'https://raw.githubusercontent.com/ratreya/jupyter-notebooks/master/jhu-daily-reports.csv'
df = pd.read_csv(url)

# Distinct report dates, oldest first. The 'Date' values are MM-DD-YYYY strings
# (as in the literals this notebook compares against, e.g. '03-22-2020'), so a
# plain string sort would place '01-05-2021' before '12-31-2020'. Sort on the
# parsed date instead; the list entries themselves stay the original strings,
# which keeps `days[-1]` usable in equality filters against the 'Date' column.
days = sorted(df['Date'].unique(), key=lambda day: pd.to_datetime(day, format='%m-%d-%Y'))

# Set up some global config and variables
alt.renderers.enable('default')

def align(data):
    """Left-pad series with zeros so their starting values line up with a base series.

    The base is the series with the smallest strictly positive first value
    (the earliest such series wins a tie). Every other non-empty series is
    prefixed with `p` zeros, where `p` is the position in the base series whose
    value is closest to that series' first value (the earliest position wins a
    tie).

    Parameters
    ----------
    data : list of list of numbers
        The series to align. The list is modified in place: padded series
        replace the originals at the same positions.

    Returns
    -------
    list
        The same `data` object that was passed in.

    Notes
    -----
    `data` is returned unchanged when it is empty or when no series starts with
    a positive value, because there is then no base to align against. Empty
    series are skipped: they have no starting value.
    """
    # Only non-empty series that start above zero can serve as the base
    candidates = [
        idx for idx, series in enumerate(data)
        if len(series) > 0 and series[0] > 0
    ]
    if not candidates:
        return data

    # min() keeps the first of equally small starts
    base = min(candidates, key=lambda idx: data[idx][0])
    reference = data[base]

    # Align everyone else to this base
    for i, series in enumerate(data):
        if i == base or len(series) == 0:
            continue
        start = series[0]
        # Position in the base series whose value is nearest to this series' start
        padding = min(range(len(reference)), key=lambda pos: abs(reference[pos] - start))
        data[i] = [0] * padding + series
    return data

def streamgraph(df, by, value, sort, limit):
    """Display a centre-stacked area chart (streamgraph) of `value` over time.

    The `limit` highest-ranked entities each get their own band; all remaining
    entities are summed per date into a single band labelled 'Others'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date' column plus the `by`, `value` and `sort` columns.
    by : str
        Column identifying the entities; one colour per distinct value.
    value : str
        Quantitative column plotted on the y axis.
    sort : str
        Column used to rank entities, read from the rows dated `days[-1]`.
    limit : int
        Number of top-ranked entities shown individually.

    Returns
    -------
    None
        The chart is rendered via `chart.display()`.

    Notes
    -----
    Reads the notebook-level `days` list to pick the ranking date.
    """
    # Rank entities by `sort` on the latest date, largest first
    latest = df[df['Date'] == days[-1]]
    top = latest.sort_values(sort, ascending=False)[by].values.tolist()

    data = df[df[by].isin(top[:limit])]

    # Collapse everything below the cut-off into one row per date.
    # numeric_only=True keeps text columns (including `by`) out of the sum;
    # otherwise, on pandas >= 2.0, `by` would survive the aggregation and the
    # insert below would fail because the column already exists.
    others = (
        df[df[by].isin(top[limit:])]
        .groupby('Date', as_index=False)
        .sum(numeric_only=True)
    )
    others.insert(0, by, 'Others')

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement
    data = pd.concat([data, others], ignore_index=True)

    chart = alt.Chart(data).mark_area().encode(
        alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0)),
        alt.Y(value + ':Q', stack='center', axis=None),
        alt.Color(by + ':N', scale=alt.Scale(scheme='tableau20'))
    ).properties(width=1000, height=600)
    chart.display()

In [33]:
# Aggregate at country level.
# numeric_only=True sums only the numeric columns; since pandas 2.0 the default
# would also try to add up the text columns.
country_level = df.groupby(['Country', 'Date'], as_index=False).sum(numeric_only=True)

# Drop 03-22-2020 since county breakdown started from here and so there is break in continuity
# NOTE(review): 03-12-2020 and 02-12-2020 are excluded as well; the reason is not recorded here.
country_excluded_dates = ['03-22-2020', '03-12-2020', '02-12-2020']
country_level = country_level[~country_level['Date'].isin(country_excluded_dates)]

streamgraph(country_level, 'Country', 'Confirmed_New', 'Confirmed', 10)

In [34]:
# Plot top contributing states to US trend

# numeric_only=True sums only the numeric columns; since pandas 2.0 the default
# would also try to add up the text columns.
us_state_level = (
    df[df['Country'] == 'US']
    .groupby(['State', 'Date'], as_index=False)
    .sum(numeric_only=True)
)

# Drop 03-22-2020 since county breakdown started from here and so there is break in continuity
# NOTE(review): 03-18-2020 is excluded as well; the reason is not recorded here.
state_excluded_dates = ['03-22-2020', '03-18-2020']
us_state_level = us_state_level[~us_state_level['Date'].isin(state_excluded_dates)]

# Keep 1 March 2020 onwards. 'Date' holds MM-DD-YYYY strings, so comparing the
# raw strings would misplace any date from a later year; compare parsed dates.
state_report_dates = pd.to_datetime(us_state_level['Date'], format='%m-%d-%Y')
us_state_level = us_state_level[state_report_dates >= pd.Timestamp(2020, 3, 1)]

streamgraph(us_state_level, 'State', 'Confirmed_New', 'Confirmed', 10)

In [35]:
# Rank countries by cumulative confirmed cases on the latest date, largest first.
# The ranking reads the country-level aggregate: the raw frame carries several
# rows per country and date (it is grouped by Country/Date to build
# `country_level`), so ranking raw rows would repeat countries and order them
# by their largest sub-region instead of their national total.
top = (
    country_level[country_level['Date'] == days[-1]]
    .sort_values('Confirmed', ascending=False)['Country']
    .values.tolist()
)

# Ten leading countries plus South Korea, limited to rows with more than
# 1 new case and more than 100 cumulative cases
data = country_level[
    (country_level['Confirmed_New'] > 1)
    & (country_level['Confirmed'] > 100)
    & (country_level['Country'].isin(top[:10] + ['South Korea']))
]

# LOESS-smoothed new cases against cumulative cases, one line per country,
# both axes on a log scale
alt.Chart(data).transform_loess('Confirmed', 'Confirmed_New', bandwidth=0.2, groupby=['Country']).mark_line().encode(
    alt.X('Confirmed', scale=alt.Scale(type='log')),
    alt.Y('Confirmed_New', scale=alt.Scale(type='log')),
    color = 'Country',
    tooltip=['Country', 'Confirmed_New']
    ).properties(width=1000, height=600).interactive()

In [36]:
# States ordered by cumulative confirmed cases on the latest date, largest first
latest_by_state = us_state_level[us_state_level['Date'] == days[-1]]
top = latest_by_state.sort_values('Confirmed', ascending=False)['State'].values.tolist()

# Ten leading states, limited to rows with more than 1 new case and more than
# 100 cumulative cases
has_new_cases = us_state_level['Confirmed_New'] > 1
past_100_cases = us_state_level['Confirmed'] > 100
in_top_states = us_state_level['State'].isin(top[:10])
data = us_state_level[has_new_cases & past_100_cases & in_top_states]

# LOESS-smoothed new cases against cumulative cases, one line per state,
# both axes on a log scale
state_trend = (
    alt.Chart(data)
    .transform_loess('Confirmed', 'Confirmed_New', bandwidth=0.3, groupby=['State'])
    .mark_line()
    .encode(
        alt.X('Confirmed', scale=alt.Scale(type='log')),
        alt.Y('Confirmed_New', scale=alt.Scale(type='log')),
        color='State',
        tooltip=['State', 'Confirmed_New'],
    )
    .properties(width=1000, height=600)
    .interactive()
)
state_trend