In [9]:
# Load the latest summary data

import pandas as pd
import numpy as np
import altair as alt
from typing import List

# Read the raw data: use the CSV in the working directory when it is there and
# fall back to the copy hosted at `url` when the local file is missing.
url = 'https://raw.githubusercontent.com/ratreya/jupyter-notebooks/master/jhu-daily-reports.csv'
try:
    df = pd.read_csv('jhu-daily-reports.csv')
except FileNotFoundError:
    df = pd.read_csv(url)

# Distinct report dates in ascending order; days[-1] is treated as the latest day.
# NOTE(review): the dates are ordered as plain strings, so this assumes their text
# order matches chronological order - TODO confirm once data spans several years.
days = sorted(df['Date'].unique())

# Active = Confirmed minus the cases that ended in death or recovery
df['Active'] = df.Confirmed - (df.Deaths + df.Recovered)

# Set up some global config and variables
alt.renderers.enable('default')

def align(data):
    """Left-pad series with zeros so they line up with a common base series.

    The base is the series with the lowest strictly positive first value. Every
    other non-empty series is prefixed with as many zeros as the position in the
    base whose value is closest to that series' first value (the earliest such
    position on ties).

    Parameters:
        data: list of lists of numbers. It is modified in place.

    Returns:
        The same `data` list. It is returned unchanged when it is empty or when
        no series starts with a positive value, since there is no base to
        align against.
    """
    # Candidate bases: non-empty series whose first value is strictly positive
    candidates = [
        (idx, series[0])
        for idx, series in enumerate(data)
        if len(series) > 0 and series[0] > 0
    ]
    if not candidates:
        return data

    # Find the index with the lowest non-zero starting case number
    base = min(candidates, key=lambda item: item[1])[0]
    reference = data[base]

    # Align everyone else to this base
    for i, series in enumerate(data):
        if i == base or len(series) == 0:
            continue
        start = series[0]
        # Position in the base series whose value is closest to this series' start
        padding = min(range(len(reference)), key=lambda pos: abs(reference[pos] - start))
        data[i] = [0] * padding + series
    return data

def streamgraph(df, by, value, sort, limit, stack='center'):
    """Build a stacked-area chart of `value` over time with one layer per `by` group.

    Groups are ranked by `sort` on the last date in the global `days`. The first
    `limit` groups are drawn individually; all remaining groups are summed per
    date into a single layer named 'Others'. Negative values of `value` are
    treated as zero.

    Parameters:
        df: DataFrame with a 'Date' column plus the `by`, `value` and `sort` columns.
        by: name of the column that identifies a group (one layer per group).
        value: name of the column plotted on the y axis.
        sort: name of the column used to rank groups on the latest date.
        limit: number of top-ranked groups that get their own layer.
        stack: stacking mode passed to the y encoding. The y axis is only shown
            (titled with `value`, underscores replaced by spaces) when this is 'zero'.

    Returns:
        An Altair chart with a legend-bound selection that dims unselected layers.
    """
    # Clamp negatives on a copy so the caller's frame is left untouched
    df = df.copy()
    df.loc[df[value] < 0, value] = 0

    order = df[df['Date'] == days[-1]].sort_values(sort, ascending=False)
    top = order[by].values.tolist()
    data = df[df[by].isin(top[:limit])]

    # Sum only numeric columns: the `by` column must not survive the aggregation,
    # because it is re-inserted below with the constant label 'Others'.
    others = df[df[by].isin(top[limit:])].groupby('Date', as_index=False).sum(numeric_only=True)
    others.insert(0, by, 'Others')
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x
    data = pd.concat([data, others], ignore_index=True)
    selection = alt.selection_multi(fields=[by], bind='legend')

    return alt.Chart(data).mark_area().encode(
        alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0)),
        alt.Y(value + ':Q', stack=stack, axis=alt.Axis(title=' '.join(value.split('_'))) if stack=='zero' else None),
        alt.Color(by + ':N', scale=alt.Scale(scheme='tableau20'), sort=top[:limit]),
        # Layer order follows the `sort` value looked up from the latest-day ranking
        order=alt.Order('sort:Q'),
        tooltip=alt.Tooltip(['Date:T', by, value]),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
    ).transform_lookup(
        lookup=by,
        from_=alt.LookupData(order, by, [sort]),
        as_=['sort']
    ).properties(
        width=1200,
        height=800
    ).add_selection(selection)

def nontimegraph(df, by, value, sort, limit, highlight='', addon=[]):
    """Plot `value` against cumulative confirmed cases on log-log axes, one line per group.

    Groups are ranked by `sort` on the last date in the global `days`; the first
    `limit` groups plus any listed in `addon` are drawn. Each line ends in a
    labelled dot at the latest date. Groups picked through the legend (initially
    `highlight`) are drawn in black and enlarged.

    Parameters:
        df: DataFrame with 'Date' and 'Confirmed' columns plus `by`, `value`, `sort`.
        by: name of the column that identifies a group.
        value: name of the column plotted on the y axis.
        sort: name of the column used to rank groups on the latest date.
        limit: number of top-ranked groups to draw.
        highlight: group that starts out selected; '' picks the 'category10'
            colour scheme for the lines, anything else picks 'set2'.
        addon: extra group names drawn in addition to the top `limit`.

    Returns:
        A layered Altair chart (lines, end dots, end labels).
    """
    latest_day = days[-1]

    # Rank the groups on the latest day and keep the leaders plus the extras
    ranked = df[df['Date'] == latest_day].sort_values(sort, ascending=False)
    keep = ranked[by].values.tolist()[:limit] + addon

    # Rows with value > 1 and more than 100 confirmed cases, for the kept groups
    # (presumably so that both log axes only see positive numbers)
    wanted = (df[value] > 1) & (df['Confirmed'] > 100) & (df[by].isin(keep))
    data = df[wanted]

    selection = alt.selection_multi(fields=[by], bind='legend', init=[{by: highlight}], empty='none', nearest=True)

    group_field = by + ':N'
    value_field = value + ':Q'
    y_title = ' '.join(value.split('_'))
    palette = 'category10' if highlight == '' else 'set2'

    def when_selected(chosen, otherwise):
        # Encoding that switches on whether the group is part of the selection
        return alt.condition(selection, chosen, otherwise)

    line = alt.Chart(data).mark_line().encode(
        alt.X('Confirmed:Q', scale=alt.Scale(type='log'), axis=alt.Axis(title='Cumulative Cases')),
        alt.Y(value_field, scale=alt.Scale(type='log'), axis=alt.Axis(title=y_title)),
        color=when_selected(alt.value('black'), alt.Color(group_field, scale=alt.Scale(scheme=palette))),
        size=when_selected(alt.value(3), alt.value(1)),
        tooltip=alt.Tooltip(['Date:T', by, value])
    )

    # One dot per group at its latest-day position
    latest_rows = data[data['Date'] == latest_day]
    point = alt.Chart(latest_rows).mark_circle(size=60).encode(
        alt.X('Confirmed:Q', scale=alt.Scale(type='log')),
        alt.Y(value_field, scale=alt.Scale(type='log')),
        color=when_selected(alt.value('black'), alt.Color(group_field)),
        size=when_selected(alt.value(100), alt.value(60)),
    )

    # Group name next to each dot; dx nudges it right so it does not sit on the dot
    text = point.mark_text(
        align='left',
        dx=5,
    ).encode(
        text=group_field,
        size=when_selected(alt.value(15), alt.value(10)),
        color=when_selected(alt.value('black'), alt.Color(group_field))
    )

    layered = line + point + text
    return layered.properties(
        width=1200,
        height=800
    ).add_selection(selection)


# Global Data Visualizations

In [10]:
from ipywidgets import interact

# Aggregate at country level. Only numeric columns are summed; text columns
# are not meaningful as sums.
country_level = df.groupby(['Country', 'Date'], as_index=False).sum(numeric_only=True)

# Drop the dates excluded from the country-level series.
# 03-22-2020: county breakdown started here, so there is a break in continuity.
# NOTE(review): the reason for dropping 03-12-2020 and 02-12-2020 is not recorded here.
dropped_dates = ['03-22-2020', '03-12-2020', '02-12-2020']
country_level = country_level.drop(country_level.index[country_level['Date'].isin(dropped_dates)])

@interact(
    value=['Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'],
    sort=['Active', 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'],
    limit=(1,50,1),
)
def chart(value='Confirmed_New', sort='Active', limit=10):
    """Country-level streamgraph of `value`, showing the top `limit` countries ranked by `sort`."""
    stream = streamgraph(country_level, 'Country', value, sort, limit)
    return stream.interactive()

interactive(children=(Dropdown(description='value', index=3, options=('Confirmed', 'Deaths', 'Recovered', 'Con…

In [11]:
# Countries with more than 1000 confirmed cases on at least one date
countries = country_level.loc[country_level['Confirmed'] > 1000, 'Country'].unique().tolist()

@interact(
    value=['Confirmed_New', 'Deaths_New', 'Recovered_New'],
    sort=['Active', 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'],
    limit=(1,50,1),
    add=countries,
)
def chart(value, sort, limit=10, add='India'):
    """Country-level log-log chart of `value` against cumulative cases.

    Draws the top `limit` countries ranked by `sort`, always includes
    'South Korea' and the country chosen in `add`, and highlights `add`.
    """
    extras = ['South Korea', add]
    graph = nontimegraph(country_level, 'Country', value, sort, limit, highlight=add, addon=extras)
    return graph.interactive()

interactive(children=(Dropdown(description='value', options=('Confirmed_New', 'Deaths_New', 'Recovered_New'), …

# State Level Visualizations

In [12]:
# Countries that have rows with a State value and more than 1000 confirmed cases
countries = df.loc[df['State'].notna() & (df['Confirmed'] > 1000), 'Country'].unique()

def state_data(country):
    """Return per-state, per-date totals of the numeric columns for `country`.

    Parameters:
        country: value matched against the 'Country' column of the global `df`.

    Returns:
        DataFrame with one row per (State, Date). For 'US', the dates
        03-22-2020 and 03-18-2020 are removed and only rows whose Date string
        compares >= '03-01-2020' are kept.
    """
    # Only numeric columns are summed; text columns are not meaningful as sums
    state_level = df[df['Country'] == country].groupby(['State', 'Date'], as_index=False).sum(numeric_only=True)
    if country == 'US':
        # Drop 03-22-2020 since county breakdown started from here and so there is break in continuity.
        # NOTE(review): the reason for also dropping 03-18-2020 is not recorded here.
        excluded = ['03-22-2020', '03-18-2020']
        state_level = state_level.drop(state_level.index[state_level['Date'].isin(excluded)])
        # NOTE(review): this compares date strings as text, so it assumes their text
        # order matches chronological order - TODO confirm once data spans several years.
        state_level = state_level[state_level['Date'] >= '03-01-2020']
    return state_level


@interact(
    country=countries,
    value=['Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'],
    sort=['Active', 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'],
    limit=(1,50,1),
)
def chart(country='US', value='Confirmed_New', sort='Active', limit=10):
    """State-level streamgraph of `value` for `country`, showing the top `limit` states ranked by `sort`."""
    per_state = state_data(country)
    stream = streamgraph(per_state, 'State', value, sort, limit)
    return stream.interactive()

interactive(children=(Dropdown(description='country', index=6, options=('Australia', 'Canada', 'China', 'Denma…

In [13]:
@interact(
    country=countries,
    value=['Confirmed_New', 'Deaths_New', 'Recovered_New'],
    sort=['Active', 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'],
    limit=(1,50,1),
)
def chart(country='US', value='Confirmed_New', sort='Active', limit=10):
    """State-level log-log chart of `value` against cumulative cases for `country`."""
    per_state = state_data(country)
    graph = nontimegraph(per_state, 'State', value, sort, limit)
    return graph.interactive()

interactive(children=(Dropdown(description='country', index=6, options=('Australia', 'Canada', 'China', 'Denma…

# US County Level Visualizations

In [14]:
# Per-state, per-date totals for the US
us_state_level = state_data('US')

# US states with more than 1000 confirmed cases on at least one date
states = us_state_level.loc[us_state_level['Confirmed'] > 1000, 'State'].unique()
@interact(value=['Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'], sort=['Active', 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'], limit=(1,50,1), state=states)
def chart(state='CA', value='Confirmed_New', sort='Active', limit=10):
    """County-level streamgraph of `value` for one state, stacked from zero.

    Rows of `state` that have no County are grouped under the state's own name.
    """
    in_state = df[df['State'] == state].fillna({'County': state})
    # Only numeric columns are summed; text columns are not meaningful as sums
    county_level = in_state.groupby(['County', 'Date'], as_index=False).sum(numeric_only=True)
    return streamgraph(county_level, 'County', value, sort, limit, 'zero').interactive()

interactive(children=(Dropdown(description='state', index=2, options=('AL', 'AZ', 'CA', 'CO', 'CT', 'DC', 'FL'…

In [15]:
# US states with more than 1000 confirmed cases on at least one date
states = us_state_level.loc[us_state_level['Confirmed'] > 1000, 'State'].unique()

@interact(value=['Confirmed_New', 'Deaths_New', 'Recovered_New'], sort=['Active', 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'], limit=(1,50,1), state=states)
def chart(state='CA', value='Confirmed_New', sort='Confirmed', limit=10):
    """County-level log-log chart of `value` against cumulative cases for one state.

    Rows of `state` that have no County are grouped under the state's own name.
    """
    in_state = df[df['State'] == state].fillna({'County': state})
    # Only numeric columns are summed; text columns are not meaningful as sums
    county_level = in_state.groupby(['County', 'Date'], as_index=False).sum(numeric_only=True)
    return nontimegraph(county_level, 'County', value, sort, limit).interactive()

interactive(children=(Dropdown(description='state', index=2, options=('AL', 'AZ', 'CA', 'CO', 'CT', 'DC', 'FL'…

In [16]:
# Label every row as "County, State". Rows missing either part get a missing
# Area and therefore fall out of the groupby below.
df['Area'] = df.County + ', ' + df.State
# Only numeric columns are summed; text columns are not meaningful as sums
county_level = df.groupby(['Area', 'Date'], as_index=False).sum(numeric_only=True)

@interact(value=['Confirmed_New', 'Deaths_New', 'Recovered_New'], sort=['Active', 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'], limit=(1,50,1))
def chart(value='Confirmed_New', sort='Confirmed', limit=10):
    """Log-log chart of `value` against cumulative cases for the top `limit` areas.

    Areas come from every state in `county_level`, so this chart takes no
    state selector.
    """
    return nontimegraph(county_level, 'Area', value, sort, limit).interactive()

interactive(children=(Dropdown(description='value', options=('Confirmed_New', 'Deaths_New', 'Recovered_New'), …