In [118]:
# Load the latest summary data

import pandas as pd
import numpy as np
import altair as alt
from typing import List

# Read the raw data
url = 'https://raw.githubusercontent.com/ratreya/jupyter-notebooks/master/jhu-daily-reports.csv'
df = pd.read_csv(url)

# The 'Date' values are MM-DD-YYYY strings (e.g. '03-22-2020'), so a plain string
# sort is only chronological within a single calendar year: '01-05-2021' would
# sort before '03-22-2020'. Sort on the parsed date instead. `days` remains a
# list of the original strings, oldest first, so days[-1] is the latest report.
days = sorted(df['Date'].unique(), key=lambda day: pd.to_datetime(day, format='%m-%d-%Y'))

# Set up some global config and variables
alt.renderers.enable('default')

def align(data: List[list]) -> List[list]:
    """Left-pad series with zeros so that they line up against a common baseline.

    The baseline is the series whose first value is the smallest positive one
    (the first such series wins on ties). Every other non-empty series is
    shifted right by the position at which the baseline's value is closest to
    that series' first value.

    Args:
        data: list of numeric series (lists). The outer list is modified in
            place: shifted series are replaced by new, zero-padded lists.

    Returns:
        The same ``data`` object. It is returned untouched when there is no
        baseline, i.e. the input is empty or no series starts above zero.
    """
    # Candidate baselines: non-empty series that start above zero
    candidates = [idx for idx, series in enumerate(data) if len(series) > 0 and series[0] > 0]
    if not candidates:
        # Nothing to align against; previously this raised from min() on an empty sequence
        return data

    base = min(candidates, key=lambda idx: data[idx][0])
    reference = data[base]

    # Align everyone else to this base
    for i, series in enumerate(data):
        if i == base or len(series) == 0:
            # The baseline stays put; an empty series has no starting value to match
            continue
        start = series[0]
        # Position in the baseline whose value is closest to this series' first value
        padding = min(range(len(reference)), key=lambda pos: abs(reference[pos] - start))
        data[i] = [0] * padding + series
    return data

def streamgraph(df, by, value, sort, limit):
    """Build a centre-stacked area (stream) chart of `value` over 'Date', one band per `by`.

    Groups are ranked by `sort` on the latest date in the global `days`. The
    first `limit` groups get their own band; all remaining groups are summed
    per date into a single band labelled 'Others'.

    Args:
        df: DataFrame with a 'Date' column plus the `by`, `value` and `sort` columns.
        by: name of the grouping column (one band / legend entry per value).
        value: name of the quantitative column plotted on the y axis.
        sort: name of the column used to rank groups and to order the stack.
        limit: number of top-ranked groups that keep their own band.

    Returns:
        An Altair chart with a legend-bound single selection that dims the
        unselected bands.
    """
    # Rank the groups by `sort` as of the latest date
    order = df[df['Date'] == days[-1]].sort_values(sort, ascending=False)
    top = order[by].values.tolist()
    data = df[df[by].isin(top[:limit])]

    # Collapse everything outside the top `limit` into one 'Others' row per date.
    # numeric_only=True sums just the numeric measures; without it, pandas >= 2.0
    # keeps string columns (including `by`), and the insert below would fail.
    others = df[df[by].isin(top[limit:])].groupby('Date', as_index=False).sum(numeric_only=True)
    # A numeric `by` column survives the sum; drop it so the label can be inserted
    others = others.drop(columns=[by], errors='ignore')
    others.insert(0, by, 'Others')
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent
    data = pd.concat([data, others], ignore_index=True)
    selection = alt.selection_single(fields=[by], bind='legend')

    return alt.Chart(data).mark_area().encode(
        alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0)),
        alt.Y(value + ':Q', stack='center', axis=None),
        alt.Color(by + ':N', scale=alt.Scale(scheme='tableau20'), sort=top[:limit]),
        order=alt.Order(sort+':Q'),
        tooltip=alt.Tooltip([by, value]),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
    ).transform_lookup(
        # Attach each group's latest-date `sort` value, which drives the stack order
        lookup=by,
        from_=alt.LookupData(order, by, [sort])
    ).properties(
        width=1200,
        height=800
    ).add_selection(selection)

def nontimegraph(df, by, value, sort, limit, highlight):
    """Plot `value` against cumulative 'Confirmed' on log-log axes, one line per `by`.

    Only rows with `value` > 1 and 'Confirmed' > 100 are plotted. Each line
    ends in a labelled point taken from the latest date in the global `days`.

    Args:
        df: DataFrame with 'Date' and 'Confirmed' columns plus the `by` and
            `value` columns.
        by: name of the grouping column (one line per value).
        value: name of the quantitative column plotted on the y axis; its
            underscores become spaces in the axis title.
        sort: either an explicit list/tuple of `by` values to plot, or the name
            of a column used to rank the groups on the latest date.
        limit: number of top-ranked groups to keep. Ignored when `sort` is an
            explicit list/tuple.
        highlight: `by` value selected initially (drawn in black and thicker).

    Returns:
        A layered Altair chart (lines + end points + labels) with a legend-bound
        single selection.
    """
    if isinstance(sort, (list, tuple)):
        # Caller supplied the exact groups to show
        top = list(sort)
    else:
        # Rank the groups by the `sort` column as of the latest date
        latest = df[df['Date'] == days[-1]]
        top = latest.sort_values(sort, ascending=False)[by].values.tolist()[:limit]

    # Both axes are logarithmic, so keep only rows with positive values on each
    data = df[(df[value] > 1) & (df['Confirmed'] > 100) & (df[by].isin(top))]
    selection = alt.selection_single(fields=[by], bind='legend', init={by: highlight}, empty='none')

    line = alt.Chart(data).mark_line().encode(
        alt.X('Confirmed:Q', scale=alt.Scale(type='log'), axis=alt.Axis(title='Cumulative Cases')),
        alt.Y(value + ':Q', scale=alt.Scale(type='log'), axis=alt.Axis(title=' '.join(value.split('_')))),
        color=alt.condition(selection, alt.value('black'), alt.Color(by+':N', scale=alt.Scale(scheme='category10' if highlight == '' else 'set2'))),
        size=alt.condition(selection, alt.value(3), alt.value(1)),
        tooltip=alt.Tooltip([by, value])
    )
    # Marker at the latest data point of every line
    point = alt.Chart(data[data['Date'] == days[-1]]).mark_circle(size=60).encode(
        alt.X('Confirmed:Q', scale=alt.Scale(type='log')),
        alt.Y(value + ':Q', scale=alt.Scale(type='log')),
        color=alt.condition(selection, alt.value('black'), alt.Color(by+':N')),
        size=alt.condition(selection, alt.value(100), alt.value(60)),
    )
    text = point.mark_text(
        align='left',
        dx=5,  # Nudges text to the right so it doesn't sit on top of the point
    ).encode(
        text=by+':N',
        size=alt.condition(selection, alt.value(15), alt.value(10)),
        color=alt.condition(selection, alt.value('black'), alt.Color(by+':N'))
    )

    return (line+point+text).properties(width=1200, height=800).add_selection(selection)


# Global Data Visualization

In [119]:
# Aggregate at country level
# numeric_only=True sums only the numeric measures. Older pandas dropped
# non-numeric columns silently; pandas >= 2.0 would try to "sum" them.
country_level = df.groupby(['Country', 'Date'], as_index=False).sum(numeric_only=True)
# Drop 03-22-2020 since county breakdown started from here and so there is break in continuity
# NOTE(review): 03-12-2020 and 02-12-2020 are dropped too; the reason is not recorded
# here -- presumably similar reporting discontinuities. Confirm.
# .copy() gives an independent frame so later column assignments are safe.
country_level = country_level[~country_level['Date'].isin(['03-22-2020', '03-12-2020', '02-12-2020'])].copy()

streamgraph(country_level, 'Country', 'Confirmed_New', 'Confirmed', 10).interactive()

In [120]:
# Active cases: confirmed minus the resolved ones (deaths plus recoveries)
country_level['Active'] = country_level['Confirmed'] - (country_level['Deaths'] + country_level['Recovered'])

# Ten countries with the most confirmed cases on the latest date, plus India and South Korea
top = (
    country_level
    .loc[country_level['Date'] == days[-1]]
    .sort_values('Confirmed', ascending=False)['Country']
    .tolist()[:10]
) + ['India', 'South Korea']

nontimegraph(
    country_level,
    by='Country',
    value='Confirmed_New',
    sort=top,
    limit=len(top),
    highlight='India',
).interactive()

# US State Level Visualizations

In [121]:
# Plot top contributing states to US trend

# numeric_only=True sums only the numeric measures. Older pandas dropped
# non-numeric columns silently; pandas >= 2.0 would try to "sum" them.
us_state_level = df[df['Country'] == 'US'].groupby(['State', 'Date'], as_index=False).sum(numeric_only=True)
# Drop 03-22-2020 since county breakdown started from here and so there is break in continuity
# NOTE(review): 03-18-2020 is dropped too; the reason is not recorded here. Confirm.
us_state_level = us_state_level[~us_state_level['Date'].isin(['03-22-2020', '03-18-2020'])]
# Keep reports from 1 March 2020 onward. 'Date' holds MM-DD-YYYY strings, so a
# string comparison against '03-01-2020' would wrongly discard January and
# February of later years; compare parsed dates instead.
# .copy() gives an independent frame so later column assignments are safe.
us_state_level = us_state_level[
    pd.to_datetime(us_state_level['Date'], format='%m-%d-%Y') >= pd.Timestamp(2020, 3, 1)
].copy()

streamgraph(us_state_level, 'State', 'Confirmed_New', 'Confirmed', 10).interactive()

In [122]:
# Active cases: confirmed minus the resolved ones (deaths plus recoveries)
us_state_level['Active'] = us_state_level.Confirmed - (us_state_level.Deaths + us_state_level.Recovered)

# Top 15 states by confirmed cases on the latest date, with CA highlighted
nontimegraph(
    us_state_level,
    by='State',
    value='Confirmed_New',
    sort='Confirmed',
    limit=15,
    highlight='CA',
).interactive()