In [16]:
# Load the latest summary data

import pandas as pd
import altair as alt

# Read the raw data
url = 'https://raw.githubusercontent.com/ratreya/jupyter-notebooks/master/covid-merged-daily-reports.csv'
df = pd.read_csv(url)
# The later cells compare 'Date' against literals such as '03-22-2020', i.e. 'MM-DD-YYYY' strings.
# A plain string sort of that format is only chronological within a single calendar year,
# so order the unique days by their parsed date to keep days[-1] the latest report day.
# NOTE(review): assumes every 'Date' value follows 'MM-DD-YYYY' - confirm against the CSV.
days = sorted(df['Date'].unique(), key=lambda day: pd.to_datetime(day, format='%m-%d-%Y'))

# Set up some global config and variables
alt.renderers.enable('default')
# This is the variable to consider - options are 'Confirmed', 'Deaths', 'Recovered', 'Confirmed_New', 'Deaths_New', 'Recovered_New'
variable = 'Confirmed_New'

def align(data):
    """Left-pad series with zeros so that they line up with a baseline series.

    The baseline is the series with the lowest positive first value. Every
    other series is shifted right by prepending zeros; the shift is the
    position in the baseline whose value is closest to that series' first
    value (the earliest position wins on ties).

    Args:
        data: list of lists of numbers. Every inner list must be non-empty,
            because its first element is inspected.

    Returns:
        The same ``data`` list, modified in place: each non-baseline entry is
        replaced by a new, padded list. If ``data`` is empty or no series
        starts with a positive value there is nothing to align against and
        ``data`` is returned unchanged.
    """
    # Only series that start with a positive value can serve as the baseline
    candidates = [idx for idx, series in enumerate(data) if series[0] > 0]
    if not candidates:
        # Previously this case crashed with "min() arg is an empty sequence"
        return data

    # Find the index with the lowest non-zero starting case number
    base = min(candidates, key=lambda idx: data[idx][0])
    reference = data[base]

    # Align everyone else to this base
    for i, series in enumerate(data):
        if i == base:
            continue
        start = series[0]
        # Position in the baseline whose value is nearest to this series' first value
        padding = min(range(len(reference)), key=lambda pos: abs(reference[pos] - start))
        data[i] = [0] * padding + series
    return data


In [24]:
# Plot top contributors to global trend

# Aggregate at country level. numeric_only=True keeps non-numeric columns (such as 'State')
# out of the sum, so the result does not depend on how the installed pandas handles them.
country_level = df.groupby(['Country', 'Date'], as_index=False).sum(numeric_only=True)
# Drop 03-22-2020 since the county breakdown started on that day, which breaks continuity
country_level = country_level[country_level['Date'] != '03-22-2020']

# Countries to compare - descending by the peak value of the variable.
# A country whose values are all missing is ranked as 0. The stable sort keeps
# countries with equal peaks in their original (grouped) order.
peak_by_country = country_level.groupby('Country')[variable].max().fillna(0)
top_countries = peak_by_country.sort_values(ascending=False, kind='mergesort').index.tolist()

sans = country_level[['Country', 'Date', variable]]
# Drop 03-12-2020 and 02-12-2020 as there seems to be a reporting problem on the day before each
sans = sans[~sans['Date'].isin(['03-12-2020', '02-12-2020'])]

# Align the various series by date. The empty `variable` column on the left frame makes
# every merged-in column collide with it, so pandas always appends the '_<country>' suffix.
data = pd.DataFrame(columns=['Date', variable])
legend = top_countries[:10]
for country in legend:
    data = data.merge(
        sans.loc[sans['Country'] == country, ['Date', variable]],
        how='outer', on='Date', suffixes=('', '_' + country), sort=True,
    )

# Add all others and introduce it into the dataframe (only the plotted variable is summed)
other_countries = set(sans['Country'].unique()) - set(legend)
others = sans[sans['Country'].isin(other_countries)].groupby('Date', as_index=False)[variable].sum()
data = data.merge(others, how='outer', on='Date', suffixes=('', '_Others'))
legend = legend + ['Others']

# Rename the columns to be country names, then reshape to one row per (Date, Country)
data = data.rename(columns={variable + '_' + name: name for name in legend})
data = data.melt(id_vars='Date', value_vars=legend, var_name='Country', value_name='Value')

# Centered stacked area chart (stream graph) of each country's contribution over time
chart = alt.Chart(data).mark_area().encode(
    alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0)),
    alt.Y('Value:Q', stack='center', axis=None),
    alt.Color('Country:N', scale=alt.Scale(scheme='tableau20'))
).properties(width=1000, height=600)

chart.display()

In [18]:
# Plot top contributing states to US trend

import altair as alt

# Aggregate US rows at state level. numeric_only=True keeps non-numeric columns out of
# the sum, so the result does not depend on how the installed pandas handles them.
us_state_level = df[df['Country'] == 'US'].groupby(['State', 'Date'], as_index=False).sum(numeric_only=True)
# Drop 03-22-2020 since the county breakdown started on that day, which breaks continuity
us_state_level = us_state_level[us_state_level['Date'] != '03-22-2020']
# Keep 1 March 2020 onwards. Compare parsed dates rather than the raw 'MM-DD-YYYY' strings,
# whose lexicographic order is only chronological within a single calendar year.
# NOTE(review): assumes every 'Date' value follows 'MM-DD-YYYY' - confirm against the CSV.
us_state_level = us_state_level[
    pd.to_datetime(us_state_level['Date'], format='%m-%d-%Y') >= pd.Timestamp(2020, 3, 1)
]

# Align the various series by date. The empty `variable` column on the left frame makes
# every merged-in column collide with it, so pandas always appends the '_<state>' suffix.
data = pd.DataFrame(columns=['Date', variable])
# Top 10 states by cumulative confirmed cases on the most recent day
latest = us_state_level[us_state_level['Date'] == days[-1]]
legend = latest.sort_values('Confirmed', ascending=False)['State'].head(10).tolist()
for state in legend:
    data = data.merge(
        us_state_level.loc[us_state_level['State'] == state, ['Date', variable]],
        how='outer', on='Date', suffixes=('', '_' + state), sort=True,
    )

# Add all others and introduce it into the dataframe (only the plotted variable is summed)
other_states = set(us_state_level['State'].unique()) - set(legend)
others = us_state_level[us_state_level['State'].isin(other_states)].groupby('Date', as_index=False)[variable].sum()
data = data.merge(others, how='outer', on='Date', suffixes=('', '_Others'))
legend = legend + ['Others']

# Rename the columns to be state names, then reshape to one row per (Date, State)
data = data.rename(columns={variable + '_' + name: name for name in legend})
data = data.melt(id_vars='Date', value_vars=legend, var_name='State', value_name='Value')

# Centered stacked area chart (stream graph) of each state's contribution over time
chart = alt.Chart(data).mark_area().encode(
    alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0)),
    alt.Y('Value:Q', stack='center', axis=None),
    alt.Color('State:N', scale=alt.Scale(scheme='category20'))
).properties(width=1000, height=600)

chart.display()