In [1]:
import pandas as pnd
import numpy as np
import altair as alt
from scipy import stats
from altair import datum
from ipywidgets import interact

# Notebook-wide settings: Altair's default renderer, no pandas
# chained-assignment warnings, numpy floating-point errors ignored.
alt.renderers.enable('default')
pnd.options.mode.chained_assignment = None
np.seterr(all='ignore')

# Load the daily reports and derive the active-case count.
df = pnd.read_csv('jhu-daily-reports.csv')
df['Active'] = df['Confirmed'] - (df['Deaths'] + df['Recovered'])

# Keep only the dates on which more than one distinct country appears.
samples = df.groupby('Date')['Country'].nunique()
days = [day for day, n_countries in samples.items() if n_countries > 1]
df = df.loc[df['Date'].isin(days)]

# Sums per (Country, Date) pair.
country_level = df.groupby(['Country', 'Date'], as_index=False).sum()
def state_data(country):
    """Return the rows of ``country`` from the global ``df``, summed per (State, Date) pair."""
    in_country = df['Country'] == country
    per_state = df.loc[in_country].groupby(['State', 'Date'], as_index=False)
    return per_state.sum()
def county_data(state):
    """Return the US rows of ``state`` from the global ``df``, summed per (County, Date) pair."""
    in_us_state = (df['Country'] == 'US') & (df['State'] == state)
    per_county = df.loc[in_us_state].groupby(['County', 'Date'], as_index=False)
    return per_county.sum()


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [2]:
def _slope_label(recent, value):
    """Classify one group's trend as 'Positive' or 'Negative'.

    Regresses ``recent[value]`` on ``recent['Confirmed']`` and returns
    'Positive' only for a strictly positive slope. Windows on which the
    regression is undefined (fewer than two rows, or no variation in
    ``Confirmed``) are labelled 'Negative', the same label a NaN slope
    receives, instead of letting ``linregress`` raise.
    """
    x = recent['Confirmed']
    y = recent[value]
    if len(recent) < 2 or x.nunique() < 2:
        return 'Negative'
    slope = stats.linregress(x, y).slope
    return 'Positive' if slope > 0 else 'Negative'


def _top_plus_rest(frame, by, ranked, limit, rest_label):
    """Keep the first ``limit`` groups of ``ranked``; sum the remaining ones per Date under ``rest_label``."""
    top = frame[frame[by].isin(ranked[:limit])]
    # numeric_only=True drops the text columns (including `by`) explicitly, so
    # the insert below cannot collide with an existing column.
    rest = frame[frame[by].isin(ranked[limit:])].groupby('Date', as_index=False).sum(numeric_only=True)
    rest.insert(0, by, rest_label)
    return pnd.concat([top, rest], ignore_index=True)


def _wave_area(frame, by, value, y, scheme, legend_order, legend_title, rank):
    """Build one stacked-area layer whose stacking order is looked up from ``rank['Confirmed']``."""
    selection = alt.selection_multi(fields=[by], bind='legend')
    return alt.Chart(frame).mark_area().encode(
            alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0)),
            y,
            alt.Color(by + ':N', scale=alt.Scale(scheme=scheme), sort=legend_order, title=legend_title),
            order=alt.Order('sort:Q'),
            opacity=alt.condition(selection, alt.value(1), alt.value(0.3)),
            tooltip=alt.Tooltip(['Date:T', by, value])
        ).transform_lookup(
            lookup=by,
            from_=alt.LookupData(rank, by, ['Confirmed']),
            as_=['sort']
        ).add_selection(selection)


def wave_chart(root, by, value, limit=5, trend_until='06-08-2020', trend_days=21):
    """Layered "first wave / second wave" area chart of ``value`` over time.

    Every group (distinct value of ``by`` present on the latest date of
    ``root``) is labelled by the sign of a linear regression of ``value``
    on ``Confirmed`` over its last ``trend_days`` rows dated on or before
    ``trend_until``. Groups with a positive slope form the red
    "Second Wave" layer, all other groups the green "First Wave" layer.
    A hover-driven bar and text layer shows each class's share of the
    daily total.

    Parameters
    ----------
    root : DataFrame with at least 'Date', 'Confirmed', ``by`` and
        ``value`` columns.
        NOTE(review): dates are compared with ``<=`` / ``max()`` exactly as
        stored; if they are 'MM-DD-YYYY' strings the comparison is
        lexicographic and only orders correctly within one year - confirm.
    by : name of the grouping column (e.g. 'State', 'County').
    value : name of the column to plot.
    limit : number of individually drawn groups per layer; the rest are
        summed into 'Others' (first wave) or 'Lowest' (second wave).
    trend_until : last date included in the trend window. Defaults to the
        date that used to be hard-coded here.
    trend_days : number of most recent rows used for the regression.

    Returns
    -------
    A layered Altair chart (1200x800) with independent color scales.
    """
    # Label each group present on the latest date. The copy makes the
    # column assignment below write to an independent frame, not a slice.
    latest = root[root['Date'] == root.Date.max()].copy()
    for group in latest[by].unique():
        recent = (
            root[(root[by] == group) & (root.Date <= trend_until)]
            .sort_values('Date')
            .tail(trend_days)
        )
        latest.loc[latest[by] == group, 'Slope'] = _slope_label(recent, value)
    data = root.merge(latest[[by, 'Slope']], how='left', on=by)

    # Negative wave: rank groups by their latest 'Confirmed'. The two
    # placeholder rows give the synthetic 'Others' / 'Second Wave' series
    # a sort key (1 and 0) for the stacking-order lookup.
    neg = data[data.Slope == 'Negative']
    neg_order = neg[neg.Date == neg.Date.max()][[by, 'Confirmed']]
    placeholders = pnd.DataFrame(data={by: ['Others', 'Second Wave'], 'Confirmed': [1, 0]})
    neg_order = pnd.concat([neg_order, placeholders], ignore_index=True)
    neg_order = neg_order.sort_values('Confirmed', ascending=False)
    neg_top = neg_order[by].values.tolist()
    neg_data = _top_plus_rest(neg, by, neg_top, limit, 'Others')

    # The summed positive-trend series is also added to the first-wave layer
    # under the label 'Second Wave' (the "lower part" the second layer sits on).
    pos = data[data.Slope == 'Positive']
    pos_agg = pos.groupby('Date', as_index=False).sum(numeric_only=True)
    pos_agg.insert(0, by, 'Second Wave')
    neg_data = pnd.concat([neg_data, pos_agg], ignore_index=True)

    neg_chart = _wave_area(
        neg_data, by, value,
        alt.Y(value + ':Q', axis=alt.Axis(title=' '.join(value.split('_'))),
              scale=alt.Scale(domain=(0, data.groupby('Date')[value].sum().max()))),
        'greens', neg_top[:limit], 'First Wave', neg_order)

    # Positive wave: ranked on the latest date of the positive groups
    # themselves (previously this used the negative groups' latest date,
    # which left the layer empty when no group had a negative trend).
    pos_order = pos[pos.Date == pos.Date.max()].sort_values('Confirmed', ascending=False)
    pos_top = pos_order[by].values.tolist()
    pos_data = _top_plus_rest(pos, by, pos_top, limit, 'Lowest')

    pos_chart = _wave_area(
        pos_data, by, value,
        alt.Y(value + ':Q'),
        'reds', pos_top[:limit], 'Second Wave', pos_order)

    # Ratio of Positive and Negative per date. Reindexing guarantees both
    # columns exist even when one trend class is absent; a class with no
    # rows on a date contributes a sum of 0.
    ratios = data.pivot_table(index='Date', columns='Slope', values=value, aggfunc='sum')
    ratios = ratios.reindex(columns=['Negative', 'Positive']).fillna(0)
    total = ratios['Negative'] + ratios['Positive']
    ratios['Negative_Pct'] = ratios['Negative'] / total * 100
    ratios['Positive_Pct'] = ratios['Positive'] / total * 100
    ratios = ratios.reset_index()
    agg = ratios.melt(id_vars=['Date'], value_vars=['Positive', 'Negative'], var_name='Slope', value_name=value)
    agg_pct = ratios.melt(id_vars=['Date'], value_vars=['Positive_Pct', 'Negative_Pct'], var_name='Slope', value_name='Percent')
    agg_pct['Slope'] = agg_pct['Slope'].replace({'Positive_Pct': 'Positive', 'Negative_Pct': 'Negative'})
    agg = agg.merge(agg_pct, on=['Date', 'Slope'])

    selection = alt.selection_single(fields=['Date'], empty='none', nearest=True, on='mouseover')

    # Thin stacked bar, visible only at the hovered date.
    bar = alt.Chart(agg).mark_bar().encode(
        alt.X('Date:T'),
        alt.Y(value + ':Q', stack='zero'),
        alt.Color('Slope:N', scale=alt.Scale(domain=['Positive', 'Negative'], range=['white', 'black']), legend=None),
        size=alt.condition(selection, alt.value(2), alt.value(0))
    ).add_selection(selection)

    # Percentage labels, visible only at the hovered date.
    text = alt.Chart(agg).mark_text(dx=-15, dy=10, color='red').encode(
        alt.X('Date:T'),
        alt.Y(value + ':Q', stack='zero'),
        alt.Color('Slope:N', scale=alt.Scale(domain=['Positive', 'Negative'], range=['black', 'black']), legend=None),
        text=alt.Text('label:N'),
        size=alt.condition(selection, alt.value(12), alt.value(0))
    ).transform_calculate(label='format(datum.Percent, ".0f") + "%"')

    return (neg_chart + pos_chart + bar + text).properties(width=1200, height=800).resolve_scale(color='independent')

In [3]:
# US per-state series without the report dates excluded from this chart
# and without rows whose Date compares below '03-01-2020'.
excluded_dates = ['03-28-2020', '03-22-2020', '03-18-2020', '08-31-2020']
root = state_data('US')
root = root[~root['Date'].isin(excluded_dates) & (root['Date'] >= '03-01-2020')]

chart = wave_chart(root, 'State', 'Confirmed_New')
# Write a standalone HTML copy, then show the interactive chart as the cell output.
(
    chart
    .properties(width=1200, height=800)
    .resolve_scale(color='independent')
    .save('wave.html')
)
chart.interactive()

In [4]:
# US states that exceed 1000 in 'Confirmed' on at least one date feed the state dropdown.
us_state_level = state_data('US')
states = us_state_level[us_state_level['Confirmed'] > 1000]['State'].unique()


@interact(value=['Confirmed_New', 'Deaths_New'], limit=(0,20,1), state=states)
def chart(state='CA', value='Confirmed_New', limit=5):
    """Interactive county-level wave chart for one US state.

    Rows are restricted to Country == 'US' as well as the chosen state,
    matching ``county_data``: the dropdown is built from US data only, so
    rows from another country that share the same State value must not be
    mixed in. Rows without a County are given the state's name, so the
    per-county grouping keeps them.
    """
    in_us_state = (df['Country'] == 'US') & (df['State'] == state)
    county_level = df[in_us_state].fillna({'County': state}).groupby(['County', 'Date'], as_index=False).sum()
    return wave_chart(county_level, 'County', value, limit=limit).interactive()

interactive(children=(Dropdown(description='state', index=4, options=('AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT'…