In [1]:
import pandas as pnd
import numpy as np
import altair as alt
from scipy import stats
from altair import datum
from ipywidgets import interact

# --- Global configuration ---------------------------------------------------
# Render charts with Altair's default renderer.
alt.renderers.enable('default')
# Turn off pandas' chained-assignment (SettingWithCopy) warning notebook-wide.
pnd.options.mode.chained_assignment = None
# Silence NumPy floating-point warnings (divide / overflow / underflow / invalid).
np.seterr(all='ignore')

# --- Load the daily reports -------------------------------------------------
df = pnd.read_csv('jhu-daily-reports.csv')
# Active cases: confirmed minus those recorded as dead or recovered.
df['Active'] = df['Confirmed'] - (df['Deaths'] + df['Recovered'])

# Keep only the dates on which more than one distinct country is present.
samples = df.groupby('Date')['Country'].nunique()
days = samples[samples > 1].index.tolist()
df = df[df['Date'].isin(days)]

# Per-country daily totals.
# numeric_only=True is spelled out because pandas 2.0 changed the default of
# groupby reductions to include non-numeric columns; only numeric columns are
# meant to be summed here.
country_level = df.groupby(['Country', 'Date'], as_index=False).sum(numeric_only=True)
def state_data(country, frame=None):
    """Return per-state daily totals for one country.

    Parameters
    ----------
    country : str
        Value matched against the 'Country' column.
    frame : DataFrame, optional
        Table to aggregate. Must provide 'Country', 'State' and 'Date'
        columns. Defaults to the notebook-level ``df``.

    Returns
    -------
    DataFrame
        One row per (State, Date) pair, with 'State' and 'Date' as regular
        columns followed by the summed numeric columns.
    """
    if frame is None:
        frame = df  # fall back to the table loaded at the top of the notebook
    rows = frame[frame['Country'] == country]
    # numeric_only=True: text columns must not be summed (pandas >= 2.0 would
    # otherwise try to concatenate them).
    return rows.groupby(['State', 'Date'], as_index=False).sum(numeric_only=True)
def county_data(state, frame=None):
    """Return per-county daily totals for one US state.

    Parameters
    ----------
    state : str
        Value matched against the 'State' column; only rows whose 'Country'
        is 'US' are considered.
    frame : DataFrame, optional
        Table to aggregate. Must provide 'Country', 'State', 'County' and
        'Date' columns. Defaults to the notebook-level ``df``.

    Returns
    -------
    DataFrame
        One row per (County, Date) pair, with 'County' and 'Date' as regular
        columns followed by the summed numeric columns.
    """
    if frame is None:
        frame = df  # fall back to the table loaded at the top of the notebook
    in_state = (frame['Country'] == 'US') & (frame['State'] == state)
    # numeric_only=True: text columns must not be summed (pandas >= 2.0 would
    # otherwise try to concatenate them).
    return frame[in_state].groupby(['County', 'Date'], as_index=False).sum(numeric_only=True)


In [2]:
def _trend_label(history, window=21):
    """Classify one state's recent trend as 'Positive' or 'Negative'.

    Regresses 'Confirmed_New' on 'Confirmed' over the last `window` rows of
    `history` (rows ordered by the 'Date' column) and returns 'Positive' when
    the fitted slope is greater than zero, 'Negative' otherwise.
    """
    recent = history.sort_values('Date').tail(window)
    # A regression needs at least two distinct x values. Without this guard,
    # newer SciPy releases raise ValueError on a constant x-series; older ones
    # return a NaN slope, which the comparison below also maps to 'Negative'.
    if recent['Confirmed'].nunique() < 2:
        return 'Negative'
    fit = stats.linregress(recent['Confirmed'], recent['Confirmed_New'])
    return 'Positive' if fit.slope > 0 else 'Negative'


root = state_data('US')

# Only states that have a row on the latest date receive a label; every other
# state ends up with a missing Slope after the left merge below.
latest = root[root['Date'] == root['Date'].max()]
labels = {
    state: _trend_label(root[root['State'] == state])
    for state in latest['State'].unique()
}

# Build the State -> Slope table directly instead of writing into a filtered
# slice of `root`.
source = pnd.DataFrame(list(labels.items()), columns=['State', 'Slope'])
data = root.merge(source, how='left', on='State')

# Reporting dates left out of the charts.
# NOTE(review): presumably days with faulty upstream figures — confirm.
excluded_dates = ['03-28-2020', '03-22-2020', '03-18-2020']
data = data[~data['Date'].isin(excluded_dates)]

# 'Date' holds MM-DD-YYYY strings, so a plain string comparison would order
# rows by month before year. Compare parsed dates instead; the column itself
# is left as strings for the code that follows.
parsed_dates = pnd.to_datetime(data['Date'], format='%m-%d-%Y')
data = data[parsed_dates >= pnd.Timestamp('2020-03-01')]

# Number of top-ranked states kept as individual series per wave; the
# remaining states are pooled into one series.
limit=5

# Negative wave
# Rows of negative-trend states, restricted to days with new cases.
neg = data[(data.Slope == 'Negative') & (data.Confirmed_New > 0)]

# Ranking table: cumulative confirmed cases per state on the latest date, plus
# two synthetic entries so the pooled series also have a sort key.
neg_order = neg[neg.Date == neg.Date.max()][['State', 'Confirmed']]
synthetic_rows = pnd.DataFrame(data={'State': ['Others', 'Second Wave'], 'Confirmed': [1, 0]})
# pnd.concat replaces DataFrame.append, which pandas 2.0 removed.
neg_order = pnd.concat([neg_order, synthetic_rows], ignore_index=True)
neg_order = neg_order.sort_values('Confirmed', ascending=False)
neg_top = neg_order['State'].values.tolist()

# The `limit` highest-ranked states stay individual series ...
nd = neg[neg['State'].isin(neg_top[:limit])]
# ... and all remaining states are summed per day into 'Others'.
# numeric_only=True keeps the text columns ('State', 'Slope') out of the
# result so that 'State' can be inserted afterwards.
others = neg[neg['State'].isin(neg_top[limit:])].groupby('Date', as_index=False).sum(numeric_only=True)
others.insert(0, 'State', 'Others')
nd = pnd.concat([nd, others], ignore_index=True)

# Rows of positive-trend states, restricted to days with new cases.
pos = data[(data.Slope == 'Positive') & (data.Confirmed_New > 0)]
# Transparent lower part
# Daily total of all positive-trend states, added to the negative-wave data
# as a single 'Second Wave' series.
pos_agg = pos.groupby('Date', as_index=False).sum(numeric_only=True)
pos_agg.insert(0, 'State', 'Second Wave')
nd = pnd.concat([nd, pos_agg], ignore_index=True)

# Legend-bound multi-selection on State: selected series are drawn at full
# opacity, the rest at 0.3.
neg_selection = alt.selection_multi(fields=['State'], bind='legend')

# Encodings for the first-wave (negative-trend) area chart.
neg_x = alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0))
neg_y = alt.Y('Confirmed_New:Q', axis=alt.Axis(title='Daily New Cases'))
neg_color = alt.Color('State:N', scale=alt.Scale(scheme='greens'), sort=neg_top[:limit], title='First Wave')
neg_opacity = alt.condition(neg_selection, alt.value(1), alt.value(0.3))
neg_tooltip = alt.Tooltip(['Date:T', 'State', 'Confirmed_New'])

# Each row looks up its State's 'Confirmed' value in neg_order and exposes it
# as `sort`, which drives the stacking order.
neg_sort_lookup = alt.LookupData(neg_order, 'State', ['Confirmed'])

neg_chart = (
    alt.Chart(nd)
    .mark_area()
    .encode(
        neg_x,
        neg_y,
        neg_color,
        order=alt.Order('sort:Q'),
        opacity=neg_opacity,
        tooltip=neg_tooltip,
    )
    .transform_lookup(lookup='State', from_=neg_sort_lookup, as_=['sort'])
    .add_selection(neg_selection)
)

# Positive wave
# Rank positive-trend states by cumulative confirmed cases on the latest date
# present in `pos` itself (not the latest date of the negative-trend rows,
# which may differ).
pos_order = pos[pos.Date == pos.Date.max()].sort_values('Confirmed', ascending=False)
pos_top = pos_order['State'].values.tolist()

# The `limit` highest-ranked states stay individual series ...
# NOTE: the name `pd` is kept because the chart code reads it; it is a
# DataFrame here, not the pandas module (imported as `pnd`).
pd = pos[pos['State'].isin(pos_top[:limit])]
# ... and the remaining states are summed per day into 'Lowest'.
# numeric_only=True keeps the text columns ('State', 'Slope') out of the
# result so that 'State' can be inserted afterwards.
others = pos[pos['State'].isin(pos_top[limit:])].groupby('Date', as_index=False).sum(numeric_only=True)
others.insert(0, 'State', 'Lowest')
# pnd.concat replaces DataFrame.append, which pandas 2.0 removed.
pd = pnd.concat([pd, others], ignore_index=True)

# Legend-bound multi-selection on State: selected series are drawn at full
# opacity, the rest at 0.3.
pos_selection = alt.selection_multi(fields=['State'], bind='legend')

# Encodings for the second-wave (positive-trend) area chart.
pos_x = alt.X('Date:T', axis=alt.Axis(domain=False, format='%d %b', tickSize=0))
pos_y = alt.Y('Confirmed_New:Q', axis=alt.Axis(title='Daily New Cases'))
pos_color = alt.Color('State:N', scale=alt.Scale(scheme='reds'), sort=pos_top[:limit], title='Second Wave')
pos_opacity = alt.condition(pos_selection, alt.value(1), alt.value(0.3))
pos_tooltip = alt.Tooltip(['Date:T', 'State', 'Confirmed_New'])

# Each row looks up its State's 'Confirmed' value in pos_order and exposes it
# as `sort`, which drives the stacking order. pos_order has no row for the
# pooled 'Lowest' series, so that series gets no sort value from the lookup.
pos_sort_lookup = alt.LookupData(pos_order, 'State', ['Confirmed'])

pos_chart = (
    alt.Chart(pd)
    .mark_area()
    .encode(
        pos_x,
        pos_y,
        pos_color,
        order=alt.Order('sort:Q'),
        opacity=pos_opacity,
        tooltip=pos_tooltip,
    )
    .transform_lookup(lookup='State', from_=pos_sort_lookup, as_=['sort'])
    .add_selection(pos_selection)
)

# Layer the two wave charts, giving each layer its own colour scale, and make
# the result interactive.
wave_chart = (
    (neg_chart + pos_chart)
    .properties(width=1200, height=800)
    .resolve_scale(color='independent')
    .interactive()
)
# Static export variant, currently disabled:
#   .properties(width=608, height=385).save('wave.png', scale_factor=3.0)
wave_chart

In [3]:
# Daily new cases per trend label: one row per Date, one column per Slope.
# pivot_table already aggregates, so no separate groupby is needed, and only
# 'Confirmed_New' is summed rather than every column of `data`.
ratios = (
    data.pivot_table(index='Date', columns='Slope', values='Confirmed_New', aggfunc='sum')
        # Guarantee both label columns exist even if one label never occurs.
        .reindex(columns=['Negative', 'Positive'])
        # A label with no rows on a given date contributed zero new cases.
        .fillna(0)
)

# Share of each label in the day's total, in percent.
daily_total = ratios.Negative + ratios.Positive
ratios['Negative_Pct'] = ratios.Negative / daily_total * 100
ratios['Positive_Pct'] = ratios.Positive / daily_total * 100
# Distance of the negative-trend share from an even 50/50 split.
ratios['Mid'] = (ratios.Negative_Pct - 50).abs()

# Dates on which the two groups were within 2 percentage points of an even
# split. Kept under a name: as a bare expression in the middle of the cell the
# result was computed and then discarded.
near_even_split = ratios[ratios.Mid < 2]

ratios

Slope,Negative,Positive,Negative_Pct,Positive_Pct,Mid
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
03-01-2020,1.0,4.0,20.000000,80.000000,30.000000
03-02-2020,2.0,18.0,10.000000,90.000000,40.000000
03-03-2020,3.0,15.0,16.666667,83.333333,33.333333
03-04-2020,9.0,22.0,29.032258,70.967742,20.967742
03-05-2020,13.0,51.0,20.312500,79.687500,29.687500
...,...,...,...,...,...
06-14-2020,5411.0,14121.0,27.703256,72.296744,22.296744
06-15-2020,5442.0,14526.0,27.253606,72.746394,22.746394
06-16-2020,5530.0,18230.0,23.274411,76.725589,26.725589
06-17-2020,6336.0,19223.0,24.789702,75.210298,25.210298
