In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import common

In [2]:
# Countries whose traces are drawn by default; set_widths() marks every other
# country's trace as 'legendonly'.
SHOW_SET = {
    'New Zealand', 'China', 'Italy', 'Spain', 'Germany', 'US', 'Korea, South',
    'Japan', 'Canada', 'Australia', 'Israel', 'Ireland', 'France', 'United Kingdom',
}
# Series names; each one is substituted into URL_FORMAT to build a CSV URL.
SERIES = ['confirmed', 'deaths', 'recovered']
URL_FORMAT = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_%s_global.csv'
# Published spreadsheet (xlsx export) that is read for its 'New Zealand' sheet.
DOCS_LINK = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQCN9pL21lGy3XPBhKwMX7jT1_SG-Sb_4ZWZ1I0Ctd-0vNhtmH4gFKaLsV5jhz4vSjYlQ9NR_fXF_b6/pub?output=xlsx'

In [3]:
def update_nz_data(cdf, nz_df):
    """Overwrite the New Zealand rows of ``cdf`` in place with values from ``nz_df``.

    The value ``nz_df`` holds for each date is written to the *previous* date's
    column of ``cdf`` (row ``i + 1`` of ``nz_df`` lands on date ``i``), so the
    last date in ``nz_df`` receives no value. Missing values are written as 0.

    Parameters
    ----------
    cdf : pandas.DataFrame
        Rows indexed by ``(series, country)``, one column per date. Modified
        in place; must already contain every date in ``nz_df.index[:-1]``.
    nz_df : pandas.DataFrame
        Indexed by date, with 'Cases', 'Deaths' and 'Recovered' columns.

    Returns
    -------
    None
    """
    # Nothing can be shifted back by a day unless there are at least two rows.
    if len(nz_df.index) < 2:
        return

    # Pair every series with its source column explicitly. A single multi-row
    # .loc assignment relies on pandas' row ordering for a list key, which has
    # differed between pandas versions and could swap deaths with recovered.
    column_for_series = {'confirmed': 'Cases', 'deaths': 'Deaths', 'recovered': 'Recovered'}

    target_dates = nz_df.index[:-1]
    shifted = nz_df.loc[nz_df.index[1]:, list(column_for_series.values())].fillna(0)
    for series, column in column_for_series.items():
        cdf.loc[(series, 'New Zealand'), target_dates] = shifted[column].values

In [4]:
# Download one CSV per series, total the rows per country, and stack the results
# under an outer 'Series' index level.
# numeric_only=True keeps text columns out of the sum on every pandas version,
# so the .iloc[:, 2:] that follows always removes the same two leading numeric
# columns and leaves only the per-date columns.
cdf = pd.concat(
    {
        series: (
            pd.read_csv(URL_FORMAT % series)
            .groupby('Country/Region')
            .sum(numeric_only=True)
            .iloc[:, 2:]
        )
        for series in SERIES
    },
    names=['Series'],
)
# Turn the column labels into timestamps (month-first) and name the axis 'date'.
# Assigning to .columns avoids set_axis(..., inplace=True), which newer pandas
# releases no longer accept.
the_dates = pd.to_datetime(cdf.columns, dayfirst=False)
cdf.columns = the_dates.rename('date')

In [5]:
# Read the 'New Zealand' sheet (column names taken from the second row, indexed
# by its parsed 'Date' column) and discard rows in which every cell is empty.
with pd.ExcelFile(DOCS_LINK) as excel_file:
    nz_df = excel_file.parse(
        'New Zealand',
        header=1,
        index_col='Date',
        parse_dates=True,
    )
nz_df = nz_df.dropna(how='all')

In [6]:
# Replace the New Zealand rows of cdf (modified in place) with the figures
# loaded from the spreadsheet into nz_df.
update_nz_data(cdf, nz_df)

In [7]:
# Countries to chart: New Zealand first, then the 40 countries with the most
# confirmed cases on the latest date. New Zealand is filtered out of the ranked
# part so it can never be listed twice (a repeated label would duplicate rows
# in every later .loc[c30_idx] lookup).
top_confirmed = cdf.loc['confirmed', cdf.columns[-1]].nlargest(40).index
c30_idx = ['New Zealand'] + [country for country in top_confirmed if country != 'New Zealand']

In [8]:
# Day-over-day fractional change in confirmed cases for the charted countries,
# showing only the 10 most recent dates.
(
    cdf.loc['confirmed']
    .loc[c30_idx]
    .pct_change(periods=1, axis=1)
    .iloc[:, -10:]
)

date,2020-04-09,2020-04-10,2020-04-11,2020-04-12,2020-04-13,2020-04-14,2020-04-15,2020-04-16,2020-04-17,2020-04-18
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
New Zealand,0.035513,0.022603,0.01372,0.014286,0.012602,0.014641,0.010823,0.00571,0.009226,0.006329
US,0.078611,0.07294,0.060139,0.054934,0.045571,0.04659,0.047197,0.049424,0.047776,0.046435
Spain,0.033747,0.032965,0.030037,0.023334,0.019589,0.014356,0.029576,0.041116,0.031852,0.004648
Italy,0.030153,0.027509,0.031807,0.026873,0.020165,0.018631,0.016414,0.022924,0.020676,0.020245
France,0.05188,0.050042,0.034064,0.283029,0.030252,0.047586,0.02452,0.092947,0.013862,0.000127
Germany,0.043117,0.033762,0.022403,0.023585,0.017348,0.009895,0.025838,0.021855,0.026863,0.013756
United Kingdom,0.071542,0.132575,0.070625,0.066755,0.051217,0.058892,0.048901,0.046862,0.054002,0.050515
China,0.000894,0.0007,0.00088,0.001446,0.00095,0.001118,0.0006,0.000564,0.00428,0.000322
Turkey,0.106106,0.11227,0.109252,0.091801,0.071862,0.066537,0.065749,0.069187,0.058671,0.048163
Iran,0.0253,0.02978,0.026939,0.023662,0.022557,0.021473,0.020193,0.021024,0.019219,0.017284


In [9]:
# Reshape to one row per (country, date) with one column per series, keep only
# the charted countries, and derive 'active' as confirmed - deaths - recovered.
flat_df = (
    cdf.stack()
    .unstack('Series')
    .loc[c30_idx]
    .assign(active=lambda frame: frame['confirmed'] - frame['deaths'] - frame['recovered'])
)

In [10]:
def gen_rates(df, days_ago, names):
    """Return the ratio of the latest column of ``df`` to earlier columns.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per entity, columns in chronological order.
    days_ago : sequence of int
        How many columns before the last one each comparison looks back.
    names : sequence of str
        Output column label for each entry of ``days_ago`` (paired positionally).

    Returns
    -------
    pandas.DataFrame
        Indexed like ``df``, one column per name holding ``latest / earlier``.
    """
    latest = df.iloc[:, -1]
    # Positions counted from the end: -1 is the latest column, -1 - k is k columns earlier.
    positions = -1 - np.array(days_ago)
    earlier = df.iloc[:, positions]
    ratios = {}
    for label, (_, past) in zip(names, earlier.items()):
        ratios[label] = latest / past
    return pd.DataFrame(ratios)

In [11]:
# Per country, count the dates on which the confirmed total is above zero, then
# keep the labels of the countries with at least 10 such dates.
numdays_series = cdf.loc['confirmed'].gt(0).sum(axis=1)
has10days = numdays_series[numdays_series >= 10].index


In [12]:
# Growth factor of confirmed cases over 1, 3, 7 and 14 columns (days) for every
# country, then restricted to countries with at least 10 days of cases.
rates_all = gen_rates(
    cdf.loc['confirmed'],
    days_ago=[1, 3, 7, 14],
    names=['1 day', '3 day', '1 week', '2 week'],
)
rates = rates_all.loc[has10days]

In [13]:
def set_widths(figure, factor_series):
    """Style each trace of ``figure`` from its country's scaling factors.

    For every trace (named after a country) this:
    * appends one '<br>Scaling - <label>=<value>x' line per factor to the hover template;
    * sets the trace to 'legendonly' when the country is not in SHOW_SET;
    * sets the line width to ``log2(factor['1 week']) + 1``.

    Parameters
    ----------
    figure : figure object whose ``data`` is an iterable of traces.
    factor_series : table indexed by country; ``.loc[country]`` must yield a
        labelled series that includes a '1 week' entry.

    Returns
    -------
    The same ``figure``, modified in place.
    """
    for trace in figure.data:
        name = trace['name']
        scaling = factor_series.loc[name]
        hover_lines = [
            '<br>Scaling - %s=%.1fx' % (label, value)
            for label, value in scaling.items()
        ]
        trace['hovertemplate'] = trace['hovertemplate'] + ''.join(hover_lines)
        if name not in SHOW_SET:
            trace['visible'] = 'legendonly'
        # Faster weekly growth gives a thicker line.
        trace['line'].update(width=np.log2(scaling['1 week']) + 1)
    return figure

# Default keyword arguments for px.line; coronavirus_figure() lets callers override them.
FIGURE_KW = {
    'facet_col_wrap': 2,
    'height': 900,
    'labels': {'0': 'count'},
    'log_y': True,
    'color': 'Country/Region',
    'line_dash': 'Country/Region',
}


def coronavirus_figure(xdf, countries, today, factors, y=0, **kw):
    """Build the line chart of case data and style its traces with set_widths().

    Parameters
    ----------
    xdf : long-format frame passed to px.line; must have a 'date' column.
    countries : iterable giving the category order for 'Country/Region'.
    today : text inserted at the end of the figure title.
    factors : per-country scaling factors forwarded to set_widths().
    y : column plotted on the y axis (default: the column labelled 0).
    **kw : extra px.line keyword arguments; these override FIGURE_KW.

    Returns
    -------
    The figure returned by set_widths().
    """
    title = 'Coronavirus case data for NZ & the 40 countries with the most confirmed cases for %s' % today
    line_kw = {**FIGURE_KW, **kw}
    figure = px.line(
        xdf,
        x='date',
        y=y,
        title=title,
        category_orders={'Country/Region': list(countries)},
        **line_kw,
    )
    return set_widths(figure, factors)
    

In [15]:
# Grouped, log-scaled bar chart of the growth factors: one group per look-back
# window ('level_0'), one bar per charted country.
px.bar(
    rates.loc[c30_idx].unstack().reset_index(),
    x='level_0',
    y=0,
    color='Country/Region',
    barmode='group',
    log_y=True,
)

In [16]:
# Long format: one row per (country, date, series) with the value in column 0,
# drawn as one facet per series.
xdf = flat_df.stack().reset_index()
fig = coronavirus_figure(
    xdf,
    c30_idx,
    the_dates[-1].strftime('%d %B'),
    rates,
    facet_col='Series',
)
fig

In [17]:
# The 30 largest confirmed totals on the most recent date.
(
    cdf.loc['confirmed']
    .iloc[:, -1]
    .nlargest(30)
)

Country/Region
US                732197.0
Spain             191726.0
Italy             175925.0
France            149149.0
Germany           143342.0
United Kingdom    115314.0
China              83787.0
Turkey             82329.0
Iran               80868.0
Belgium            37183.0
Russia             36793.0
Brazil             36658.0
Canada             34355.0
Netherlands        31766.0
Switzerland        27404.0
Portugal           19685.0
India              15722.0
Ireland            14758.0
Austria            14671.0
Peru               14420.0
Sweden             13822.0
Israel             13265.0
Korea, South       10653.0
Japan              10296.0
Chile               9730.0
Ecuador             9022.0
Poland              8742.0
Romania             8418.0
Saudi Arabia        8274.0
Pakistan            7638.0
Name: 2020-04-18 00:00:00, dtype: float64

In [18]:
# One row per (country, date) with a column per series; plot confirmed cases and
# expose the other series in the hover box.
xdf = flat_df.reset_index()
# Build the "<day> <month>" label from .day instead of strftime('%e'): '%e' is a
# platform-specific directive and is not accepted everywhere (e.g. on Windows).
latest_date = the_dates[-1]
today_label = '%d %s' % (latest_date.day, latest_date.strftime('%B'))
fig = coronavirus_figure(
    xdf,
    c30_idx,
    today_label,
    rates,
    y='confirmed',
    hover_data=['deaths', 'recovered', 'active'],
)
# Export a standalone HTML page that loads plotly.js from the CDN.
fig.write_html(common.site_file('Coronavirus_plot.html'), include_plotlyjs='cdn')
fig