In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import common

In [2]:
# Countries whose traces are drawn by default; set_widths() marks every other
# country as 'legendonly'.
SHOW_SET = {
    'New Zealand', 'China', 'Italy', 'Spain', 'Germany', 'US', 'Korea, South',
    'Japan', 'Canada', 'Australia', 'Israel', 'Ireland', 'France', 'United Kingdom',
}
# Names of the time series to download; each is substituted into URL_FORMAT.
SERIES = ['confirmed', 'deaths', 'recovered']
# CSV location template in the CSSEGISandData/COVID-19 repository (%s = series name).
URL_FORMAT = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_%s_global.csv'
)
# Published Google Sheets workbook (xlsx export) that holds the 'New Zealand' sheet.
DOCS_LINK = (
    'https://docs.google.com/spreadsheets/d/e/'
    '2PACX-1vQCN9pL21lGy3XPBhKwMX7jT1_SG-Sb_4ZWZ1I0Ctd-0vNhtmH4gFKaLsV5jhz4vSjYlQ9NR_fXF_b6'
    '/pub?output=xlsx'
)

In [3]:
def update_nz_data(cdf, nz_df):
    """Overwrite the New Zealand rows of ``cdf`` in place with figures from ``nz_df``.

    The figure found in ``nz_df`` at row ``i + 1`` is written to the ``cdf``
    column labelled ``nz_df.index[i]``, i.e. every value is stored one index
    position earlier than where it appears in ``nz_df``.  Missing values are
    written as 0.

    Parameters
    ----------
    cdf : pandas.DataFrame
        Rows indexed by ``(series, country)``; must contain the rows
        ``('confirmed' | 'deaths' | 'recovered', 'New Zealand')`` and a column
        for every label in ``nz_df.index[:-1]``.  Modified in place.
    nz_df : pandas.DataFrame
        Must provide the columns 'Cases', 'Deaths' and 'Recovered' and at
        least two rows.

    Returns
    -------
    None
    """
    # Pair every series with its source column explicitly.  Selecting the rows
    # with one list and the columns with another (in a different order) only
    # works if pandas happens to return the rows in index order, which is
    # version dependent and can silently swap deaths and recovered.
    series_to_column = (
        ('confirmed', 'Cases'),
        ('deaths', 'Deaths'),
        ('recovered', 'Recovered'),
    )
    target_dates = nz_df.index[:-1]
    shifted = nz_df.loc[nz_df.index[1]:, ['Cases', 'Deaths', 'Recovered']].fillna(0)
    for series, column in series_to_column:
        cdf.loc[(series, 'New Zealand'), target_dates] = shifted[column].values

In [4]:
# Download one CSV per series and total the provincial rows per country.
# The non-date metadata columns are dropped by name before summing: relying on
# groupby().sum() to discard the string 'Province/State' column and then
# slicing off two columns by position breaks on pandas versions that keep
# non-numeric columns in the aggregate.
cdf = pd.concat(
    {
        series: (
            pd.read_csv(URL_FORMAT % series)
            .drop(columns=['Province/State', 'Lat', 'Long'])
            .groupby('Country/Region')
            .sum()
        )
        for series in SERIES
    },
    names=['Series'],
)
# Column labels arrive as month-first date strings; turn them into a
# DatetimeIndex named 'date'.  Plain assignment replaces
# set_axis(..., inplace=True), which pandas 2.0 removed.
the_dates = pd.to_datetime(cdf.columns, dayfirst=False)
cdf.columns = the_dates.rename('date')

In [5]:
# Confirmed counts for Canada from 16 March 2020 onwards (single .loc lookup).
cdf.loc[('confirmed', 'Canada'), '2020-03-16':]

date
2020-03-16      415
2020-03-17      478
2020-03-18      657
2020-03-19      800
2020-03-20      943
2020-03-21     1277
2020-03-22     1469
2020-03-23     2088
2020-03-24     2790
2020-03-25     3251
2020-03-26     4042
2020-03-27     4682
2020-03-28     5576
2020-03-29     6280
2020-03-30     7398
2020-03-31     8527
2020-04-01     9560
2020-04-02    11284
2020-04-03    12437
2020-04-04    12978
2020-04-05    15756
2020-04-06    16563
2020-04-07    17872
2020-04-08    19141
Name: (confirmed, Canada), dtype: int64

In [6]:
# Read the 'New Zealand' sheet of the published workbook; the column names sit
# on the second row (header=1) and the 'Date' column becomes a parsed index.
with pd.ExcelFile(DOCS_LINK) as workbook:
    nz_sheet = workbook.parse('New Zealand', index_col='Date', parse_dates=True, header=1)
# Discard rows in which every column is missing.
nz_df = nz_sheet.dropna(how='all')

In [7]:
# Overwrite the New Zealand rows of cdf in place with the figures loaded into
# nz_df; the call returns nothing, so re-running it simply rewrites the same cells.
update_nz_data(cdf, nz_df)

In [8]:
# New Zealand first, followed by the 30 countries with the highest confirmed
# count in the most recent column.
latest_confirmed = cdf.loc['confirmed', cdf.columns[-1]]
c30_idx = ['New Zealand', *latest_confirmed.nlargest(30).index]

In [9]:
# Day-over-day fractional change in confirmed cases, last 10 columns only.
(
    cdf.loc['confirmed']
    .loc[c30_idx]
    .pct_change(periods=1, axis=1)
    .iloc[:, -10:]
)

date,2020-03-30,2020-03-31,2020-04-01,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
New Zealand,0.098472,0.094281,0.125706,0.089084,0.09447,0.093684,0.064485,0.048825,0.043103,0.023967
US,0.148521,0.162725,0.13392,0.141743,0.131231,0.120703,0.091378,0.0878,0.080607,0.082855
Spain,0.09794,0.090579,0.085433,0.076327,0.063659,0.058465,0.043418,0.038201,0.038537,0.044229
Italy,0.041458,0.039837,0.045202,0.042216,0.039786,0.040099,0.03463,0.02791,0.022928,0.028292
France,0.10961,0.169515,0.093172,0.03775,0.087987,0.393331,0.032197,0.055346,0.112183,0.035379
Germany,0.07714,0.073604,0.084447,0.088889,0.075064,0.054114,0.041949,0.03247,0.04149,0.052321
China,0.000925,0.000985,0.000997,0.000862,0.000958,0.000388,0.000715,0.000763,0.000641,0.0011
Iran,0.083166,0.074949,0.066988,0.060408,0.053796,0.048136,0.044544,0.039055,0.034529,0.031907
United Kingdom,0.135137,0.134859,0.17205,0.144249,0.132151,0.097909,0.140288,0.079342,0.0702,0.098751
Turkey,0.174677,0.249746,0.158747,0.156643,0.153626,0.144018,0.130985,0.116295,0.128802,0.120701


In [10]:
# Reshape to one row per (country, date) with one column per series, keep only
# the selected countries, then derive the number of active cases.
by_country_date = cdf.stack().unstack('Series').loc[c30_idx]
flat_df = by_country_date.assign(
    active=lambda frame: frame.confirmed - frame.deaths - frame.recovered
)

In [11]:
def gen_rates(df, days_ago, names):
    """Return growth factors of the last column relative to earlier columns.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per point in time, oldest first.
    days_ago : sequence of int
        How many columns back from the last column each comparison reaches.
    names : sequence of str
        Output column label for each entry of ``days_ago`` (paired in order).

    Returns
    -------
    pandas.DataFrame
        Same index as ``df``; column ``names[i]`` holds the last column of
        ``df`` divided by the column ``days_ago[i]`` positions before it.
    """
    latest = df.iloc[:, -1]
    # Negative positions counted from the end: -1 is the last column, so
    # -1 - d is the column d steps earlier.
    offsets = -1 - np.array(days_ago)
    earlier = df.iloc[:, offsets]
    ratios = {}
    for label, (_, past) in zip(names, earlier.items()):
        ratios[label] = latest / past
    return pd.DataFrame(ratios)

In [12]:
# Per country: number of date columns with at least one confirmed case.
numdays_series = (cdf.loc['confirmed'] > 0).sum(axis=1)
# Countries with 10 or more such columns.
has10days = numdays_series[numdays_series >= 10].index


In [13]:
# Growth factors of confirmed cases over 1, 3, 7 and 14 columns (days) back,
# then restricted to the countries listed in has10days.
rates_all = gen_rates(
    cdf.loc['confirmed'],
    days_ago=[1, 3, 7, 14],
    names=['1 day', '3 day', '1 week', '2 week'],
)
rates = rates_all.loc[has10days]

In [14]:
def set_widths(figure, factor_series):
    """Style every trace of ``figure`` from its country's growth factors.

    For each trace (whose ``name`` is used as the country key) this:
    appends one '<br>Scaling - ...' line per entry of the country's row in
    ``factor_series`` to the hover template, hides the trace behind the legend
    unless the country is in ``SHOW_SET``, and sets the line width to
    ``log2(<'1 week' factor>) + 1``.

    Parameters
    ----------
    figure : object with a ``data`` iterable of traces
        Traces are mutated in place.
    factor_series : pandas.DataFrame
        Indexed by country; must contain a '1 week' column.

    Returns
    -------
    The same ``figure`` object.
    """
    for trace in figure.data:
        country = trace['name']
        factor = factor_series.loc[country]

        # One extra hover line per (label, factor) pair.
        scaling_lines = ['<br>Scaling - %s=%.1fx' % pair for pair in factor.items()]
        trace['hovertemplate'] = trace['hovertemplate'] + ''.join(scaling_lines)

        if country not in SHOW_SET:
            trace['visible'] = 'legendonly'

        # Weekly doubling (factor 2) adds one unit of width.
        line_width = np.log2(factor['1 week']) + 1
        trace['line'].update(width=line_width)
    return figure

# Default keyword arguments that coronavirus_figure() forwards to px.line.
FIGURE_KW = {
    'facet_col_wrap': 2,
    'height': 900,
    'labels': {'0': 'count'},
    'log_y': True,
    'color': 'Country/Region',
    'line_dash': 'Country/Region',
}
    
def coronavirus_figure(xdf, countries, today, factors, y=0, **kw):
    """Build the line chart of case data and style its traces.

    Parameters
    ----------
    xdf : pandas.DataFrame
        Long-form data passed to ``px.line``; plotted against its 'date' column.
    countries : iterable
        Category order for 'Country/Region'.
    today : str
        Date text inserted into the figure title.
    factors : pandas.DataFrame
        Growth factors handed to ``set_widths``.
    y : column label, default 0
        Column of ``xdf`` plotted on the y axis.
    **kw
        Extra ``px.line`` keyword arguments; they override ``FIGURE_KW``.

    Returns
    -------
    The figure returned by ``set_widths``.
    """
    title = 'Coronavirus case data for NZ & the 30 countries with the most confirmed cases for %s' % today
    line_options = {**FIGURE_KW, **kw}
    figure = px.line(
        xdf,
        category_orders={'Country/Region': list(countries)},
        x='date',
        y=y,
        title=title,
        **line_options
    )
    return set_widths(figure, factors)
    

In [15]:
# Selected countries ranked by their one-week growth factor, largest first.
rates.loc[c30_idx].nlargest(n=30, columns='1 week')

Unnamed: 0_level_0,1 day,3 day,1 week,2 week
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Russia,1.156729,1.609204,3.122794,13.179331
India,1.113915,1.648829,2.960961,9.004566
Turkey,1.120701,1.412169,2.438038,15.711467
Brazil,1.152202,1.45283,2.365418,6.331245
United Kingdom,1.098751,1.26918,2.058396,6.376971
Poland,1.073639,1.268893,2.03798,4.952426
US,1.082855,1.272879,2.010817,6.522728
Canada,1.071005,1.214839,2.002197,5.887727
France,1.035379,1.215265,1.97335,4.451523
Romania,1.077881,1.232143,1.935366,5.254967


In [16]:
# Long-form table of growth factors: 'level_0' holds the period label and
# column 0 the factor, one row per (period, country).
rates_long = rates.loc[c30_idx].unstack().reset_index()
px.bar(
    rates_long,
    barmode='group',
    color='Country/Region',
    x='level_0',
    y=0,
    log_y=True,
)

In [17]:
# One row per (country, date, series) so each series gets its own facet.
xdf = flat_df.stack().reset_index()
latest_label = the_dates[-1].strftime('%d %B')
fig = coronavirus_figure(xdf, c30_idx, latest_label, rates, facet_col='Series')
fig

In [18]:
# One row per (country, date); the confirmed count is plotted and the other
# series are shown on hover.
xdf = flat_df.reset_index()
latest_date = the_dates[-1]
# Build the "day month" label without strftime('%e'): that directive is a
# platform libc extension and is not available everywhere (e.g. Windows).
today_label = '%d %s' % (latest_date.day, latest_date.strftime('%B'))
fig = coronavirus_figure(
    xdf,
    c30_idx,
    today_label,
    rates,
    y='confirmed',
    hover_data=['deaths', 'recovered', 'active'],
)
# Export a standalone page that loads plotly.js from the CDN.
fig.write_html(common.site_file('Coronavirus_plot.html'), include_plotlyjs='cdn')
fig