In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import common

In [2]:
# Countries whose traces are visible by default; set_widths() marks every
# other trace as legend-only.
SHOW_SET = {
    'New Zealand', 'China', 'Italy', 'Spain', 'Germany', 'US', 'Korea, South',
    'Japan', 'Canada', 'Australia', 'Israel', 'Ireland', 'France', 'United Kingdom',
}
# Names substituted into URL_FORMAT, one download per entry.
SERIES = ['confirmed', 'deaths', 'recovered']
# Template of the per-series CSV; '%s' is replaced by an entry of SERIES.
URL_FORMAT = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_%s_global.csv'
)
# Published spreadsheet (xlsx export) whose 'New Zealand' sheet is parsed further down.
DOCS_LINK = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQCN9pL21lGy3XPBhKwMX7jT1_SG-Sb_4ZWZ1I0Ctd-0vNhtmH4gFKaLsV5jhz4vSjYlQ9NR_fXF_b6/pub?output=xlsx'

In [3]:
def update_nz_data(cdf, nz_df):
    """Overwrite the New Zealand rows of ``cdf`` in place with figures from ``nz_df``.

    The figures are shifted back by one row of ``nz_df``: the value stored in
    ``nz_df`` at its k-th date is written to the ``cdf`` column labelled with
    the (k-1)-th date. Missing values in ``nz_df`` are written as 0.

    Parameters
    ----------
    cdf : pandas.DataFrame
        Rows keyed by ``(series, country)`` and one column per date; must
        contain the ``confirmed``/``deaths``/``recovered`` rows for
        ``'New Zealand'``. Modified in place.
    nz_df : pandas.DataFrame
        Indexed by date, with ``'Cases'``, ``'Deaths'`` and ``'Recovered'``
        columns.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If ``nz_df`` has fewer than two rows (there is nothing to shift).
    """
    # Each series is paired with its source column explicitly and written one
    # row at a time.  A single block assignment would depend on the order in
    # which pandas returns rows selected with a list of labels, which can
    # silently swap the deaths and recovered figures.
    column_for_series = (
        ('confirmed', 'Cases'),
        ('deaths', 'Deaths'),
        ('recovered', 'Recovered'),
    )
    target_dates = nz_df.index[:-1]
    shifted = nz_df.loc[nz_df.index[1]:, ['Cases', 'Deaths', 'Recovered']].fillna(0)
    for series, column in column_for_series:
        cdf.loc[(series, 'New Zealand'), target_dates] = shifted[column].values

In [4]:
# Download one CSV per entry of SERIES, total the rows of each country, and
# stack the results under an outer 'Series' index level, giving rows keyed by
# (Series, Country/Region) and one column per date.
#
# The non-count columns are dropped by name before summing.  Slicing them off
# positionally after .sum() only works when pandas silently discards the text
# column during the sum, which newer versions no longer do.
# NOTE(review): assumes the CSV header is Province/State, Country/Region, Lat,
# Long, <dates...> -- confirm against the upstream files.
cdf = pd.concat(
    {
        series: (
            pd.read_csv(URL_FORMAT % series)
            .drop(columns=['Province/State', 'Lat', 'Long'])
            .groupby('Country/Region')
            .sum()
        )
        for series in SERIES
    },
    names=['Series'],
)
# Convert the month-first date headers to timestamps and name the column axis
# 'date'.  Plain assignment replaces set_axis(..., inplace=True), whose
# `inplace` argument no longer exists in pandas 2.x.
the_dates = pd.to_datetime(cdf.columns, dayfirst=False).rename('date')
cdf.columns = the_dates

In [5]:
# Confirmed counts for Canada from 16 March 2020 onwards.
cdf.loc[('confirmed', 'Canada')].loc['2020-03-16':]

date
2020-03-16      415
2020-03-17      478
2020-03-18      657
2020-03-19      800
2020-03-20      943
2020-03-21     1277
2020-03-22     1469
2020-03-23     2088
2020-03-24     2790
2020-03-25     3251
2020-03-26     4042
2020-03-27     4682
2020-03-28     5576
2020-03-29     6280
2020-03-30     7398
2020-03-31     8527
2020-04-01     9560
2020-04-02    11284
2020-04-03    12437
2020-04-04    12978
2020-04-05    15756
2020-04-06    16563
2020-04-07    17872
2020-04-08    19141
2020-04-09    20654
2020-04-10    22059
2020-04-11    23316
2020-04-12    24298
2020-04-13    25679
Name: (confirmed, Canada), dtype: int64

In [6]:
# Read the 'New Zealand' sheet of the published workbook, indexed by its
# 'Date' column (header=1: column names come from the second row), and drop
# rows in which every cell is empty.
with pd.ExcelFile(DOCS_LINK) as excel_file:
    nz_df = excel_file.parse(
        sheet_name='New Zealand',
        header=1,
        index_col='Date',
        parse_dates=True,
    ).dropna(how='all')

In [7]:
# Replace the New Zealand rows of cdf (modified in place) with the spreadsheet figures.
update_nz_data(cdf=cdf, nz_df=nz_df)

In [8]:
# New Zealand first, then the 30 countries with the largest confirmed count in
# the most recent date column.
c30_idx = ['New Zealand', *cdf.loc['confirmed'].iloc[:, -1].nlargest(30).index]

In [9]:
# Day-over-day fractional change in confirmed cases for the selected
# countries, restricted to the last 10 date columns.
(
    cdf.loc['confirmed']
    .loc[c30_idx]
    .pct_change(periods=1, axis=1)
    .iloc[:, -10:]
)

date,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,2020-04-09,2020-04-10,2020-04-11,2020-04-12,2020-04-13
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
New Zealand,0.093684,0.064485,0.048825,0.043103,0.023967,0.035513,0.022603,0.01372,0.014286,0.012602
US,0.120714,0.091367,0.0878,0.080607,0.082855,0.07548,0.076062,0.060139,0.054934,0.045571
Spain,0.058465,0.043418,0.038201,0.038537,0.044229,0.033747,0.032965,0.030037,0.023334,0.019589
Italy,0.040099,0.03463,0.02791,0.022928,0.028292,0.030153,0.027509,0.031807,0.026873,0.020165
France,0.393331,0.032197,0.055346,0.112183,0.035379,0.042313,0.060195,0.038084,0.022513,0.031458
Germany,0.054114,0.041949,0.03247,0.04149,0.052321,0.043117,0.033762,0.022403,0.023585,0.017348
United Kingdom,0.097909,0.140288,0.079342,0.0702,0.098751,0.071542,0.132575,0.070625,0.066755,0.051217
China,0.000388,0.000715,0.000763,0.000641,0.0011,0.000894,0.0007,0.00088,0.001446,0.00095
Iran,0.048136,0.044544,0.039055,0.034529,0.031907,0.0253,0.02978,0.026939,0.023662,0.022557
Turkey,0.144018,0.130985,0.116295,0.128802,0.120701,0.106106,0.11227,0.109252,0.091801,0.071862


In [10]:
# Reshape to one row per (country, date) with one column per series, keep the
# selected countries, and derive 'active' = confirmed - deaths - recovered.
flat_df = (
    cdf.stack()
    .unstack('Series')
    .loc[c30_idx]
    .assign(active=lambda frame: frame.confirmed - frame.deaths - frame.recovered)
)

In [11]:
def gen_rates(df, days_ago, names):
    """Return growth factors of the last column relative to earlier columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are ordered in time; the last column is "today".
    days_ago : sequence of int
        Column offsets counted back from the last column.
    names : sequence of str
        Output column labels, paired positionally with ``days_ago``.

    Returns
    -------
    pandas.DataFrame
        One column per name, holding ``last column / column d positions earlier``
        for the matching offset ``d``; rows follow the index of ``df``.
    """
    latest = df.iloc[:, -1]
    # Select all reference columns in a single positional lookup.
    earlier = df.iloc[:, -1 - np.array(days_ago)]
    ratios = {}
    for label, (_, past) in zip(names, earlier.items()):
        ratios[label] = latest / past
    return pd.DataFrame(ratios)

In [12]:
# Per country: number of date columns with a positive confirmed count.
numdays_series = cdf.loc['confirmed'].gt(0).sum(axis=1)
# Countries with at least 10 such dates.
has10days = numdays_series[numdays_series >= 10].index


In [13]:
# Growth factor of the latest confirmed count relative to 1, 3, 7 and 14
# columns earlier, for every country.
rates_all = gen_rates(
    cdf.loc['confirmed'],
    days_ago=[1, 3, 7, 14],
    names=['1 day', '3 day', '1 week', '2 week'],
)
# Keep only the countries listed in has10days.
rates = rates_all.loc[has10days]

In [14]:
def set_widths(figure, factor_series):
    """Style every trace of ``figure`` from its country's growth factors.

    For each trace (looked up by trace name in ``factor_series``) this:

    * appends one "Scaling" line per growth factor to the hover template,
    * hides the trace behind the legend when its name is not in ``SHOW_SET``,
    * sets the line width to ``log2(<'1 week' factor>) + 1``.

    Parameters
    ----------
    figure : plotly figure whose traces are named after countries; its traces
        are modified in place.
    factor_series : pandas.DataFrame indexed by country, with a ``'1 week'``
        column among its factors.

    Returns
    -------
    The same ``figure`` object.
    """
    for trace in figure.data:
        name = trace['name']
        scaling = factor_series.loc[name]
        hover_suffix = ''.join(
            '<br>Scaling - %s=%.1fx' % item for item in scaling.items()
        )
        trace['hovertemplate'] = trace['hovertemplate'] + hover_suffix
        if name not in SHOW_SET:
            trace['visible'] = 'legendonly'
        # Faster weekly growth draws a thicker line.
        trace['line'].update(width=np.log2(scaling['1 week'])+1)
    return figure

# Default keyword arguments passed to px.line by coronavirus_figure();
# caller-supplied keywords override these.
FIGURE_KW = {
    'facet_col_wrap': 2,
    'height': 900,
    'labels': {'0': 'count'},
    'log_y': True,
    'color': 'Country/Region',
    'line_dash': 'Country/Region',
}
    
def coronavirus_figure(xdf, countries, today, factors, y=0, **kw):
    """Build the line chart of case data and style its traces.

    Parameters
    ----------
    xdf : data frame handed to ``px.line``; must have a ``'date'`` column and
        the column named by ``y``.
    countries : iterable giving the display order of ``'Country/Region'``.
    today : text appended to the figure title.
    factors : growth-factor table forwarded to ``set_widths``.
    y : column plotted on the y axis (default ``0``).
    **kw : extra ``px.line`` keywords; they override the ``FIGURE_KW`` defaults.

    Returns
    -------
    The figure returned by ``set_widths``.
    """
    heading = 'Coronavirus case data for NZ & the 30 countries with the most confirmed cases for %s' % today
    ordering = {'Country/Region': list(countries)}
    options = {**FIGURE_KW, **kw}
    figure = px.line(
        xdf,
        category_orders=ordering,
        x='date',
        y=y,
        title=heading,
        **options
    )
    return set_widths(figure, factors)
    

In [15]:
# Selected countries ranked by their '1 week' growth factor, largest first.
rates.loc[c30_idx].nlargest(n=30, columns='1 week')

Unnamed: 0_level_0,1 day,3 day,1 week,2 week
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Peru,1.301237,1.659149,3.820383,10.298947
Russia,1.162207,1.537971,2.889484,9.982571
India,1.135578,1.375757,2.187735,8.355715
Turkey,1.071862,1.298114,2.020353,5.638589
Japan,1.092175,1.332731,2.016968,3.949625
Ecuador,1.008438,1.051389,2.009341,3.837411
Ireland,1.102745,1.316232,1.984899,3.658763
Brazil,1.055786,1.193095,1.926651,5.116838
United Kingdom,1.051217,1.20059,1.713307,3.989222
Romania,1.052857,1.21328,1.634952,3.145092


In [16]:
# Grouped bars of every growth factor per country on a log axis.  After
# unstack().reset_index() the factor name is in 'level_0' and the value in
# the column labelled 0.
px.bar(
    rates.loc[c30_idx].unstack().reset_index(),
    x='level_0',
    y=0,
    color='Country/Region',
    barmode='group',
    log_y=True,
)

In [17]:
# Long format: one row per (country, date, series), values in the column
# labelled 0, drawn as one facet per series.
xdf = flat_df.stack().reset_index()
fig = coronavirus_figure(
    xdf,
    countries=c30_idx,
    today=the_dates[-1].strftime('%d %B'),
    factors=rates,
    facet_col='Series',
)
fig

In [18]:
# The 30 largest confirmed counts in the most recent date column.
(
    cdf.loc['confirmed']
    .iloc[:, -1]
    .nlargest(n=30)
)

Country/Region
US                580619.0
Spain             170099.0
Italy             159516.0
France            137875.0
Germany           130072.0
United Kingdom     89570.0
China              83213.0
Iran               73303.0
Turkey             61049.0
Belgium            30589.0
Netherlands        26710.0
Switzerland        25688.0
Canada             25679.0
Brazil             23430.0
Russia             18328.0
Portugal           16934.0
Austria            14041.0
Israel             11586.0
Sweden             10948.0
Ireland            10647.0
Korea, South       10537.0
India              10453.0
Peru                9784.0
Ecuador             7529.0
Chile               7525.0
Japan               7370.0
Poland              6934.0
Romania             6633.0
Norway              6603.0
Denmark             6513.0
Name: 2020-04-13 00:00:00, dtype: float64

In [19]:
# Wide format: one row per (country, date) with a column per series; plot the
# confirmed counts and show the other series on hover.
xdf = flat_df.reset_index()
fig = coronavirus_figure(
    xdf,
    c30_idx,
    # Day without zero padding, built from the .day attribute.  The '%e'
    # strftime directive is a platform extension and is rejected on some
    # platforms (e.g. Windows).
    '%d %s' % (the_dates[-1].day, the_dates[-1].strftime('%B')),
    rates,
    y='confirmed',
    hover_data=['deaths', 'recovered', 'active'],
)
# Save a standalone HTML page that loads plotly.js from the CDN.
# NOTE(review): common.site_file presumably resolves the output path -- confirm in `common`.
fig.write_html(common.site_file('Coronavirus_plot.html'), include_plotlyjs='cdn')
fig