In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
import plotly.io
# Make 'notebook_connected' the default renderer used when Plotly figures are displayed.
plotly.io.renderers.default = 'notebook_connected'

In [3]:
# Countries whose traces start out visible; set_widths() marks every other
# country's trace as 'legendonly'.
SHOW_SET = {
    'New Zealand',
    'China',
    'Italy',
    'Spain',
    'Germany',
    'US',
    'Korea, South',
    'Japan',
    'Canada',
    'Australia',
    'Israel',
    'Ireland',
    'France',
    'United Kingdom',
}

# Names substituted into URL_FORMAT, one download per series.
SERIES = ['confirmed', 'deaths', 'recovered']

# Template for the per-series global time-series CSV; '%s' takes a SERIES entry.
URL_FORMAT = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_%s_global.csv'
)

# Published spreadsheet (xlsx export) that is read with pd.ExcelFile further down.
DOCS_LINK = (
    'https://docs.google.com/spreadsheets/d/e/'
    '2PACX-1vQCN9pL21lGy3XPBhKwMX7jT1_SG-Sb_4ZWZ1I0Ctd-0vNhtmH4gFKaLsV5jhz4vSjYlQ9NR_fXF_b6'
    '/pub?output=xlsx'
)

In [4]:
def update_nz_data(cdf, nz_df):
    """Overwrite the New Zealand rows of ``cdf`` in place with figures from ``nz_df``.

    Each date column ``nz_df.index[i]`` of ``cdf`` receives the values found in
    the *next* row (``nz_df.index[i + 1]``) of ``nz_df``; the last date in
    ``nz_df`` is therefore never written.

    Parameters
    ----------
    cdf : pandas.DataFrame
        Frame whose rows are addressed as ``(series, country)`` and whose
        columns are dates. Modified in place.
    nz_df : pandas.DataFrame
        Date-indexed frame with 'Total Cases', 'Deaths' and 'Recovered'
        columns. Missing values are written as 0.

    Returns
    -------
    None
    """
    # One mapping drives both the row selector and the value columns so the two
    # can never disagree. The order also equals the sorted order of the series
    # labels, so the assignment is correct whether pandas resolves the list key
    # in key order or in index order.
    series_to_column = {
        'confirmed': 'Total Cases',
        'deaths': 'Deaths',
        'recovered': 'Recovered',
    }
    # Rows from the second date onwards: these are the values shifted one day back.
    shifted = nz_df.loc[nz_df.index[1]:, list(series_to_column.values())].fillna(0)
    cdf.loc[(list(series_to_column), 'New Zealand'), nz_df.index[:-1]] = shifted.values.T

In [5]:
# Download one CSV per series, total the rows of each country, and stack the
# per-series frames under an outer 'Series' index level.
# numeric_only=True makes the aggregation skip text columns explicitly instead
# of relying on pandas dropping them silently (newer pandas no longer does).
# .iloc[:, 2:] then discards the first two summed columns
# (presumably latitude/longitude — confirm against the CSV header).
cdf = pd.concat(
    {
        series: (
            pd.read_csv(URL_FORMAT % series)
            .groupby('Country/Region')
            .sum(numeric_only=True)
            .iloc[:, 2:]
        )
        for series in SERIES
    },
    names=['Series'],
)

# Turn the remaining column labels into timestamps (month-first) and name the axis 'date'.
# Plain assignment replaces set_axis(..., inplace=True), which pandas 2.0 removed.
the_dates = pd.to_datetime(cdf.columns, dayfirst=False)
cdf.columns = the_dates.rename('date')

In [33]:
# Read the per-country sheets from the published workbook in one pass.
with pd.ExcelFile(DOCS_LINK) as excel_file:
    # The NZ sheet takes its header from the second row and indexes on 'Date'.
    nz_df = excel_file.parse('From Briefings', header=1, index_col='Date', parse_dates=True)
    # The remaining sheets share one layout, indexed on a lower-case 'date' column.
    aus_df, can_df, uk_df = [
        excel_file.parse(sheet_name, index_col='date', parse_dates=True)
        for sheet_name in ('Australia', 'Canada', 'UK')
    ]

# Discard rows of the NZ sheet in which every cell is missing.
nz_df = nz_df.dropna(how='all')

# US figures come from a separate CSV endpoint; its first column becomes the index.
us_df = pd.read_csv(
    'https://covidtracking.com/api/us/daily.csv',
    index_col=0,
    parse_dates=True,
)


In [34]:
# Set a 'deaths' column of zeros on each spreadsheet-sourced frame.
for country_df in (aus_df, can_df, uk_df):
    country_df['deaths'] = 0

In [8]:
# Replace the New Zealand rows of cdf (modified in place) with the values from nz_df.
update_nz_data(cdf, nz_df)

In [9]:
# New Zealand first, then the 30 countries with the most confirmed cases on the
# latest date. New Zealand is filtered out of the top-30 part so it cannot
# appear twice (a duplicate label would duplicate rows in later .loc[c30_idx] lookups).
c30_idx = ['New Zealand'] + [
    country
    for country in cdf.loc['confirmed', cdf.columns[-1]].nlargest(30).index
    if country != 'New Zealand'
]

In [10]:
# Fractional change in confirmed cases from one date column to the next,
# for the selected countries, limited to the 10 most recent dates.
(
    cdf.loc['confirmed']
    .loc[c30_idx]
    .pct_change(periods=1, axis=1)
    .iloc[:, -10:]
)

date,2020-03-22,2020-03-23,2020-03-24,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29,2020-03-30,2020-03-31
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
New Zealand,0.84514,0.383929,0.322581,0.380488,0.300353,0.225543,0.13969,0.145914,0.098472,0.094281
US,0.305504,0.317676,0.225625,0.224004,0.274529,0.21257,0.194979,0.159766,0.148496,0.162941
Italy,0.103774,0.08098,0.082109,0.075315,0.083389,0.073323,0.069065,0.056417,0.041458,0.039837
Spain,0.133759,0.221357,0.135161,0.241444,0.16704,0.137282,0.114366,0.093876,0.09794,0.090579
China,0.001599,0.000774,0.001141,0.000858,0.001482,0.001406,0.001245,0.0015,0.000925,0.000985
Germany,0.11975,0.168174,0.135256,0.13148,0.177237,0.157791,0.134143,0.076263,0.07714,0.073604
France,0.123073,0.238872,0.124186,0.131642,0.154336,0.130317,0.1408,0.068311,0.10961,0.169515
Iran,0.049879,0.065209,0.076446,0.088912,0.088426,0.099504,0.095138,0.081931,0.083166,0.074949
United Kingdom,0.133807,0.170757,0.213797,0.180794,0.225311,0.248307,0.174093,0.14256,0.135137,0.134859
Switzerland,0.13673,0.176746,0.123024,0.10327,0.083876,0.094573,0.0888,0.053495,0.073707,0.042897


In [11]:
# Reshape to one row per (country, date) with a column per series, keep only
# the selected countries, and derive the number of active cases.
flat_df = (
    cdf.stack()
    .unstack('Series')
    .loc[c30_idx]
    .assign(active=lambda frame: frame['confirmed'] - frame['deaths'] - frame['recovered'])
)

In [12]:
def gen_rates(df, days_ago, names):
    """Return growth factors of the latest column relative to earlier columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are ordered in time; the last column is "today".
    days_ago : sequence of int
        How many columns back from the last one each comparison column lies.
    names : sequence of str
        Column name to use for each entry of ``days_ago``.

    Returns
    -------
    pandas.DataFrame
        One column per name holding ``latest / value_that_many_columns_back``.
    """
    latest = df.iloc[:, -1]
    # Position -1 is the latest column, so "k columns ago" sits at position -1 - k.
    earlier = df.iloc[:, -1 - np.array(days_ago)]
    factors = {}
    for label, (_, past_values) in zip(names, earlier.items()):
        factors[label] = latest / past_values
    return pd.DataFrame(factors)

In [13]:
# For each country, count the date columns that have at least one confirmed case ...
numdays_series = cdf.loc['confirmed'].gt(0).sum(axis=1)
# ... and keep the countries where that count is 10 or more.
has10days = numdays_series[numdays_series >= 10].index


In [14]:
# Growth factors of confirmed cases over 1, 3, 7 and 14 columns (days) for every country.
rates_all = gen_rates(
    cdf.loc['confirmed'],
    days_ago=[1, 3, 7, 14],
    names=['1 day', '3 day', '1 week', '2 week'],
)
# Restrict to the countries selected in has10days.
rates = rates_all.loc[has10days]

In [15]:
def set_widths(figure, factor_series):
    """Annotate and style every trace of ``figure`` using per-country growth factors.

    For each trace (looked up in ``factor_series`` by the trace's ``name``):

    * one ``Scaling - <label>=<value>x`` line per factor is appended to the
      hover template;
    * the trace is set to ``'legendonly'`` unless its name is in ``SHOW_SET``;
    * the line width becomes ``log2(<'1 week' factor>) + 1``.

    Parameters
    ----------
    figure : object with a ``data`` iterable of traces
        The traces are modified in place.
    factor_series : pandas.DataFrame
        Indexed by trace name; must contain a '1 week' column.

    Returns
    -------
    The same ``figure`` object.
    """
    for trace in figure.data:
        trace_name = trace['name']
        factors = factor_series.loc[trace_name]

        scaling_lines = [
            '<br>Scaling - %s=%.1fx' % (label, value)
            for label, value in factors.items()
        ]
        trace['hovertemplate'] = trace['hovertemplate'] + ''.join(scaling_lines)

        if trace_name not in SHOW_SET:
            trace['visible'] = 'legendonly'

        # Thicker lines for faster weekly growth.
        trace['line'].update(width=np.log2(factors['1 week']) + 1)
    return figure

# Default keyword arguments that coronavirus_figure() passes on to px.line;
# individual calls can override any of them.
FIGURE_KW = {
    'facet_col_wrap': 2,
    'height': 900,
    'labels': {'0': 'count'},
    'log_y': True,
    'color': 'Country/Region',
    'line_dash': 'Country/Region',
}
    
def coronavirus_figure(xdf, countries, today, factors, y=0, **kw):
    """Build the per-country line chart and style its traces with ``set_widths``.

    Parameters
    ----------
    xdf : pandas.DataFrame
        Long-format data with a 'date' column and the column named by ``y``.
    countries : iterable
        Order in which 'Country/Region' categories are presented.
    today : str
        Date text interpolated into the chart title.
    factors : pandas.DataFrame
        Growth factors handed to ``set_widths``.
    y : column label, default 0
        Column plotted on the y axis.
    **kw
        Extra ``px.line`` keyword arguments; they take precedence over ``FIGURE_KW``.

    Returns
    -------
    The figure returned by ``set_widths``.
    """
    title = 'Coronavirus case data for NZ & the 30 countries with the most confirmed cases for %s' % today
    # Caller-supplied options win over the shared defaults.
    line_options = {**FIGURE_KW, **kw}
    figure = px.line(
        xdf,
        x='date',
        y=y,
        title=title,
        category_orders={'Country/Region': list(countries)},
        **line_options,
    )
    return set_widths(figure, factors)
    

In [16]:
# The 20 countries with the largest two-week growth factor.
rates.nlargest(n=20, columns='2 week')

Unnamed: 0_level_0,1 day,3 day,1 week,2 week
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Angola,1.0,1.4,2.333333,inf
Cabo Verde,1.0,1.2,2.0,inf
Chad,1.4,2.333333,2.333333,inf
Djibouti,1.666667,2.142857,10.0,inf
Dominica,1.090909,1.090909,6.0,inf
El Salvador,1.066667,1.684211,6.4,inf
Eritrea,1.25,2.5,15.0,inf
Fiji,1.0,1.0,1.25,inf
Grenada,1.0,1.285714,9.0,inf
Haiti,1.0,1.875,2.142857,inf


In [17]:
# Grouped bar chart (log scale) of each growth factor for the selected countries.
px.bar(
    rates.loc[c30_idx].unstack().reset_index(),
    x='level_0',
    y=0,
    color='Country/Region',
    barmode='group',
    log_y=True,
)

In [18]:
# Long format: one row per (country, date, series); one facet per series.
xdf = flat_df.stack().reset_index()
fig = coronavirus_figure(
    xdf,
    c30_idx,
    the_dates[-1].strftime('%d %B'),
    rates,
    facet_col='Series',
)
fig

In [19]:
# Wide format: one row per (country, date); plot confirmed cases and show the
# other series in the hover box.
xdf = flat_df.reset_index()
fig = coronavirus_figure(
    xdf,
    c30_idx,
    the_dates[-1].strftime('%d %B'),
    rates,
    y='confirmed',
    hover_data=['deaths', 'recovered', 'active'],
)
# Save a standalone HTML copy that loads plotly.js from the CDN.
fig.write_html('../Coronavirus_plot.html', include_plotlyjs='cdn')
fig

In [35]:
def make_country_tests_df(df, columns, millions, new_names=['confirmed', 'tests']):
    """Select and rename columns of ``df`` and add per-million-population versions.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    columns : list of str
        Columns of ``df`` to keep, paired positionally with ``new_names``.
    millions : number
        Population in millions used as the divisor.
    new_names : list of str
        Names given to the kept columns.

    Returns
    -------
    pandas.DataFrame
        The renamed columns followed by one ``'<name>/1M'`` column per new name.
    """
    renamed = df[columns].rename(columns=dict(zip(columns, new_names)))
    per_million = {name + '/1M': renamed[name] / millions for name in new_names}
    return renamed.assign(**per_million)

In [36]:
# Divisor used for the per-million columns, keyed by country code.
MILLIONS = {
    'NZ': 4.974490,
    'US': 329.544974,
    'Aus': 25.663280,
    'Can': 37.976754,
    'UK': 66.435600,
}

# Display name for each country code.
FULL_NAMES = {
    'NZ': 'New Zealand',
    'Aus': 'Australia',
    'Can': 'Canada',
    'UK': 'United Kingdom',
    'US': 'USA',
}

# Per country code: the source columns mapped to ('confirmed', 'tests'), in that order.
COLUMN_NAMES = {
    'NZ': ['Confirmed', 'Tests'],
    'US': ['positive', 'posNeg'],
    'Can': ['positives', 'tests'],
    'Aus': ['cases', 'tests'],
    'UK': ['cases', 'tests'],
}
def make_tests_df(dfs_map):
    """Combine per-country frames into one table of cases, tests and case rate.

    Parameters
    ----------
    dfs_map : dict
        Maps a country code (a key of ``COLUMN_NAMES`` and ``MILLIONS``) to
        that country's source frame.

    Returns
    -------
    pandas.DataFrame
        All countries stacked under a 'country' level (full name where
        ``FULL_NAMES`` has one), with the index reset to columns and a
        'case rate' column equal to confirmed / tests.
    """
    per_country = {}
    for code, country_df in dfs_map.items():
        display_name = FULL_NAMES.get(code, code)
        per_country[display_name] = make_country_tests_df(
            country_df, COLUMN_NAMES[code], MILLIONS[code]
        )

    combined = pd.concat(per_country, sort=True, names=['country']).reset_index()
    combined['case rate'] = combined['confirmed'] / combined['tests']
    return combined
    
    

In [37]:
# Build the combined cases/tests table for the five countries.
tests_df = make_tests_df({'NZ': nz_df, 'Aus': aus_df, 'Can': can_df, 'UK': uk_df, 'US': us_df})

# Human-readable date for hover text, taken from the second column (the date
# level produced by reset_index). '%e' (space-padded day) is not a portable
# strftime directive, so format with '%d' and swap a leading zero for the space
# that '%e' would have produced.
tests_df['nice_date'] = (
    tests_df.iloc[:, 1]
    .dt.strftime('%d %b %Y')
    .str.replace(r'^0', ' ', regex=True)
)

In [38]:
# Axis/legend labels for the testing-comparison scatter plot; the 'title' entry
# doubles as the chart title.
LABELS = {
    'case rate': 'confirmed<br>case rate',
    'tests/1M': 'total tests per 1M population',
    'confirmed/1M': 'confirmed cases<br>per 1M population',
    'title': (
        'Comparison of confirmed Covid-19 cases and total tests '
        'for selected countries normalised by population'
    ),
}

In [39]:
# Tests per million (x) against confirmed cases per million (y), one connected
# marker series per country, coloured by the confirmed/tests ratio.
fig = (
    px.scatter(
        tests_df,
        x='tests/1M',
        y='confirmed/1M',
        symbol='country',
        color='case rate',
        hover_name='nice_date',
        hover_data=['confirmed', 'tests'],
        labels=LABELS,
        title=LABELS['title'],
    )
    .update_traces(
        mode='markers+lines',
        line_width=3,
        hovertemplate=(
            '<b>%{fullData.name}</b> - %{hovertext}'
            '<br>confirmed cases=%{customdata[0]} (%{y:.1f} / 1M)'
            '<br>total tests=%{customdata[1]} (%{x:.1f} / 1M)'
            '<br>confirmed rate=%{marker.color:.1%}'
        ),
    )
    .update_layout(
        coloraxis=dict(
            colorbar=dict(tickformat='.1%'),
            colorscale=px.colors.sequential.Sunsetdark_r,
        ),
        legend=dict(x=.025, y=0.95),
    )
)
fig

In [40]:
# Write the current `fig` to an HTML file in the parent directory, with include_plotlyjs='cdn'.
fig.write_html('../Coronavirus_testing_comparison.html', include_plotlyjs='cdn')