In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import common

In [2]:
# Remote data sources, keyed by the short code used throughout the notebook.
URLS = dict(
    # Published Google Sheets workbook (xlsx export) read sheet-by-sheet in load_data().
    main='https://docs.google.com/spreadsheets/d/e/2PACX-1vQCN9pL21lGy3XPBhKwMX7jT1_SG-Sb_4ZWZ1I0Ctd-0vNhtmH4gFKaLsV5jhz4vSjYlQ9NR_fXF_b6/pub?output=xlsx',
    # covidtracking.com daily CSV for the USA.
    US='https://covidtracking.com/api/us/daily.csv',
    # Published Google Sheets CSV export for Japan.
    Jap='https://docs.google.com/spreadsheets/d/e/2PACX-1vRri4r42DHwMHePjJfYN-qEWhGvKeOQullBtEzfle15i-xAsm9ZgV8oMxQNhPRO1CId39BPnn1IO5YO/pub?gid=211530313&single=true&output=csv',
    # pcm-dpc GitHub repository, national trend CSV for Italy.
    It='https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv',
)


In [3]:
# Population of each series in millions; used as the divisor for the "/1M" columns.
MILLIONS = {
    'Kor': 51.780579,
    'NZ': 4.974490,
    'NZC': 4.974490,
    'It': 60.238522,
    'Jap': 125.95,
    'US': 329.544974,
    'Aus': 25.663280,
    'Can': 37.976754,
    'Den': 5.822763,
    'UK': 66.435600,
}

# Display name for each series code. make_tests_df() iterates this dict, so its
# key order is also the order in which the series are concatenated.
FULL_NAMES = {
    'NZ': 'New Zealand',
    'NZC': 'NZ (positive tests)',
    'Aus': 'Australia',
    'Can': 'Canada',
    'Den': 'Denmark',
    'It': 'Italy',
    'Jap': 'Japan',
    'Kor': 'South Korea',
    'UK': 'United Kingdom',
    'US': 'USA',
}

# For each series: [cumulative-cases column, cumulative-tests column] in its raw table.
COLUMN_NAMES = {
    'NZ': ['Cases', 'Tests'],
    'NZC': ['Confirmed', 'Tests'],
    'Kor': ['Confirmed', 'tests'],
    'It': ['totale_casi', 'tamponi'],
    'Jap': ['Confirmed', 'Tested'],
    'US': ['positive', 'posNeg'],
    'Den': ['positives', 'tests'],
    'Can': ['cases', 'people tested'],
    'Aus': ['cases', 'tests'],
    'UK': ['tested positive', 'tests'],
}


In [4]:
# Sheet title per series code for the series kept in the main workbook.
SHEET_NAMES = {
    'NZ': 'From Briefings',
    'Aus': 'Australia',
    'Den': 'Denmark',
    'Can': 'Canada',
    'UK': 'UK',
    'Kor': 'South Korea',
}
# Codes of the series read from the main workbook, in the same order as above.
SHEET_CODES = tuple(SHEET_NAMES)
def load_data():
    """Download every raw table and return them as a dict keyed by series code.

    The series listed in SHEET_CODES are parsed from the workbook at
    URLS['main'] (second row as header, first column as a parsed-date index).
    'US', 'Jap' and 'It' are read from their own CSV URLs, and 'NZC' is the
    'NZ' table restricted to 2020-03-27 onwards.

    NOTE(review): sheets are looked up by FULL_NAMES[code]; SHEET_NAMES is
    defined next to this function but is not used here — confirm which
    mapping matches the workbook's sheet titles.

    Returns:
        dict mapping series code -> pandas.DataFrame.
    """
    frames = {}
    with pd.ExcelFile(URLS['main']) as workbook:
        for code in SHEET_CODES:
            frames[code] = workbook.parse(
                FULL_NAMES[code],
                index_col=0,
                parse_dates=True,
                header=1,
            )

    # Separate NZ series that only starts on 2020-03-27.
    frames['NZC'] = frames['NZ'].loc['2020-03-27':]

    frames['US'] = pd.read_csv(URLS['US'], parse_dates=True, index_col=0)

    # Japan: keep only the span between the first and last rows that have a
    # 'Tested' value, dropping leading/trailing rows without test counts.
    japan = pd.read_csv(URLS['Jap'], parse_dates=True, index_col=0)
    tested_dates = japan.index[japan.Tested.notna()]
    frames['Jap'] = japan.loc[tested_dates[0]:tested_dates[-1]]

    frames['It'] = pd.read_csv(URLS['It'], parse_dates=True, index_col=0)
    return frames

In [5]:
def make_country_tests_df(df, columns, millions, new_names=('confirmed', 'tests')):
    """Build one country's table of cumulative, per-capita and incremental figures.

    Args:
        df: Raw frame for one series; its index orders the observations.
        columns: Names of the source columns to keep, paired positionally with
            ``new_names``. Any iterable of names is accepted (list, tuple, ...).
        millions: Population in millions, used as the divisor for the
            ``'<name>/1M'`` columns.
        new_names: Output names for ``columns``. Defaults to
            ``('confirmed', 'tests')``.

    Returns:
        A new DataFrame sorted by index containing, for every name in
        ``new_names``: the renamed source column, ``'<name>/1M'`` (value
        divided by ``millions``) and ``'new <name>'`` (difference from the
        previous row; NaN on the first row). ``df`` itself is not modified.
    """
    # Normalise to lists: pandas treats a tuple inside df[...] as a single
    # key rather than a list of columns, and a one-shot iterable would be
    # exhausted by zip() before the loop below.
    columns = list(columns)
    new_names = list(new_names)

    new_df = df[columns].rename(columns=dict(zip(columns, new_names))).sort_index()
    for name in new_names:
        col = new_df[name]
        new_df[name + '/1M'] = col / millions
        # Row-to-row change after sorting by index.
        new_df['new ' + name] = col.diff()
    return new_df

In [6]:
def make_tests_df(dfs_map):
    """Combine all per-series tables into one long frame with rate columns.

    Args:
        dfs_map: dict of raw frames keyed by series code; must contain every
            key of FULL_NAMES.

    Returns:
        A DataFrame with one row per series and index value. The first column
        is 'country' (the display name from FULL_NAMES), followed by the
        former index and the columns produced by make_country_tests_df(),
        plus 'case rate' (confirmed / tests) and 'daily rate'
        (new confirmed / new tests).
    """
    per_country = {}
    for code, full_name in FULL_NAMES.items():
        per_country[full_name] = make_country_tests_df(
            dfs_map[code], COLUMN_NAMES[code], MILLIONS[code]
        )

    # Dict keys become the outer index level named 'country'; reset_index()
    # then turns both index levels into ordinary columns.
    combined = pd.concat(per_country, sort=True, names=['country']).reset_index()

    combined['case rate'] = combined['confirmed'] / combined['tests']
    combined['daily rate'] = combined['new confirmed'] / combined['new tests']
    return combined
    
    

In [7]:
# Fetch all remote sources once; the result is a dict of raw frames keyed by series code.
raw_dfs = load_data()

In [8]:
# Long-format comparison table built from the raw frames.
tests_df = make_tests_df(raw_dfs)
# Add a human-readable date string. Column 1 is the former index, which
# reset_index() placed right after 'country' (presumably the observation
# date — confirm). NOTE(review): '%e' is a platform-specific strftime
# directive; verify it is supported where this notebook runs.
tests_df = tests_df.assign(nice_date=tests_df.iloc[:, 1].dt.strftime('%e %b %Y'))

In [9]:
# Current time in the Pacific/Auckland zone as a display string; it is appended to the chart title in LABELS.
# NOTE(review): '%e' (day of month) is a platform-specific strftime directive — confirm it works where this runs.
now = pd.Timestamp('now', tz='Pacific/Auckland').strftime('%e %b %Y %H:%M (%z)')

In [10]:
# Axis / colour-bar labels keyed by tests_df column name, plus the figure title
# (which embeds the `now` timestamp string).
LABELS = {
    'case rate':'confirmed<br>case rate', 
    'tests/1M':'total tests per 1M population (10k is 1% of population)', 
    'confirmed/1M':'confirmed cases<br>per 1M population',
    'title': 'Comparison of confirmed Covid-19 cases and total tests for selected countries normalised by population<br>Last update: '+now,
}
# Hover text for every trace. The customdata[i] indices follow the order of the
# hover_data list given to px.scatter in the plotting cell:
# 0=confirmed, 1=tests, 2=new confirmed, 3=new tests, 4=daily rate.
# NOTE(review): the chained assignment below also binds a module-level name
# `hovertemplate`; nothing visible in this notebook reads it — looks like a
# copy-paste leftover, confirm before removing.
HOVER_TEMPLATE = hovertemplate='<br>'.join([
    '<b>%{fullData.name}</b> - %{hovertext}',
    'confirmed cases=%{customdata[0]}<br>  - up %{customdata[2]} to %{y:.1f}/1M',
    'total tests=%{customdata[1]}<br>  - up %{customdata[3]} to %{x:.1f}/1M',
    'confirmed case rate=%{marker.color:.1%}<br>  - 1 day rate=%{customdata[4]:.1%}'])
# Colour sequence taken from the template returned by the project's `common` module.
COLORWAY = common.get_plotly_template()['layout']['colorway']
# Keyword bundles unpacked into fig.update_traces / fig.update_layout in the plotting cell.
UPDATES = {
    # Applied to every trace: draw markers joined by lines.
    'traces': dict(line_width=3, marker_size=5, mode='markers+lines'),
    # Colour bar shown as percentages; the colour scale is Plasma with its first
    # two and last two colours dropped; legend placed near the top-left corner.
    'layout': dict(coloraxis=dict(colorbar=dict(thickness=20, tickformat='.0%'), colorscale=px.colors.sequential.Plasma[2:-2]), 
                   legend=dict(bgcolor='#eeeeee', x=.025, y=0.95), ), 
    # Applied only to the trace whose name is FULL_NAMES['NZC']: dotted line.
    'nzc_trace': dict(selector={'name': FULL_NAMES['NZC']}, marker_size=5, line=dict(dash='dot', width=3)),
}

In [11]:
# Scatter of confirmed cases vs. tests (both per 1M population), one trace per
# country, coloured by cumulative case rate on a 0-20% scale.
fig = px.scatter(
    tests_df,
    x='tests/1M',
    y='confirmed/1M',
    symbol='country',
    symbol_map={FULL_NAMES['NZC']: 'diamond'},
    color='case rate',
    range_color=[0, .2],
    hover_name='nice_date',
    hover_data=['confirmed', 'tests', 'new confirmed', 'new tests', 'daily rate'],
    labels=LABELS,
    title=LABELS['title'],
)
# The update_* methods modify the figure in place, so they are applied in the
# same order as before: all traces, then layout, then the NZC-only override.
fig.update_traces(hovertemplate=HOVER_TEMPLATE, **UPDATES['traces'])
fig.update_layout(**UPDATES['layout'])
fig.update_traces(**UPDATES['nzc_trace'])

# Give the 2nd-4th traces the first three colourway colours as line colour
# (presumably traces follow FULL_NAMES order, so the first trace is skipped
# on purpose — confirm).
for cw, trace in zip(COLORWAY[:3], fig.data[1:]):
    trace.update(line_color=cw)

fig

In [12]:
# Export the chart as an HTML page; include_plotlyjs='cdn' makes the page load
# plotly.js from a CDN instead of embedding it. The output path comes from the
# project's common.site_file helper (presumably the site output folder — confirm).
fig.write_html(common.site_file('Coronavirus_testing_comparison.html'), include_plotlyjs='cdn')

In [13]:
# Scratch helper: uncomment to preview plotly's built-in sequential colour scales.
#px.colors.sequential.swatches()