In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import common

In [2]:
DHBS = ['Northland', 'Waitemata', 'Auckland', 'Counties Manukau', 'Waikato', 'Bay of Plenty', 'Tairawhiti', 'Lakes','Taranaki', "Hawke's Bay", 'Whanganui', 'MidCentral', 'Hutt Valley', 'Capital and Coast', 'Wairarapa', 'Nelson Marlborough', 'West Coast', 'Canterbury',  'South Canterbury', 'Southern']
CASE_FILE_FORMAT = common.site_file('..', 'nz_data','covid-casedetails-%s.xlsx')
COLUMNS = ['Date of report', 'Sex', 'Age group', 'DHB', 'International travel',
        'Last country before return', 'Flight number', 'Flight departure date',
        'Arrival date']
INDEX_COLUMNS = ['Date of report', 'DHB']

In [3]:
dates = pd.date_range('2020-03-28', 'now')

In [4]:
all_dfs = [pd.read_excel(CASE_FILE_FORMAT % '28mar-confirmed', names=COLUMNS, index_col='Date of report', header=3), 
           pd.read_excel(CASE_FILE_FORMAT % '28mar-probable', names=COLUMNS, index_col='Date of report', header=3)]
hd=1
for date in dates[1:]:
    case_file = CASE_FILE_FORMAT % date.strftime('%e%B%Y').lower().strip()
    all_dfs.extend(pd.read_excel(case_file, sheet_name=None, names=COLUMNS, index_col='Date of report', header=hd).values())
    hd=3

FileNotFoundError: [Errno 2] No such file or directory: 'S:\\github\\timmclennan\\timmclennan.github.io\\..\\nz_data\\covid-casedetails-14april2020.xlsx'

In [None]:
all_df = pd.concat(all_dfs, keys=pd.MultiIndex.from_product([dates, ['confirmed','probable']]), names=['info date', 'type'])
all_df['Age group'].fillna('unknown', inplace=True)

In [None]:
counts_df = pd.pivot_table(all_df[['DHB','Age group']], index=['info date', 'type', 'DHB'], columns='Date of report', aggfunc='count')['Age group']

In [None]:
def format_series(date_df):
    df = pd.concat([date_df.loc[:,:'2020-03-26'].sum(axis=1), date_df.loc[:,'2020-03-27':]], axis=1)
    df.columns = pd.Index(['pre-lockdown']+date_df.columns[1-len(df.columns):].strftime('%B %d').values.tolist(), name='report date')
    return df.stack().unstack('type').reset_index()

In [None]:
def bar_plot(df, y_series='info date', y_order=None,title=None):
    kw = {} if y_order is None else dict(category_orders={y_series:y_order})
    fig = px.bar(df, orientation='h', color='report date', y=y_series, x='confirmed', labels={'info date':'Date of case details file'},**kw)#, category_orders={'report_date':cols})
    data = [None,None] * len(fig.data)
    data[::2] = fig.data
    data[1::2] = fig2 = px.bar(df, opacity=.4, orientation='h', color='report date', y=y_series, x='probable').update_traces(showlegend=False).data
    return go.Figure(data, layout=fig.layout).update_layout(yaxis_autorange='reversed' if y_order is None else True, xaxis_title_text='cases', title=title or 'Confirmed and probable cases for NZ')


In [None]:
today_str=dates[-1].strftime('%e %B %Y').strip()

In [None]:
fig = bar_plot(format_series(counts_df.loc[dates[-1]]), y_series='DHB', y_order=DHBS, title='Confirmed and probable cases for NZ by DHB<br>Data from case details on '+today_str)
fig

In [None]:
fig.write_html(common.site_file('Coronavirus_NZ_cases_by_DHB.html'), include_plotlyjs='cdn')

In [None]:
nz_df = format_series(counts_df.sum(level=['info date','type']))
fig=bar_plot(nz_df, title='Changes in confirmed and probable cases for all NZ<br>Data taken from the case details files')
fig

In [None]:
fig.write_html(common.site_file('Coronavirus_NZ_cases_over_time.html'), include_plotlyjs='cdn')

In [None]:
fig=bar_plot(format_series(counts_df.loc[pd.IndexSlice[:,:,'Canterbury'], :].fillna(0)), title='Changes in confirmed and probable cases for Canterbury<br>Data taken from the case details files')
fig

In [None]:
fig.write_html(common.site_file('Coronavirus_Canterbury_cases_over_time.html'), include_plotlyjs='cdn')