In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import common

In [2]:
DHBS = ['Northland', 'Waitemata', 'Auckland', 'Counties Manukau', 'Waikato', 'Bay of Plenty', 'Tairawhiti', 'Lakes','Taranaki', "Hawke's Bay", 'Whanganui', 'MidCentral', 'Hutt Valley', 'Capital and Coast', 'Wairarapa', 'Nelson Marlborough', 'West Coast', 'Canterbury',  'South Canterbury', 'Southern']
CASE_FILE_FORMAT = common.site_file('..', 'nz_data','covid-casedetails-%s.xlsx')
COLUMNS = ['Date of report', 'Sex', 'Age group', 'DHB', 'International travel',
        'Last country before return', 'Flight number', 'Flight departure date',
        'Arrival date']
INDEX_COLUMNS = ['Date of report', 'DHB']

In [3]:
dates = pd.date_range('2020-03-28', 'now')

In [4]:
testing_df = pd.read_csv(common.GOOGLE_SHEET_BASE+'pub?single=true&gid=1623378382&output=csv', index_col=0, header=1, na_values=['-'], dtype={'pop':np.float64})

In [5]:
all_dfs = [pd.read_excel(CASE_FILE_FORMAT % '28mar-confirmed', names=COLUMNS, index_col='Date of report', header=3), 
           pd.read_excel(CASE_FILE_FORMAT % '28mar-probable', names=COLUMNS, index_col='Date of report', header=3)]
hd=1
for date in dates[1:]:
    case_file = CASE_FILE_FORMAT % date.strftime('%e%B%Y').lower().strip()
    all_dfs.extend(pd.read_excel(case_file, sheet_name=None, names=COLUMNS, dayfirst=True, parse_dates=True, index_col='Date of report', header=hd).values())
    hd=3

In [6]:
all_df = pd.concat(all_dfs, keys=pd.MultiIndex.from_product([dates, ['confirmed','probable']]), names=['info date', 'type'])
all_df['Age group'].fillna('unknown', inplace=True)

In [7]:
all_df.DHB[all_df.DHB=='Waitematā']='Waitemata'
all_df.DHB[all_df.DHB=='Tairāwhiti']='Tairawhiti'



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [8]:
all_counts_df = pd.pivot_table(all_df[['DHB','Age group']], index=['info date', 'type', 'DHB'], columns='Date of report', aggfunc='count')['Age group']

In [9]:
PERIODS = [(all_counts_df.columns[0], '2020-03-26', 'pre-lockdown'),
                   ('2020-03-27', '2020-04-02', 'week 1'),
                   ('2020-04-03', '2020-04-09', 'week 2'),
                   ('2020-04-10', '2020-04-16', 'week 3'),
                   ('2020-04-17', '2020-04-23', 'week 4'),
                   ('2020-04-24', '2020-04-28', 'week 5*'),
                   ('2020-04-29', None, 'daily')
          ]

In [10]:
daily_start = PERIODS[-1][0]
#daily_names = dates[dates.get_loc(daily_start):].strftime('%B %d').values.tolist()

In [11]:
def make_period_df(dates_df, column_names=None, start=daily_start, periods=PERIODS[:-1]):
    daily_df = dates_df.loc[:,start:]
    daily_df.columns=daily_df.columns.strftime('%B %d').values.tolist() if column_names is None else column_names[:len(daily_df.columns)]
    df = pd.concat([dates_df.loc[:,st:end].sum(axis=1).rename(name) for (st, end, name) in periods]+[daily_df], axis=1)
    df.rename_axis('report date',axis=1, inplace=True)
    return df

In [12]:
counts_df = make_period_df(all_counts_df)

In [13]:
def format_series(df):
    return df.stack().unstack('type').reset_index()

In [14]:
report_date_values = counts_df.columns.values.tolist()
def bar_plot(df, y_series='info date', y_order=None,title=None, category_orders={'report date':report_date_values}):
    if y_order is not None:
        category_orders=dict(category_orders)
        category_orders[y_series] = y_order
    fig = px.bar(df, orientation='h', color='report date', y=y_series, x='confirmed', labels={'info date':'Date of case details file'},category_orders=category_orders)#, category_orders={'report_date':cols})
    data = [None,None] * len(fig.data)
    data[::2] = fig.data
    data[1::2] = fig2 = px.bar(df, opacity=.4, orientation='h', color='report date', y=y_series, x='probable',category_orders=category_orders).update_traces(showlegend=False).data
    return go.Figure(data, layout=fig.layout).update_layout(yaxis_autorange='reversed' if y_order is None else True, xaxis_title_text='cases', title=title or 'Confirmed and probable cases for NZ')


In [15]:
today_df=counts_df.loc[dates[-1]]
today_str = dates[-1].strftime('%e %B %Y')

In [16]:
DHB_pops100k = testing_df['pop']/100000

In [17]:
fig = bar_plot(format_series(today_df), y_series='DHB', y_order=DHBS, title='Confirmed and probable cases for NZ by DHB<br>Data from case details on '+today_str)
fig

In [18]:
fig.write_html(common.site_file('Coronavirus_NZ_cases_by_DHB.html'), include_plotlyjs='cdn')

In [19]:
fig = (bar_plot(format_series(today_df.div(DHB_pops100k, axis=0, level='DHB')), y_series='DHB', y_order=DHBS, title='Confirmed and probable cases for NZ by DHB per 100k population<br>Data from case details on '+today_str)
       .update_layout(xaxis_title='case per 100k population') )
fig

In [20]:
fig.write_html(common.site_file('Coronavirus_NZ_cases_by_DHB_normalised.html'), include_plotlyjs='cdn')

In [21]:
nz_df = format_series(counts_df.sum(level=['info date','type']))
fig=bar_plot(nz_df, title='Changes in confirmed and probable cases for all NZ<br>Data taken from the case details files')
fig

In [22]:
fig.write_html(common.site_file('Coronavirus_NZ_cases_over_time.html'), include_plotlyjs='cdn')

In [23]:
fig=bar_plot(format_series(counts_df.loc[pd.IndexSlice[:,:,'Canterbury'], :].fillna(0)), title='Changes in confirmed and probable cases for Canterbury<br>Data taken from the case details files')
fig

In [24]:
fig.write_html(common.site_file('Coronavirus_Canterbury_cases_over_time.html'), include_plotlyjs='cdn')