In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import common

In [2]:
# District Health Boards in the display order used for plot categories.
DHBS = [
    'Northland',
    'Waitemata',
    'Auckland',
    'Counties Manukau',
    'Waikato',
    'Bay of Plenty',
    'Tairawhiti',
    'Lakes',
    'Taranaki',
    "Hawke's Bay",
    'Whanganui',
    'MidCentral',
    'Hutt Valley',
    'Capital and Coast',
    'Wairarapa',
    'Nelson Marlborough',
    'West Coast',
    'Canterbury',
    'South Canterbury',
    'Southern',
]

In [3]:
# Date of the spreadsheet to download, written day-month-year with hyphens.
datestr = '9-april-2020'
# The same date as a Timestamp; later cells keep only reports dated before it.
today = pd.to_datetime(' '.join(datestr.split('-')))
# URL template; %s is filled with the date string with the hyphens removed.
CASE_URL_FORMAT = (
    'https://www.health.govt.nz/system/files/documents/pages/'
    'covid-casedetails-%s.xlsx'
)


In [4]:
# Sanity check: the hyphen-free date string that gets substituted into CASE_URL_FORMAT.
datestr.replace('-', '')

'9april2020'

In [5]:
# Build the download URL for the chosen date (hyphens stripped from the date string).
case_url = CASE_URL_FORMAT % (datestr.replace('-', ''),)
# Manual override kept for reference: the 7 April file name appears to be spelt
# differently ("casedeatails") — presumably as published; confirm before re-enabling.
#case_url = 'https://www.health.govt.nz/system/files/documents/pages/covid-casedeatails-7april2020.xlsx'

In [6]:
# Read the first two sheets of the workbook into a dict of DataFrames.
# The sheets are addressed by position and labelled 'confirmed' / 'probable'
# (assumes that is the sheet order in the published file — TODO confirm).
# header=3 takes the column names from the fourth row of each sheet.
with pd.ExcelFile(case_url) as excel_file:
    cases_dfs = {
        label: excel_file.parse(sheet_position, header=3)
        for sheet_position, label in enumerate(['confirmed', 'probable'])
    }

In [7]:
def get_case_counts(df, date_name='Report Date', date_col=0):
    """Count cases per (date, DHB) pair.

    Parameters
    ----------
    df : DataFrame
        One row per case; must contain a 'DHB' column.
    date_name : str
        Name given to the date level of the returned index.
    date_col : int
        Position of the date column in ``df.columns``.

    Returns
    -------
    Series
        Named 'c', indexed by a two-level index ``[date_name, 'DHB']``,
        holding the number of rows in each group.
    """
    date_key = df.columns[date_col]
    per_group = df.groupby([date_key, 'DHB'])['DHB'].count()
    # Keep the series name the original aggregation produced, then relabel the index levels.
    return per_group.rename('c').rename_axis([date_name, 'DHB'])

In [8]:
# Per-type case counts side by side (one column per case type, column axis
# named 'type'), keeping report dates up to and including the day before `today`.
cdf = pd.concat(
    {case_type: get_case_counts(frame) for case_type, frame in cases_dfs.items()},
    axis=1,
    names=['type'],
).loc[:today - pd.Timedelta(days=1)]

In [9]:
# Cases of all types per (report date, DHB), flattened to long form for plotting.
totals_df = (
    cdf.sum(axis=1)
    .rename('cases')
    .reset_index()
)

In [10]:
# Cases per report date, stacked by DHB in the DHBS order.
px.bar(
    totals_df,
    x='Report Date',
    y='cases',
    color='DHB',
    category_orders={'DHB': DHBS},
)

In [11]:
# Cases per DHB, with each bar segment coloured by its report date.
px.bar(
    totals_df,
    x='DHB',
    y='cases',
    color='Report Date',
    color_continuous_scale=px.colors.sequential.algae,
)

In [12]:
# Running totals per DHB: spread DHBs into columns, treat missing days as zero,
# accumulate down the dates, then fold DHB back into the index and add a
# 'total' column summing the case-type columns.
tdf = (
    cdf.unstack('DHB')
    .fillna(0)
    .cumsum()
    .stack('DHB')
    .assign(total=lambda counts: counts.sum(axis=1))
)




In [13]:
# Cumulative counts per (report date, DHB): accumulate with DHBs as columns,
# flatten to a single series, drop entries that are still zero, and spread the
# case type back into columns.
xdf = (
    cdf.unstack('DHB')
    .fillna(0)
    .cumsum()
    .stack(['DHB', 'type'])
    .loc[lambda running: running > 0]
    .unstack('type')
)
# Same table with an extra 'total' column summing the case-type columns.
cum_df = xdf.assign(total=xdf.sum(axis=1))

In [31]:
# Bar-chart input: the cumulative confirmed/probable counts as of 2020-03-26 act
# as a baseline, followed by the per-report-date counts from 2020-03-27 onwards.
# DHB is moved out of the index into a column.
# NOTE(review): relies on the string date lookup on the (Report Date, DHB) index
# keeping the date level in the result — confirm on the pandas version in use.
xdf = pd.concat([cum_df.loc['2020-03-26',['confirmed', 'probable']], cdf.loc['2020-03-27':]]).reset_index('DHB')
# Label for each row: month name and day of its report date.
xdf['report date']=xdf.index.strftime('%B %d')
# The baseline rows are labelled 'pre-lockdown' instead of their date.
xdf.loc['2020-03-26', 'report date']='pre-lockdown'

In [32]:

# Horizontal bars per DHB, coloured by report date: confirmed cases at full
# opacity, probable cases semi-transparent and hidden from the legend.
bar_kwargs = dict(
    y='DHB',
    color='report date',
    orientation='h',
    category_orders={'DHB': DHBS},
)
fig = px.bar(xdf, x='confirmed', **bar_kwargs)
# NOTE: `fig2` holds the tuple of probable-case traces, not a Figure.
fig2 = (
    px.bar(xdf, x='probable', opacity=0.4, **bar_kwargs)
    .update_traces(showlegend=False)
    .data
)
# Interleave the two trace sets so each date's probable bar is drawn directly
# after its confirmed bar.
data = [None, None] * len(fig.data)
data[::2] = fig.data
data[1::2] = fig2
fig = go.Figure(data, layout=fig.layout).update_layout(
    xaxis_title_text='cases',
    title='Confirmed and probable cases for nz by DHB and date<br>Data from latest NZ case details',
)
fig

In [33]:
# Export the combined figure as a standalone HTML page; plotly.js is loaded
# from a CDN rather than embedded in the file.
# `common.site_file` presumably resolves the output path for the site — confirm in `common`.
fig.write_html(
    common.site_file('Coronavirus_NZ_cases_by_DHB.html'),
    include_plotlyjs='cdn',
)

In [21]:

# Flat copy of the cumulative table: missing counts become 0 and the
# (Report Date, DHB) index is turned into ordinary columns.
xdf = cum_df.fillna(0).reset_index()

In [22]:
# Display the cumulative confirmed / probable / total counts per report date and DHB.
cum_df

Unnamed: 0_level_0,type,confirmed,probable,total
Report Date,DHB,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-02-26,Auckland,1.0,,1.0
2020-03-02,Auckland,1.0,,1.0
2020-03-02,Waitemata,1.0,,1.0
2020-03-04,Auckland,1.0,,1.0
2020-03-04,Counties Manukau,1.0,,1.0
...,...,...,...,...
2020-04-08,Waikato,142.0,21.0,163.0
2020-04-08,Wairarapa,6.0,2.0,8.0
2020-04-08,Waitemata,121.0,49.0,170.0
2020-04-08,West Coast,4.0,1.0,5.0


In [23]:
# National cumulative counts: sum every DHB's row within each report date.
# `DataFrame.sum(level=...)` was deprecated and later removed from pandas, so
# the equivalent group-by on the index level is used instead.
# NOTE: this rebinds `totals_df`, which an earlier cell used for per-DHB daily totals.
totals_df = cum_df.groupby(level='Report Date').sum()

In [24]:
# Add one pseudo-DHB row called 'total' per report date so the national curve
# is drawn alongside the individual DHBs.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
national_rows = totals_df.assign(DHB='total').set_index('DHB', append=True)
xdf = pd.concat([cum_df.fillna(0), national_rows], sort=True).reset_index()
# Cumulative totals over time on a log scale, one line per DHB plus the national total.
px.line(
    xdf,
    x='Report Date',
    y='total',
    color='DHB',
    hover_data=['confirmed', 'probable'],
    log_y=True,
)

In [25]:
# Each DHB's cumulative total divided by the national cumulative total for the
# same report date, from 2020-03-26 onwards.
# The original trailing `.rename({'total': 'percentage'})` had no `columns=`
# argument, so it targeted the row index and changed nothing.  It is removed
# rather than corrected because the plotting cell reads the 'total' column.
# NOTE: the values in 'total' are fractions of the national total, not counts.
xdf = (
    cum_df.total
    .div(totals_df.total, level='Report Date')
    .loc['2020-03-26':]
    .reset_index()
)

In [26]:
# Stacked bars per report date, one segment per DHB in the DHBS order.
# The 'total' column plotted here holds each DHB's fraction of the national
# cumulative total, so the axis is relabelled to say so.
fig = px.bar(
    xdf,
    x='Report Date',
    y='total',
    color='DHB',
    category_orders={'DHB': DHBS},
    labels={'total': 'share of national cumulative cases'},
    title='Share of cumulative NZ cases by DHB',
)
fig