In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from inputs.constants import DATA_PATH
# Route DataFrame/Series `.plot()` calls through plotly rather than pandas' default backend.
pd.set_option('plotting.backend', 'plotly')

### Data wrangling
Initial processing

In [None]:
# Read the vaccination extract, using the column at position 424 as the index.
# NOTE(review): presumably that column holds the report date - confirm against the CSV header.
vacc_df = pd.read_csv(DATA_PATH / 'aus_vax_data.csv', index_col=424)
# `infer_datetime_format` is deprecated since pandas 2.0 (format inference is now the
# default behaviour), so the index is converted without it to avoid the warning.
vacc_df.index = pd.to_datetime(vacc_df.index)
# Order rows by date so that the row-based rolling/diff steps later on run in time order.
vacc_df = vacc_df.sort_index()

Get masks for each program or type of vaccination

In [None]:
# Columns whose name mentions the number of people fully vaccinated,
# skipping any column that carries a ' - M - ' or ' - F - ' tag.
age_16_2dose = [
    name
    for name in vacc_df.columns
    if 'Number of people fully vaccinated' in name
    and ' - M - ' not in name
    and ' - F - ' not in name
]
# National columns for three or more doses: the name must contain one of the
# third-dose phrasings and either '16' or '18', and must not be an increase,
# percentage or Indigenous column.
age_16_3dose = [
    name
    for name in vacc_df.columns
    if 'National' in name
    and any(
        phrase in name
        for phrase in (
            'who have received 3 doses',
            'with 3 or more doses',
            'with more than two doses',
            'with more than 2 doses',
        )
    )
    and any(age in name for age in ('16', '18'))
    and all(word not in name for word in ('increase', 'Percentage', 'Indigenous'))
]
# National columns for fourth doses / winter boosters: the name must contain one of
# the fourth-dose phrasings and none of '30', '65' or 'increase'.
age_16_4dose = [
    name
    for name in vacc_df.columns
    if 'National' in name
    and any(
        phrase in name
        for phrase in (
            'Winter Boosters number',
            'who have received 4 doses',
            'Fourth dose number',
        )
    )
    and all(word not in name for word in ('30', '65', 'increase'))
]
# Two-dose columns for the 12-15 group: the name must contain '12-15', one of
# 'National' / 'Age group' and one of '2 doses' / 'fully vaccinated', and must not be
# an Indigenous, Population, '- F -' or '- M -' column.
age_12_15_2dose = [
    name
    for name in vacc_df.columns
    if '12-15' in name
    and any(scope in name for scope in ('National', 'Age group'))
    and any(phrase in name for phrase in ('2 doses', 'fully vaccinated'))
    and all(word not in name for word in ('Indigenous', 'Population', '- F -', '- M -'))
]
# National two-dose columns for the 5-11 group: the name must contain 'National',
# '5-11' and one of '2 doses' / 'fully vaccinated'.
age_5_11_2dose = [
    name
    for name in vacc_df.columns
    if 'National' in name
    and '5-11' in name
    and any(phrase in name for phrase in ('2 doses', 'fully vaccinated'))
]

### Display results

In [None]:
# Plot the fully-vaccinated columns, keeping only the rows where every one of them has a value.
vacc_df.loc[:, age_16_2dose].dropna().plot(title='Fully vaccinated coverage')

In [None]:
# Panel title -> columns drawn in that panel; the dict order sets the panel order.
col_masks = {
    'Ages 16+, at least three doses': age_16_3dose,
    'Ages 16+, at least four doses': age_16_4dose,
    'Ages 12 to 15, at least two doses': age_12_15_2dose,
    'Ages 5 to 11, at least two doses': age_5_11_2dose,
}
fig = make_subplots(rows=2, cols=2, subplot_titles=list(col_masks))
for position, panel_columns in enumerate(col_masks.values()):
    # Fill the 2x2 grid left to right, then top to bottom; subplot rows/cols count from 1.
    grid_row, grid_col = divmod(position, 2)
    panel_traces = px.line(vacc_df[panel_columns]).data
    fig.add_traces(panel_traces, rows=grid_row + 1, cols=grid_col + 1)
fig.update_layout(height=600, showlegend=False, title='Coverage by program')

### Derive new fields from booster program information
Address a single-day discontinuity in the "at least four doses" data, which occurs while the "at least three doses" series continues.

In [None]:
# Cumulative adult boosters: row-wise total across the three-dose and four-dose columns
# (`sum` skips missing values, so a column with no value on a date adds nothing).
vacc_df['adult booster'] = vacc_df.loc[:, age_16_3dose + age_16_4dose].sum(axis=1)
# Remove the one date with the discontinuity. `errors='ignore'` keeps this cell
# re-runnable: once the row is gone, a second run would otherwise raise KeyError.
vacc_df = vacc_df.drop(datetime(2022, 7, 8), errors='ignore')
# Trailing mean over 7 rows; the window is counted in rows, not calendar days.
vacc_df['adult booster smooth'] = vacc_df.loc[:, 'adult booster'].rolling(7).mean()
vacc_df[['adult booster', 'adult booster smooth']].plot(title='Cumulative boosters in ages 16+')

In [None]:
# Change in the smoothed cumulative series from one row to the next.
vacc_df['incremental adult booster'] = vacc_df.loc[:, 'adult booster smooth'].diff(periods=1)
vacc_df.loc[:, 'incremental adult booster'].plot()

In [None]:
# Length of the look-back window, counted in rows of `vacc_df`.
# NOTE(review): the plot title treats this as days, i.e. one row per day - confirm.
immune_duration = 100
# Trailing sum of the increments over the look-back window.
vacc_df['boosted in preceding'] = vacc_df['incremental adult booster'].rolling(immune_duration).sum()
# Title typo fixed ('Vaccinaged' -> 'Vaccinated').
vacc_df['boosted in preceding'].plot(title=f'Vaccinated in preceding {immune_duration} days')