## Supplement
Generate the full supplemental appendix presented for the analysis.

### Running over Colab
Uncomment the commands in the following cell to install the required packages when running on Colab.
Once installation has completed, click the 'Restart runtime' button that appears to restart the Colab environment, and then proceed to the following cells.

### Generating documentation
Running the final cell will take several minutes and will generate the corresponding PDF documentation.
This can be downloaded from the Files section of the sidebar.

In [None]:
# !pip uninstall numba -y
# !pip uninstall librosa -y
# !pip install estival==0.4.9 numpy==1.24.3 kaleido

In [None]:
try:
    import google.colab
    on_colab = True
    ! git clone https://github.com/monash-emu/aust-covid.git --branch main
    %cd aust-covid
    %pip install -e ./
    import multiprocessing as mp
    mp.set_start_method('forkserver')
except:
    on_colab = False

In [None]:
import arviz as az
import numpy as np
import pandas as pd
from arviz.labels import MapLabeller
from matplotlib import pyplot as plt
import matplotlib as mpl

from estival.sampling import tools as esamp
from estival.sampling.tools import idata_to_sampleiterator

from aust_covid.model import build_model
from emutools.calibration import plot_priors, tabulate_priors
from aust_covid.constants import MOBILITY_MAP, MOBILITY_AVERAGE_WINDOW, RUN_IDS, PRIMARY_ANALYSIS, BURN_IN, ANALYSIS_FEATURES, set_project_base_path
from aust_covid.calibration import get_priors, get_all_priors, get_targets
from emutools.tex import DummyTexDoc, StandardTexDoc, remove_underscore_multiindexcol, add_image_to_doc
from emutools.utils import load_param_info, param_table_to_tex
from emutools.calibration import plot_param_progression, plot_posterior_comparison, tabulate_calib_results, plot_output_ranges, plot_output_ranges_by_analysis
from emutools.calibration import get_like_components, plot_like_components_by_analysis, plot_spaghetti, plot_param_hover_spaghetti
from aust_covid.extra_text import add_intro_blurb_to_tex, add_parameters_blurb_to_tex, add_likelihood_blurb_to_tex, add_calibration_blurb_to_tex, \
    add_dispersion_blurb_to_tex, add_model_structure_blurb_to_tex, add_mobility_blurb_to_tex, add_vaccination_blurb_to_tex
from aust_covid.inputs import get_ifrs, get_base_vacc_data, get_raw_state_mobility
from aust_covid.plotting import plot_targets, plot_dispersion_examples, plot_full_vacc, plot_program_coverage, plot_multi_spaghetti, plot_vacc_implementation
from aust_covid.plotting import plot_example_model_matrices, plot_immune_props, plot_state_mobility, plot_processed_mobility, plot_cdr_examples
from aust_covid.plotting import plot_single_run_outputs, plot_subvariant_props, plot_infection_processes
from aust_covid.mobility import get_non_wa_mob_averages, get_relative_mobility, map_mobility_locations
from aust_covid.vaccination import get_vacc_data_masks, add_derived_data_to_vacc, get_full_vacc_props
# Register "../" as the project base path and pick out the two locations used below
project_paths = set_project_base_path("../")
SUPPLEMENT_PATH, RUNS_PATH = (project_paths[key] for key in ("SUPPLEMENT_PATH", "RUNS_PATH"))

import warnings
# Silence pandas PerformanceWarning messages for the remainder of the notebook
warnings.simplefilter('ignore', category=pd.errors.PerformanceWarning)

In [None]:
# Start appendix document
app_doc = StandardTexDoc(
    SUPPLEMENT_PATH,
    'supplement',
    "Supplementary material to analysis of Australia's 2022 COVID-19 epidemic",
    'austcovid',
    table_of_contents=True,
)
# Stand-in document object, passed below in place of app_doc to some helpers
dummy_doc = DummyTexDoc()
add_intro_blurb_to_tex(app_doc)

In [None]:
# Priors: table and figure for the 'Calibration methods' section
param_info = load_param_info()
all_priors = get_all_priors()
get_priors(False, param_info['abbreviations'], app_doc)

# Table of priors
priors_table = tabulate_priors(all_priors, param_info)
caption = (
    'Parameters implemented in calibration code are given. '
    'Note that the values for several of these were generated '
    'through algorithms that aimed to approximate epidemiological user-specified '
    'plausible ranges. '
)
app_doc.include_table(
    priors_table,
    'priors_table',
    'Priors.',
    'Calibration methods',
    subsection='Priors',
    col_splits=[0.25] * 4,
    caption=caption,
)

# Figure of prior distributions
priors_fig = plot_priors(all_priors, param_info['abbreviations'], 4, 100, 0.1, 0.99)
caption = 'Illustrations of prior distributions implemented in calibration algorithm.'
add_image_to_doc(
    priors_fig,
    'prior_distributions',
    'svg',
    'Priors.',
    app_doc,
    'Calibration methods',
    subsection='Priors',
    caption=caption,
)

In [None]:
# Parameters: merge the IFR values into the parameter values, then tabulate
ifrs = get_ifrs(app_doc)
param_info['value'].update(ifrs)
parameters = param_info['value'].to_dict()
add_parameters_blurb_to_tex(app_doc)

calibrated_priors = get_priors(True, param_info['abbreviations'], dummy_doc)
prior_names = [prior.name for prior in calibrated_priors]
params_table = param_table_to_tex(param_info, prior_names)
app_doc.include_table(
    params_table,
    'params',
    'Epidemiological parameters and evidence.',
    section='Parameters',
    col_splits=[0.15, 0.13, 0.13, 0.59],
    longtable=True,
)

In [None]:
# Targets: figure of the calibration targets for the 'Targets' section
targets = get_targets(app_doc)
fig = plot_targets(targets, for_plotly=False)
add_image_to_doc(
    fig,
    'target_fig',
    'svg',
    'Calibration targets with raw data from which they were derived.',
    app_doc,
    'Targets',
)

In [None]:
# Mobility
mob_blurb = 'We extended our base model to incorporate empirically observed changes in population mobility over the simulation period.'
app_doc.add_line(mob_blurb, 'Mobility extension')

# Raw state-level mobility figure
state_data, jurisdictions, mob_locs = get_raw_state_mobility(dummy_doc)
fig = plot_state_mobility(state_data, jurisdictions, mob_locs)
caption = 'Locations: Retail and recreation (green), grocery and pharmacy (purple), parks (orange), transit stations (yellow), workplaces (blue), and residential (red).'
add_image_to_doc(
    fig.update_layout(showlegend=False),
    'state_mobility',
    'svg',
    'Raw state-level mobility obtained from Google.',
    app_doc,
    'Mobility extension',
    caption=caption,
)


def _tidy_labels(labels):
    """Replace underscores with spaces and apply str.capitalize to each label."""
    return labels.str.replace('_', ' ').str.capitalize()


# Table of the mobility mapping, with readable labels and string-valued cells
mapping_table = pd.DataFrame(MOBILITY_MAP)
mapping_table.index = _tidy_labels(mapping_table.index)
mapping_table.columns = _tidy_labels(mapping_table.columns)
mapping_table = mapping_table.applymap(str)
app_doc.include_table(mapping_table, 'mob_map', 'Mobility mapping.', 'Mobility extension')

# Process Western Australia separately from the averages returned for the other jurisdictions
is_wa = state_data['sub_region_1'] == 'Western Australia'
wa_data = state_data.loc[is_wa, mob_locs]
state_averages = get_non_wa_mob_averages(state_data, mob_locs, jurisdictions, dummy_doc)
non_wa_relmob = get_relative_mobility(state_averages)
wa_relmob = get_relative_mobility(wa_data)
processed_mob = map_mobility_locations(wa_relmob, non_wa_relmob, dummy_doc)

# Rolling mean over the configured window, then square the smoothed values
smoothed_mob = processed_mob.rolling(MOBILITY_AVERAGE_WINDOW).mean().dropna()
squared_mob = smoothed_mob ** 2.0
mobility_types = {
    label: remove_underscore_multiindexcol(mob)
    for label, mob in (
        ('original', processed_mob),
        ('smoothed', smoothed_mob),
        ('smoothed squared', squared_mob),
    )
}
fig = plot_processed_mobility(mobility_types)
caption = (
    'Work mobility (yellow line), smoothed work mobility (blue line), squared smoothed work mobility (red line), '
    'averaged other locations mobility (green line), smoothed averaged other locations mobility (purple), and squared smoothed averaged other locations mobility (orange).'
)
add_image_to_doc(
    fig.update_layout(showlegend=False),
    'processed_mobility',
    'svg',
    'Processed mobility for model.',
    app_doc,
    'Mobility extension',
    caption=caption,
)
add_mobility_blurb_to_tex(app_doc)

In [None]:
# Overwrite the parameter values with the sample that has the highest log-posterior
analysis_folder = RUN_IDS[PRIMARY_ANALYSIS]
analysis_path = RUNS_PATH / analysis_folder
likelihood_df = pd.read_hdf(analysis_path / 'output/results.hdf', 'likelihood')
i_max = likelihood_df['logposterior'].idxmax()
idata = az.from_netcdf(analysis_path / 'output/calib_full_out.nc')
samples_df = idata_to_sampleiterator(idata).convert('pandas')
best_params = samples_df.loc[i_max].to_dict()
parameters.update(best_params)

In [None]:
# Get completed models with each extension for use in sections below
add_model_structure_blurb_to_tex(app_doc)
abbreviations = param_info['abbreviations']

# Mobility-extension model (built against app_doc)
epi_model = build_model(app_doc, abbreviations, mobility_ext=True)
epi_model.run(parameters=parameters)

# Vaccination-extension model (built against dummy_doc)
vacc_model = build_model(dummy_doc, abbreviations, vacc_ext=True)
vacc_model.run(parameters=parameters)

In [None]:
# Vaccination
vacc_df = get_base_vacc_data()
masks = get_vacc_data_masks(vacc_df)
vacc_df, lagged_df = add_derived_data_to_vacc(vacc_df)

# Second-dose coverage figure
full_prop_df = get_full_vacc_props(vacc_df, masks['age 16+, 2+ doses'])
full_vacc_fig = plot_full_vacc(masks['age 16+, 2+ doses'], vacc_df, full_prop_df)
caption = (
    "Second (`full') dose roll-out by age group. Number of persons receiving second dose (upper panel) and "
    'proportion of population having received second dose (lower panel). '
    'Age groups coloured from cyan (12 to 15 years-old) to purple (95+ years-old). '
    'Some proportions exceed one, which is a feature of the source data. '
    'Although the explanation for this observation is not provided, '
    'it presumably relates to an underestimate of the true denominators due to population changes. '
)
add_image_to_doc(
    full_vacc_fig.update_layout(showlegend=False),
    'full_vacc',
    'svg',
    'Full vaccination coverage',
    app_doc,
    'Vaccination extension',
    caption=caption,
)

# Coverage figure for the subsequent programs
program_names = ['age 16+, 3+ doses', 'age 16+, 4+ doses', 'age 12-15, 2+ doses', 'age 5-11, 2+ doses']
program_masks = {m: masks[m] for m in program_names}
coverage_by_program = plot_program_coverage(program_masks, vacc_df)
add_image_to_doc(
    coverage_by_program,
    'program_coverage',
    'svg',
    'Vaccination coverage by subsequent programs.',
    app_doc,
    'Vaccination extension',
)

# Implementation figure
caption = 'Booster program for persons aged 16 and above (red line), primary vaccination course for persons aged 5 to 11.'
implement_fig = plot_vacc_implementation(vacc_df)
add_image_to_doc(
    implement_fig.update_layout(showlegend=False),
    'vacc_implement',
    'svg',
    'Vaccination implementation.',
    app_doc,
    'Vaccination extension',
    caption=caption,
)

# Reported versus modelled vaccination status figure
vacc_distribution = plot_immune_props(vacc_model, vacc_df, lagged_df)
caption = (
    'Reported vaccination coverage by program (dashed black line), and lagged vaccination coverage (dotted black line). '
    'Coloured areas represent distribution of population by vaccination status under vaccination extension: not yet vaccinated under program (green), '
    'recently vaccinated under program (red), and vaccinated under program but protective effect lost (blue).'
)
add_image_to_doc(
    vacc_distribution.update_layout(showlegend=False),
    'vaccination_distribution',
    'svg',
    'Comparison of reported to modelled vaccination status distribution. ',
    app_doc,
    'Vaccination extension',
    caption=caption,
)
add_vaccination_blurb_to_tex(app_doc)

In [None]:
# Example matrices: mixing matrices figure for the 'Mixing' section
example_matrix_fig = plot_example_model_matrices(epi_model, parameters)
add_image_to_doc(
    example_matrix_fig.update_layout(height=550),
    'example_matrices',
    'svg',
    'Dynamic mixing matrices.',
    app_doc,
    'Mixing',
)

In [None]:
# Spaghetti plots
# Load the 'spaghetti' results table for every analysis; later cells reuse `spaghettis`
spaghettis = {k: pd.read_hdf(RUNS_PATH / v / 'output/results.hdf', 'spaghetti') for k, v in RUN_IDS.items()}
spaghetti = spaghettis[PRIMARY_ANALYSIS]

# Key indicators for the primary analysis
key_indicators_fig = plot_spaghetti(spaghetti, ['notifications_ma', 'deaths_ma', 'adult_seropos_prop', 'reproduction_number'], 2, targets)
key_outputs_title = 'Key outputs for randomly sampled runs from calibration algorithm.'
add_image_to_doc(key_indicators_fig.update_layout(showlegend=False), 'multioutput_spaghetti', 'svg', key_outputs_title, app_doc, 'Calibration results')

# Sub-variant proportions. The backslashes in the last line are doubled so that the
# literal backslash is passed through to the TeX document without relying on Python's
# deprecated handling of the invalid escape sequences "\>" and "\%".
variant_prop_caption = (
    'Proportion of modelled cases attributable to each sub-variant over time. '
    'Solid curved lines, proportion of prevalence attributable to BA.1, '
    'Dashed curved lines, proportion of prevalence attributable to BA.1 or BA.2. '
    'Key dates for each variant are shown as vertical bars: blue, BA.1; red, BA.2; green, BA.5; '
    'dotted, first detection; dashed, \\>1\\%; solid, \\>50\\%. '
)
variant_prop_fig = plot_subvariant_props(spaghetti)
add_image_to_doc(variant_prop_fig, 'variant_prop_spaghetti', 'svg', 'Proportional prevalence of modelled sub-variants.', app_doc, 'Calibration results', caption=variant_prop_caption)

In [None]:
# Get calibration results: reload the primary analysis and drop the burn-in draws
calib_path = RUNS_PATH / RUN_IDS[PRIMARY_ANALYSIS] / 'output/calib_full_out.nc'
idata = az.from_netcdf(calib_path).sel(draw=slice(BURN_IN, None))

# Sample of draws used for the example plots
n_samples = 12
sampled_idata = az.extract(idata, num_samples=n_samples)

# Quantiles of the spaghetti results for every analysis
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
quantile_outputs = {
    k: esamp.quantiles_for_results(spaghettis[k], quantiles)
    for k in RUN_IDS
}

In [None]:
# CDR examples: figure built from the sampled 'start_cdr' values
cdr_caption = 'Examples of the modelled effect of various starting CDR proportion parameters.'
cdr_fig = plot_cdr_examples(sampled_idata.variables['start_cdr'])
caption = f'Modelled case detection ratio over time for {n_samples} randomly selected parameter draws from calibration algorithm.'
add_image_to_doc(
    cdr_fig,
    'cdr_examples',
    'svg',
    cdr_caption,
    app_doc,
    'Outputs',
    caption=caption,
)

In [None]:
# Example outputs figure from the single run of epi_model
outputs_fig = plot_single_run_outputs(epi_model, targets)
add_image_to_doc(
    outputs_fig.update_layout(showlegend=False),
    'single_run_outputs',
    'svg',
    'Outputs from single model run',
    app_doc,
    'Outputs',
)

In [None]:
# Infection processes figure
infection_fig = plot_infection_processes(epi_model.get_derived_outputs_df(), targets, 'notifications_ma')
# A space now follows '(light).' so the two sentences no longer run together
caption = (
    'Colour shows infection with BA.1 (greens), BA.2 (blues) and BA.5 (purples). '
    'Shading depth shows infection process, with initial infection (dark), early reinfection (intermediate darkness), late reinfection (light). '
    'Note early reinfection with BA.1 does not occur to a significant extent. '
)
add_image_to_doc(infection_fig.update_layout(showlegend=False), 'infect_process', 'svg', 'Simulated infection processes for maximum likelihood results', app_doc, 'Outputs', caption=caption)

# Cross-references use the names under which the figures are added to the document;
# the mixing matrices figure is added as 'example_matrices'
output_text = (
    'Results from an example run are presented in Figure \\ref{single_run_outputs}, '
    'Figure \\ref{example_matrices} and Figure \\ref{infect_process}, and model construction is described in the following sections. '
)
app_doc.add_line(output_text, 'Outputs')

In [None]:
# Credible interval plots
# Shared ending for the three by-analysis captions
caption_end = (
    ' with median estimate (black line), 2.5 to 97.5 centile credible interval '
    '(light blue shading), and 25 to 75 centile credible interval (dark blue shading), '
    'with comparison against epidemiological targets (red circles). '
    'Panel for each of the four candidate analyses. '
)

# Cases by analysis
case_ranges_by_analysis = plot_output_ranges_by_analysis(quantile_outputs, targets, 'notifications_ma', quantiles)
title = 'Cases credible intervals by analysis.'
caption = f'Case notifications {caption_end}'
add_image_to_doc(
    case_ranges_by_analysis, 'case_ranges', 'svg', title,
    app_doc, 'Analysis comparison', caption=caption,
)

# Deaths by analysis
death_ranges_by_analysis = plot_output_ranges_by_analysis(quantile_outputs, targets, 'deaths_ma', quantiles)
caption = f'Deaths {caption_end}'
add_image_to_doc(
    death_ranges_by_analysis, 'death_ranges', 'svg', 'Deaths credible intervals by analysis.',
    app_doc, 'Analysis comparison', caption=caption,
)

# Seropositive proportion by analysis
seropos_ranges_by_analysis = plot_output_ranges_by_analysis(quantile_outputs, targets, 'adult_seropos_prop', quantiles)
caption = f'Seropositive proportion {caption_end}'
add_image_to_doc(
    seropos_ranges_by_analysis, 'seropos_ranges', 'svg', 'Seropositive proportion credible intervals by analysis.',
    app_doc, 'Analysis comparison', caption=caption,
)

# All key outputs for the primary analysis
outputs = ['notifications_ma', 'deaths_ma', 'adult_seropos_prop', 'reproduction_number']
base_analysis_ranges = plot_output_ranges(quantile_outputs, targets, outputs, PRIMARY_ANALYSIS, quantiles)
caption = (
    'Primary analysis model median estimate (black line), 2.5 to 97.5 centile credible interval '
    '(light blue shading), and 25 to 75 centile credible interval (dark blue shading), '
    'with comparison against epidemiological targets (red circles). '
    'Panel for each epidemiological output. '
)
add_image_to_doc(
    base_analysis_ranges, 'base_ranges', 'svg', 'Primary analysis output credible intervals.',
    app_doc, 'Calibration results', caption=caption,
)
range_text = 'Results for the credible intervals around the main outputs compared against their targets are presented in Figure \\ref{base_ranges}. '
app_doc.add_line(range_text, 'Calibration results')

In [None]:
# Dispersion: example figure for the 'Targets' section
uncalibrated_priors = get_priors(False, param_info['abbreviations'], dummy_doc)
prior_names = [prior.name for prior in uncalibrated_priors]
dispersion_fig = plot_dispersion_examples(
    idata,
    epi_model,
    parameters,
    prior_names,
    targets,
    ['10, 10, 100', '100, 10, 10'],
    np.linspace(0.1, 0.9, 9),
)
dispersion_fig = dispersion_fig.update_layout(showlegend=False)
caption = (
    'Examples of the effect of values of the negative binomial distribution dispersion parameter, '
    'centiles of likelihood distribution. Actual targets used for likelihood calculation circles. '
)
add_image_to_doc(
    dispersion_fig.update_layout(showlegend=False),
    'dispersion_examples',
    'svg',
    'Dispersion examples.',
    app_doc,
    'Targets',
    caption=caption,
)
add_dispersion_blurb_to_tex(app_doc)

In [None]:
# Likelihood comparison
like_components = ['loglikelihood', 'll_adult_seropos_prop', 'll_deaths_ma', 'll_notifications_ma']
like_outputs = get_like_components(like_components)
# One request per likelihood component: two numeric values and a text label
requests = {
    'loglikelihood': (-28.0, -12.0, 'total likelihood'),
    'll_adult_seropos_prop': (-3.0, 2.0, 'seroprevalence contribution'),
    'll_deaths_ma': (-10.0, -3.0, 'deaths contribution'),
    'll_notifications_ma': (-17.0, -10.0, 'cases contribution'),
}
like_comparison_fig = plot_like_components_by_analysis(
    like_outputs,
    'kdeplot',
    plot_requests=requests,
    alpha=0.1,
    linewidth=1.5,
)
caption = (
    'Comparison of the kernel density distribution of the final likelihood from calibration algorithm, '
    'with the contributions to the final likelihood of the three targets from which it was constructed.'
)
add_image_to_doc(
    like_comparison_fig,
    'like_comparison',
    'svg',
    'Likelihood comparison, kernel densities.',
    app_doc,
    'Analysis comparison',
    caption=caption,
)
plt.close()
add_likelihood_blurb_to_tex(app_doc)

In [None]:
# Calibration results
priors = get_priors(PRIMARY_ANALYSIS in ['vacc', 'both'], param_info['abbreviations'], dummy_doc)
prior_names = [p.name for p in priors]

# Split the parameter names into three groups, one trace figure per group
first_third_priors, middle_third_priors, last_third_priors = prior_names[:6], prior_names[6:12], prior_names[12:]
trace_figs = [
    plot_param_progression(idata, param_info['descriptions'], req_vars=group)
    for group in (first_third_priors, middle_third_priors, last_third_priors)
]
trace_fig1, trace_fig2, trace_fig3 = trace_figs

# Calibration metrics table
calibration_table = tabulate_calib_results(idata, priors, param_info)
app_doc.include_table(
    calibration_table,
    'calibration_metrics',
    'Calibration metrics',
    section='Calibration results',
    subsection='Calibration performance',
    col_splits=[0.142] * 7,
    table_width=12.0,
    longtable=True,
)

# Trace figures, numbered from one
for i_fig, trace_fig in enumerate(trace_figs, start=1):
    add_image_to_doc(
        trace_fig,
        f'trace_fig_{i_fig}',
        'svg',
        f'Parameter posteriors and traces by chain, {i_fig}.',
        app_doc,
        'Calibration results',
        subsection='Parameter inference',
    )

In [None]:
# Parameter inference results for all alternative analyses
for analysis, run_id in RUN_IDS.items():
    alternative_idata = az.from_netcdf(RUNS_PATH / run_id / 'output/calib_full_out.nc')
    priors = get_priors(ANALYSIS_FEATURES[analysis]['vacc'], param_info['abbreviations'], dummy_doc)
    prior_names = [p.name for p in priors]

    # Two figures per analysis, each covering about half of the priors
    n_half_priors = round(len(priors) / 2)
    first_priors, last_priors = prior_names[:n_half_priors], prior_names[n_half_priors:]
    comp_figs = [
        plot_posterior_comparison(alternative_idata, priors, names, param_info['abbreviations'].to_dict(), 0.995, param_info["units"])
        for names in (first_priors, last_priors)
    ]
    comp_fig1, comp_fig2 = comp_figs

    caption = 'Inferred parameter posterior densities (blue areas) compared against corresponding calibration algorithm prior distributions (grey areas).'
    for i_fig, comp_fig in enumerate(comp_figs, start=1):
        add_image_to_doc(
            comp_fig,
            f'comp_fig_{i_fig}_{analysis}',
            'svg',
            f"Posterior densities and prior distributions under the `{analysis}' analysis, {i_fig}.",
            app_doc,
            'Calibration results',
            subsection='Parameter inference',
            caption=caption,
            fig_width=1.0,
        )
add_calibration_blurb_to_tex(app_doc)

In [None]:
# Parameter correlation
az.rcParams['plot.max_subplots'] = 200
mpl.rcParams['axes.facecolor'] = (0.2, 0.2, 0.4)

# Parameter sets to plot: everything except the dispersion parameters, then two subsets
epi_params = [param for param in idata.posterior.keys() if '_dispersion' not in param]
key_params = ['contact_rate', 'latent_period', 'infectious_period', 'natural_immunity_period', 'start_cdr', 'imm_infect_protect', 'ba2_escape', 'ba5_escape', 'imm_prop']
imm_params = ['natural_immunity_period', 'imm_infect_protect', 'ba2_escape', 'ba5_escape', 'imm_prop']

# (variables, text size, figure name, figure title) for each pair plot, in document order
pair_plot_requests = (
    (epi_params, 35, 'all_param_correlation', 'All parameter correlation plot matrix.'),
    (key_params, 30, 'key_param_correlation', 'Selected parameter correlation plot matrix.'),
    (imm_params, 20, 'imm_param_correlation', 'Immunity-related parameter correlation plot matrix.'),
)
for var_names, textsize, fig_name, fig_title in pair_plot_requests:
    fig = az.plot_pair(
        idata,
        var_names=var_names,
        kind='kde',
        textsize=textsize,
        labeller=MapLabeller(var_name_map=param_info['abbreviations']),
    )
    add_image_to_doc(fig, fig_name, 'svg', fig_title, app_doc, 'Calibration results', subsection='Parameter correlation', fig_width=1.0)
    plt.close()

blurb = (
    'Figures \\ref{all_param_correlation}, \\ref{key_param_correlation} and \\ref{imm_param_correlation} '
    'show the bivariate distributions of various sets of pairs of parameters used in the calibration algorithm. '
)
app_doc.add_line(blurb, 'Calibration results', subsection='Parameter correlation')

In [None]:
# Section names in the order passed to write_doc; these match the section
# arguments used when content was added to app_doc in the cells above
section_order = [
    'Approach to analyses',
    'Base compartmental structure',
    'Population',
    'Stratification',
    'Reinfection',
    'Mixing',
    'Mobility extension',
    'Vaccination extension',
    'Outputs',
    'Parameters',
    'Targets',
    'Calibration methods',
    'Analysis comparison',
    'Calibration results',
]
# Write out the accumulated appendix document using this ordering
app_doc.write_doc(order=section_order)

In [None]:
# The PDF build below runs only when the earlier cell detected Colab
if on_colab:
    # To build a PDF, we need the appropriate tex packages installed
    ! apt-get -y update
    ! apt-get -y install inkscape texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra texlive-bibtex-extra biber
    # To avoid clutter and navigate to the right directory
    import os
    os.chdir(SUPPLEMENT_PATH)
    # And finally build the formatted PDF, repeated commands are necessary:
    # pdflatex, then biber, then pdflatex twice more on the 'supplement' document
    ! pdflatex -shell-escape supplement
    ! biber supplement
    ! pdflatex -shell-escape supplement
    ! pdflatex -shell-escape supplement