In [None]:
from IPython.display import display, Markdown, HTML
from tbh.paths import REPO_ROOT_PATH, DATA_FOLDER
# Folder holding the outputs of the cluster run that this report is built from.
analysis_path = (
    REPO_ROOT_PATH
    / "remote_cluster"
    / "outputs"
    / "49167504_full_analysis"
    / "task_1"
)

In [None]:
import tbh.plotting as pl
import pandas as pd
import arviz as az
from matplotlib import pyplot as plt 
# Apply the ggplot style to the figures created from here on.
plt.style.use("ggplot")

# Scenario keys, as they appear in the names of the analysis output files.
intervention_scenarios = ["scenario_1", "scenario_2", "scenario_3"]
all_scenarios = ["baseline", *intervention_scenarios]

# One "uncertainty_df_<scenario>" table per scenario, baseline included.
unc_dfs = {
    scenario: pd.read_parquet(analysis_path / f"uncertainty_df_{scenario}.parquet")
    for scenario in all_scenarios
}

# One "diff_quantiles_df_ref_baseline_<scenario>" table per intervention scenario.
diff_outputs_dfs = {
    scenario: pd.read_parquet(analysis_path / f"diff_quantiles_df_ref_baseline_{scenario}.parquet")
    for scenario in intervention_scenarios
}

# Inference data saved by the analysis run.
idata = az.from_netcdf(analysis_path / "idata.nc")

In [None]:
import yaml

# details.yaml may hold several YAML documents; load all of them into a list.
details_path = analysis_path / "details.yaml"
with open(details_path, "r") as details_file:
    docs = list(yaml.safe_load_all(details_file))

# The second and third documents are used as the model and analysis configurations.
model_config, analysis_config = docs[1], docs[2]

In [None]:
from tbh import runner_tools as rt
from tbh.model import get_tb_model
from estival.model import BayesianCompartmentalModel


# Fetch the parameters, the priors and `tv_params` from the runner tools.
# NOTE(review): `tv_params` presumably stands for time-variant parameters — confirm.
params, priors, tv_params = rt.get_parameters_and_priors()

# Build the TB model from the configuration read from details.yaml, then wrap it
# (with the parameters, priors and rt.targets) into the BayesianCompartmentalModel
# object that the plotting calls further down receive as `bcm`.
model = get_tb_model(model_config, tv_params)
bcm = BayesianCompartmentalModel(model, params, priors, rt.targets)

# Background and Introduction
## Aims and simulated population
This modelling analysis aims to explore and compare various scenarios of screening for TB and TBI in Kiribati. It focuses on simulating what the next phase of PEARL screening could look like under different diagnostic approaches and screening rates.
For this purpose, the simulated population is the population of South Tarawa, excluding the part already screened by the end of 2025. Thus, the model is designed to capture the part of South Tarawa extending from Nanikai to Bonriki.

We will use the data collected to date through the PEARL study to estimate TB and TBI prevalence in the population that has already been screened and will assume that similar epidemiological patterns apply to the population yet to be screened. These prevalence estimates will serve as calibration targets for the modelling exercise, with certain parameters automatically adjusted to ensure that the model reproduces the observed estimates.

## Modelled scenarios
The table below summarises the screening scenarios evaluated in this analysis. Scenario 0 represents a base case with no active case finding, included for comparison. Scenarios 1–5 progressively increase the screening rate by expanding target coverage and modifying screening components (CXR, Xpert, and TST). Each scenario is defined by the number of individuals targeted from the total South Tarawa population that could potentially be reached by the PEARL activities in 2026 (up to 35,000 people). The “CXR”, “Xpert”, and “TST” columns indicate the population subgroups eligible for the respective screening component. For all scenarios, we assume a TB treatment success rate of 95% and a TPT completion rate of 70%.

| Sc.| Strategy / Coverage             | Target (/35k)| Enrolled | CXR  | Xpert | TST |
|---|----------------------------------|--------------|----------|------|-------|-----|
| 0 | No screening - for comparison    | 0            | 0%       | No   | No    | No  |
| 1 | Current approach / Low           | 15k          | 85%      | 3+   | 35% of 10+| 3+  |
| 2 | Current approach / Med           | 20k          | 85%      | 3+   | 35% of 10+| 3+  |
| 3 | Drop Xpert / High                | 25k          | 85%      | 3+   | No        | 3+  |
| 4 | Drop Xpert – Screen 10+ / VHigh  | 30k          | 85%      | 10+  | No        | 10+ |
| 5 | Drop Xpert & TST / Max           | 35k          | 85%      | 3+   | No        | No  |

# Model calibration and modelled baseline epidemic
## Model fits to data
@fig-multifit shows the model fits to calibration targets derived from data collected during the intervention. The black dots represent the observed quantities, whereas model estimates are shown in blue (solid line: median; dark shade: interquartile range; light shade: 95% CI).

In [None]:
#| fig-pos: "H" 
#| label: fig-multifit
#| fig-cap: "Model fits to observations. The black dots represent the observed quantities, whereas model estimates are shown in blue (solid line: median; dark shade: interquartile range; light shade: 95% CI)."
# Targets whose name starts with "measured_tbi_prevalenceXage" are left out of this grid.
age_specific_tbi_targets = [
    target for target in bcm.targets if target.startswith("measured_tbi_prevalenceXage")
]
fig = pl.plot_all_model_fits(
    unc_dfs['baseline'],
    bcm,
    n_col=2,
    excluded_outputs=age_specific_tbi_targets,
)
display(fig)
plt.close(fig)  # release the figure once it has been rendered

## Parameters' posterior distributions
@fig-posteriors shows a comparison between the prior and posterior distributions of the calibrated model parameters. The prior distributions, shown in grey, reflect our knowledge about the parameter values **before** running the analysis and before considering the data used for calibration. Prior distributions are often chosen to be non-informative when there is very little evidence available to inform the parameter value. The posterior distributions, displayed in red, show the parameter values accepted during model calibration. These are the values found to be 'plausible', or appropriate to produce reasonable fits to the observations.

In [None]:
#| fig-pos: "H" 
#| label: fig-posteriors
#| fig-cap: "Prior and posterior distributions of calibrated parameters. Prior distributions are shown in grey, and posterior distributions in red."
# Names and prior objects of the calibrated parameters, in matching order.
calibrated_param_names = list(bcm.priors.keys())
calibrated_param_priors = list(bcm.priors.values())

fig = pl.plot_post_prior_comparison(
    idata,
    analysis_config['burn_in'],
    req_vars=calibrated_param_names,
    priors=calibrated_param_priors,
)
display(fig)
plt.close(fig)  # release the figure once it has been rendered

## Model outputs for non-fitted epidemic indicators
### Past epidemic trajectories

In [None]:
# Indicators shown in this section; 'viable_tbi_prevalence_perc' is currently left out.
selected_outputs = ['tb_incidence_per100k', 'tb_mortality_per100k', 'passive_detection_rate_clin']

for output in selected_outputs:
    # Use the display title when one is registered, otherwise the raw output key.
    out_name = pl.title_lookup.get(output, output)
    display(Markdown(f"**{out_name}**"))

    # The passive detection rate is plotted from 1950, the other indicators from 1990.
    if output == "passive_detection_rate_clin":
        x_min = 1950
    else:
        x_min = 1990

    fig, ax = plt.subplots(figsize=(5, 3))
    pl.plot_model_fit_with_uncertainty(
        ax,
        unc_dfs['baseline'],
        output,
        bcm,
        x_lim=(x_min, 2025),
        ylab_fontsize=10,
    )
    display(fig)
    plt.close(fig)  # avoid accumulating open figures across iterations


### Estimated age-specific TST positivity rates

In [None]:
#| fig-pos: "H" 
#| label: fig-age_tbi
#| fig-cap: "Age-specific TST positivity rate (observed and modelled). Red crosses indicate the measured TST positivity rate (%). Blue boxes represent the model estimates (median, interquartile range, 95% CI)."
# Age-specific TBI prevalence figure, drawn from the baseline uncertainty table and `bcm`.
fig = pl.plot_age_spec_tbi_prev(unc_dfs['baseline'], bcm)
display(fig)
# Close the figure after rendering so it is not kept open by matplotlib.
plt.close(fig)

# Projected trajectories under various screening scenarios

In [None]:
# Indicators compared between the baseline and each intervention scenario.
outputs_to_plot = [
    'tb_incidence_per100k',
    'viable_tbi_prevalence_perc',
    'measured_tbi_prevalence_perc',
    'tb_mortality_per100k',
]

for sc in intervention_scenarios:
    # One second-level heading per intervention scenario.
    display(Markdown(f"## {sc}"))

    for output in outputs_to_plot:
        # Use the display title when one is registered, otherwise the raw output key.
        out_name = pl.title_lookup.get(output, output)
        display(Markdown(f"**{out_name}**"))

        fig, ax = plt.subplots(figsize=(5, 3))
        pl.plot_two_scenarios(
            ax,
            unc_dfs,
            output,
            scenarios=['baseline', sc],
            xlim=(2015, 2050),
            include_unc=True,
            ylab_fontsize=10,
        )
        display(fig)
        plt.close(fig)  # avoid accumulating open figures across iterations


# Estimated cumulative impact of interventions on TB disease and mortality

## Cumulative outputs over 2020-2050

In [None]:
# One figure per cumulative indicator, comparing the baseline and all intervention scenarios.
cumulative_outputs = ('cum_tb_incidence', 'cum_tb_mortality')

for output in cumulative_outputs:
    fig, ax = plt.subplots()
    pl.plot_final_size_compare(ax, unc_dfs, output, all_scenarios)
    display(fig)
    plt.close(fig)  # avoid accumulating open figures across iterations

## TB episodes and deaths averted over 2020-2050 (ref. no intervention)

In [None]:
# One figure per "averted" indicator, across the intervention scenarios.
averted_outputs = ("TB_averted", "TB_averted_relative")

for output in averted_outputs:
    fig, ax = plt.subplots()
    pl.plot_diff_outputs(ax, diff_outputs_dfs, output, intervention_scenarios)
    display(fig)
    plt.close(fig)  # avoid accumulating open figures across iterations

# Model structure and parameters

![Model structure. Not shown: age-stratification; natural mortality (all compartments); TB mortality (clinical TB compartments); self-recovery (subclinical TB compartments); reinfection transitions from the 'Contained', 'Cleared' and 'Recovered' compartments back to 'Incipient'.](tb_model.png){#fig-tb-model width=100%}

In [None]:
#| tab-params: "H" 
#| label: tbl-params
#| tbl-cap: "Model parameters"
# Quarto only captions and cross-references a table when the cell label starts with
# "tbl-" and the caption option is "tbl-cap"; the former "tab-" spellings were not
# recognised as table options.
# NOTE(review): the first option above ("tab-params") is kept as found; it does not
# look like a standard Quarto cell option — confirm what was intended.

# Read the "constant" sheet of the parameters workbook.
params_file_path = DATA_FOLDER / "parameters.xlsx"
param_df = pd.read_excel(params_file_path, sheet_name="constant")

# Show "<distribution> (<param1>, <param2>)" for rows that define a distribution,
# and the plain value otherwise.
param_df["value_or_prior"] = param_df.apply(
    lambda row: f"{row['distribution']} ({row['distri_param1']}, {row['distri_param2']})"
    if pd.notna(row['distribution'])
    else str(row['value']),
    axis=1
)
param_df = param_df.rename(columns={"full_text": "definition"})
# Blank out missing cells so they do not print as "nan" in the rendered table.
param_df = param_df.fillna("")

# index=False skips the row numbers in the markdown table.
md_table = param_df[["parameter", "definition", "value_or_prior", "unit"]].to_markdown(index=False)
display(Markdown(md_table))

In [None]:
# Development aid: re-import the plotting module so that edits made to it are picked
# up without restarting the kernel. Not needed for a clean top-to-bottom run.
from importlib import reload
# The trailing semicolon keeps the returned module object from being echoed as cell output.
reload(pl);

In [None]:
#| fig-pos: "H" 
#| label: fig-pairs
#| fig-cap: "Pairwise relationships between calibrated parameters, with 2D kernel density estimates shown. The posterior mode is indicated by a black dot."
# Pair plot of the calibrated parameters (the keys of bcm.priors), using the burn-in
# value stored in the analysis configuration.
# NOTE(review): 'kde' presumably selects the kernel-density rendering mentioned in the caption — confirm.
fig = pl.plot_posterior_pairs(idata, analysis_config['burn_in'], list(bcm.priors.keys()), 'kde')
display(fig)
# Close the figure after rendering so it is not kept open by matplotlib.
plt.close(fig)