In [9]:
import arviz as az
from pathlib import Path
from datetime import datetime
import yaml

from estival.sampling import tools as esamptools

import utils as ut
import model as md

# Root directory for every analysis output, resolved against the current working directory.
parent_output_folder = Path.cwd().joinpath("outputs")

In [10]:
# Small settings intended for quick test runs of the whole pipeline.
TEST_ANALYSIS_CONFIG = dict(
    # Budget handed to the optimiser (passed as `opti_budget` to ut.find_mle).
    opti_budget=100,
    # Metropolis sampling settings (chains, cores, tuning steps, draws).
    mcmc_chains=4,
    mcmc_cores=4,
    mcmc_tune=100,
    mcmc_samples=100,
    # Full-run settings: draws discarded as burn-in, then number of samples extracted.
    full_runs_burnin=50,
    full_runs_samples=100,
)

# Settings for the full-size analysis; same keys as the test configuration, larger values.
FULL_ANALYSIS_CONFIG = dict(
    # Budget handed to the optimiser (passed as `opti_budget` to ut.find_mle).
    opti_budget=10000,
    # Metropolis sampling settings (chains, cores, tuning steps, draws).
    mcmc_chains=4,
    mcmc_cores=4,
    mcmc_tune=1000,
    mcmc_samples=5000,
    # Full-run settings: draws discarded as burn-in, then number of samples extracted.
    full_runs_burnin=2000,
    full_runs_samples=1000,
)

In [11]:
# Structural settings passed to md.get_tb_model.
# NOTE(review): the meaning and units of each entry are defined by the `model` module, not here.
model_config = dict(
    start_time=1850,
    end_time=2050,
    population=1e6,
    seed=100,
    intervention_time=2025,
)

# One entry per intervention; the keys of this mapping are the intervention names
# that run_analysis activates one at a time.
intervention_params = dict(
    transmission_reduction=dict(rel_reduction=0.20),
    preventive_treatment=dict(rate=0.10, efficacy=0.8),
    # NOTE(review): "mutliplier" looks misspelled, but the key is kept as-is because
    # it presumably has to match what the `model` module looks up - confirm before renaming.
    faster_detection=dict(detection_rate_mutliplier=2.0),
    improved_treatment=dict(negative_outcomes_rel_reduction=0.50),
)

# Optimisation

### Find the maximum-likelihood parameter set, varying all parameters, using the model with no intervention

In [4]:
# Baseline model: the empty list means no intervention is activated.
model = md.get_tb_model(model_config, intervention_params, active_interventions=[])

# NOTE(review): `analysis_config` is only assigned in a later cell of this notebook,
# so this line raises NameError on a fresh kernel run top to bottom - confirm which
# configuration (test or full) the optimisation is meant to use.
mle_params = ut.find_mle(model, opti_budget=analysis_config['opti_budget'])

# NOTE(review): this writes to the working directory, whereas the Main Analysis
# section reads "data/mle_params.yml" - confirm whether the file is moved by hand.
with open("mle_params.yml", mode='w') as file:
    yaml.dump(mle_params, stream=file, default_flow_style=False)

### Check optimal model fit

In [3]:
# Run the model with the default parameters overridden by the MLE values
# (right-hand operand of `|` wins on duplicate keys).
fitted_params = ut.default_params | mle_params
model.run(fitted_params)
do = model.get_derived_outputs_df()

# Modelled prevalence from index label 2010 onwards, followed by the target data as red dots.
# NOTE(review): both calls rely on the implicit current axes to end up on one figure.
prevalence_from_2010 = do['tb_prevalence_per100k'].loc[2010:]
prevalence_from_2010.plot()
ut.target_data.plot(style='.', color='red')

# Main Analysis 

### Run Metropolis sampling 

In [12]:
# Read the previously saved MLE parameters from <cwd>/data/mle_params.yml.
mle_path = Path.cwd().joinpath("data", "mle_params.yml")
with mle_path.open(mode='r') as file:
    mle_params = yaml.safe_load(file)

In [17]:
from copy import deepcopy

def run_analysis(fixed_param, mle_params, analysis_config, model_config, intervention_params, folder_path):
    """Run Metropolis sampling, then full scenario runs for a subsample of the draws.

    Steps, all outputs being written under ``folder_path``:

    1. Build the no-intervention model and, when ``mle_params`` is None, compute
       the MLE with ``ut.find_mle``.
    2. Sample with ``ut.run_sampling`` and save the result as ``idata_{fixed_param}.nc``.
    3. Discard the first ``full_runs_burnin`` draws and extract
       ``full_runs_samples`` parameter samples from what remains.
    4. For the baseline (no intervention) and then each key of
       ``intervention_params``: run the model for those samples and save the
       results to ``full_runs/fullruns_{fixed_param}_{intervention}.parquet``.
       For each intervention, also write the output of
       ``ut.calculate_diff_output_quantiles`` (baseline vs intervention) to
       ``diff_output_dfs/diff_outputs_{fixed_param}_{intervention}.csv``.

    Args:
        fixed_param: Forwarded to ``ut.run_sampling`` and ``ut.get_bcm_object``;
            also used in messages and output file names. May be None.
        mle_params: Mapping of MLE parameter values, or None to compute them here.
        analysis_config: Mapping providing 'opti_budget', 'mcmc_samples',
            'mcmc_tune', 'mcmc_cores', 'mcmc_chains', 'full_runs_burnin' and
            'full_runs_samples'.
        model_config: Forwarded to ``md.get_tb_model``.
        intervention_params: Mapping keyed by intervention name; forwarded to
            ``md.get_tb_model``.
        folder_path: ``pathlib.Path`` of the output directory (created if missing).

    Returns:
        None. Results are only written to disk.

    Raises:
        ValueError: If 'full_runs_burnin' is negative or not smaller than
            'mcmc_samples', i.e. no draw would remain after discarding burn-in.
    """
    # Fail fast, before any folder is created or any sampling is done: a bad
    # burn-in would otherwise only surface after the (expensive) MCMC run.
    # NOTE(review): assumes each chain holds exactly 'mcmc_samples' draws - confirm in ut.run_sampling.
    n_draws = analysis_config['mcmc_samples']
    burn_in = analysis_config['full_runs_burnin']
    if not 0 <= burn_in < n_draws:
        raise ValueError(
            f"full_runs_burnin ({burn_in}) must be >= 0 and smaller than "
            f"mcmc_samples ({n_draws}); no draws would remain after burn-in."
        )

    folder_path.mkdir(parents=True, exist_ok=True)
    model = md.get_tb_model(model_config, intervention_params, active_interventions=[])

    if mle_params is None:
        mle_params = ut.find_mle(model, opti_budget=analysis_config['opti_budget'])

    print(f"Running Metropolis sampling fixing {fixed_param}")
    idata = ut.run_sampling(
        model,
        mle_params,
        fixed_param,
        draws=n_draws,
        tune=analysis_config['mcmc_tune'],
        cores=analysis_config['mcmc_cores'],
        chains=analysis_config['mcmc_chains'],
    )
    idata.to_netcdf(folder_path / f"idata_{fixed_param}.nc")

    print(f"Running full runs fixing {fixed_param}")
    chain_length = idata.sample_stats.sizes['draw']
    burnt_idata = idata.sel(draw=range(burn_in, chain_length))  # Discard burn-in
    full_run_param_samples = az.extract(burnt_idata, num_samples=analysis_config['full_runs_samples'])

    full_runs_dir = folder_path / "full_runs"
    diff_outputs_dir = folder_path / "diff_output_dfs"
    full_runs_dir.mkdir(exist_ok=True)
    diff_outputs_dir.mkdir(exist_ok=True)

    # None (the baseline) must come first: every intervention is compared against it.
    for intervention in [None] + list(intervention_params.keys()):
        active_interventions = [intervention] if intervention else []
        model = md.get_tb_model(model_config, intervention_params, active_interventions)
        bcm = ut.get_bcm_object(model, ut.default_params | mle_params, fixed_param)
        full_runs = esamptools.model_results_for_samples(full_run_param_samples, bcm)
        full_runs.results.to_parquet(full_runs_dir / f"fullruns_{fixed_param}_{intervention}.parquet")

        if intervention is None:
            # Keep an independent copy of the baseline runs as the reference.
            ref_full_runs = deepcopy(full_runs)
        else:
            diff_output_dfs = ut.calculate_diff_output_quantiles(ref_full_runs, full_runs)
            diff_output_dfs.to_csv(diff_outputs_dir / f"diff_outputs_{fixed_param}_{intervention}.csv")


### Run the analysis: Metropolis sampling, then full scenario runs for the sampled parameters

In [18]:
# Settings for this run: no parameter is fixed and the small test configuration is used.
fixed_param = None
analysis_config = TEST_ANALYSIS_CONFIG
analysis_name = 'test'

# Each run gets its own time-stamped folder under the parent output directory.
run_timestamp = datetime.now().strftime('%Y_%m_%d@%H_%M_%S')
folder_path = parent_output_folder / f"{run_timestamp}_{analysis_name}_fixed_{fixed_param}"

run_analysis(
    fixed_param=fixed_param,
    mle_params=mle_params,
    analysis_config=analysis_config,
    model_config=model_config,
    intervention_params=intervention_params,
    folder_path=folder_path,
)

Running Metropolis sampling fixing None


Only 100 samples in chain.
Multiprocess sampling (4 chains in 4 jobs)
DEMetropolisZ: [transmission_rate, activation_rate_early, activation_rate_late, stabilisation_rate, rr_reinfection_latent_late, rr_reinfection_recovered, self_recovery_rate, tb_death_rate, current_passive_detection_rate]


Sampling 4 chains for 100 tune and 100 draw iterations (400 + 400 draws total) took 48 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


Running full runs fixing None
