# Type-II MAP vs MCMC (Scenario A, Seed 0)
- Goal: Compare posterior quality and runtime across inference methods.
- Flow: Run smoke test, execute full comparison, and export summary tables/figures.


In [1]:
from __future__ import annotations

import importlib.util
import sys
from pathlib import Path
import os
# Avoid matplotlib cache warnings in restricted environments
os.environ.setdefault('MPLCONFIGDIR', str(Path('/tmp') / 'mplconfig_bapc'))
import numpy as np
import pandas as pd

# Locate the project root so that `src` is importable: use the current working
# directory when it contains `src/`, otherwise fall back to its parent (the
# case when the kernel is started from notebooks/).
_cwd = Path.cwd().resolve()
if (_cwd / 'src').exists():
    ROOT = _cwd
else:
    ROOT = _cwd.parent
_root_str = str(ROOT)
if _root_str not in sys.path:
    sys.path.insert(0, _root_str)

# Project-local imports must come after the sys.path adjustment above.
print('[1/7] Importing project modules...')
from src.data import simulate_cohort_data
from src.models import CohortPeriodModel
from src.evaluation import run_scenario_a_single_n, save_method_artifacts
print('[1/7] Imports ready.')

# Output locations: per-run artifacts and cross-run summary tables.
RESULT_ROOT = ROOT.joinpath('results', 'simulations', 'method_comparison', 'A')
SUMMARY_ROOT = ROOT.joinpath('results', 'summary_simulations')
for _output_root in (RESULT_ROOT, SUMMARY_ROOT):
    # Idempotent: safe to re-run the cell when the directories already exist.
    _output_root.mkdir(parents=True, exist_ok=True)
print('[2/7] Output directories ready.')
print('Root:', ROOT)
print('Result root:', RESULT_ROOT)


[1/7] Importing project modules...
[1/7] Imports ready.
[2/7] Output directories ready.
Root: /Users/leyaotan/Desktop/bapc-finalproject/BayesianWorkflow-Final
Result root: /Users/leyaotan/Desktop/bapc-finalproject/BayesianWorkflow-Final/results/simulations/method_comparison/A


In [2]:
print('[3/7] Checking MCMC dependencies...')
# Probe for each optional package by looking up its import spec; a missing
# package yields None rather than raising.
_installed = {
    pkg: importlib.util.find_spec(pkg) is not None
    for pkg in ('jax', 'numpyro')
}
HAS_JAX = _installed['jax']
HAS_NUMPYRO = _installed['numpyro']
# The full three-method comparison is only possible when both are present.
HAS_MCMC = all(_installed.values())
print({**_installed, 'full_3_method_run_available': HAS_MCMC})
print('[3/7] Dependency check complete.')


[3/7] Checking MCMC dependencies...
{'jax': True, 'numpyro': True, 'full_3_method_run_available': True}
[3/7] Dependency check complete.


In [3]:
print('[4/7] Building run configuration...')

# --- Scenario A design ---
SEED = 0
SCENARIO = 'A'
N_LIST = [100, 300, 500]  # sample sizes swept by the full run
N_COHORTS = 10
N_PERIODS = 10

# Settings forwarded to the simulator / scenario runner for data generation.
# NOTE(review): ell_*/sf_*/sn look like kernel length-scales, signal scales and
# a noise scale for the cohort (c) and period (t) effects - confirm in src.data.
DGP_PARAMS = dict(ell_c=2.0, sf_c=1.0, ell_t=3.0, sf_t=1.0, sn=0.3)

# Settings for the Type-II MAP model.
TYPE2MAP_CFG = dict(use_global_mean=False, n_laplace_samples=200, n_map_steps=50)

# Both NUTS-based methods currently share identical sampler settings. Each
# config is an independent copy so one can be tuned without touching the other.
_NUTS_SETTINGS = dict(
    seed=SEED,
    use_global_mean=False,
    num_warmup=500,
    num_samples=500,
    num_chains=1,
    jitter=1e-6,
    prior_log_sd=2.0,
    prior_mu_sd=5.0,
    return_samples=True,
)
NUTS_HYPER_CFG = dict(_NUTS_SETTINGS)
FULL_NUTS_CFG = dict(_NUTS_SETTINGS)

_config_summary = {'scenario': SCENARIO, 'seed': SEED, 'sizes': N_LIST}
print('[4/7] Config ready:', _config_summary)


[4/7] Building run configuration...
[4/7] Config ready: {'scenario': 'A', 'seed': 0, 'sizes': [100, 300, 500]}


## Smoke Test
- Step: Run a minimal Type-II MAP check to verify setup.
- Output: Prints the fitted MAP hyperparameters as a quick sanity check before the full run.


In [4]:
print('[5/7] Running Type-II MAP smoke test (A, N=100)...')

# The cohort/period grid is shared by the simulator and the model fit.
_grid = {'n_cohorts': N_COHORTS, 'n_periods': N_PERIODS}

# Simulate a single replicate without an interaction term.
sim = simulate_cohort_data(
    **_grid,
    n_reps=1,
    interaction=False,
    seed=SEED,
    **DGP_PARAMS,
)

# Fit the Type-II MAP model on the simulated observations. The simulator's
# 'hyperparams' entry (None if absent) is forwarded as `true_hyperparams`.
_observed = {field: sim[field] for field in ('y', 'obs_c', 'obs_t')}
m1 = CohortPeriodModel(**TYPE2MAP_CFG)
m1.fit(
    **_observed,
    **_grid,
    seed=SEED,
    true_hyperparams=sim.get('hyperparams'),
)

m1_res = m1.results_dict()
print('[5/7] Smoke test complete. MAP hyperparams:', m1_res['hyperparams_map'])


[5/7] Running Type-II MAP smoke test (A, N=100)...
[5/7] Smoke test complete. MAP hyperparams: {'ell_c': 1.9283849277150205, 'sf_c': 1.1643554556869224, 'ell_t': 4.864192179067615, 'sf_t': 0.28884423459961356, 'sn': 0.2821756084974834}


## Full Run
- Step: Execute all configured methods and sample-size settings.
- Save: Posterior comparison figures and runtime/error summary tables.


In [None]:
# Full comparison: run all three methods at every sample size in N_LIST, save
# per-size artifacts under RESULT_ROOT, then write two combined summary tables
# (comparison + speed) under SUMMARY_ROOT.
#
# The banner and the summary filenames are derived from SCENARIO / SEED / N_LIST
# so that changing the configuration cannot silently overwrite or mislabel the
# tables of a previous run. With the current config the strings are unchanged.
print(f'[6/7] Starting full Scenario {SCENARIO} run (3 methods x {len(N_LIST)} sizes)...')
if not HAS_MCMC:
    raise RuntimeError(
        'jax/numpyro not installed in this environment. '
        'Install them, then rerun this cell for full 3-method comparison.'
    )
if not N_LIST:
    # Fail before any work is attempted, with a clearer message than the
    # ValueError pd.concat would raise on an empty list further down.
    raise ValueError('N_LIST is empty; nothing to run.')

all_comp = []     # one comparison table per sample size
all_speed = []    # one speed table per sample size
run_outputs = {}  # n_total -> {'run': raw run output, 'paths': saved artifact paths}

for i, n_total in enumerate(N_LIST, start=1):
    print(f'  -> [{i}/{len(N_LIST)}] Running N={n_total}, seed={SEED} ...')
    run_out = run_scenario_a_single_n(
        n_total=n_total,
        seed=SEED,
        n_cohorts=N_COHORTS,
        n_periods=N_PERIODS,
        dgp_params=DGP_PARAMS,
        type2map_cfg=TYPE2MAP_CFG,
        nuts_hyper_cfg=NUTS_HYPER_CFG,
        full_nuts_cfg=FULL_NUTS_CFG,
    )

    # Persist this size's artifacts immediately so completed sizes survive a
    # failure or interruption in a later (slower) iteration.
    n_tag = f'n{n_total}'
    out_dir = RESULT_ROOT / n_tag / f'seed{SEED}'
    paths = save_method_artifacts(output_dir=out_dir, run_output=run_out)

    run_outputs[n_total] = {'run': run_out, 'paths': paths}
    all_comp.append(run_out['comparison_table'])
    all_speed.append(run_out['speed_table'])

    print(f'     saved -> {out_dir}')
    # A 'plot_error' entry, when present and non-empty, is reported as a
    # warning instead of aborting the run.
    if paths.get('plot_error'):
        print('     plot warning:', paths['plot_error'])
    m1t = run_out['method_results']['method1']['runtime_seconds']
    m2t = run_out['method_results']['method2']['runtime_seconds']
    m3t = run_out['method_results']['method3']['runtime_seconds']
    print(f'     times (s): M1={m1t:.3f}, M2={m2t:.3f}, M3={m3t:.3f}')

# Stack the per-size tables once, after the loop.
comparison_all = pd.concat(all_comp, ignore_index=True)
speed_all = pd.concat(all_speed, ignore_index=True)

summary_stem = f'table_type2map_vs_mcmc_{SCENARIO}'
comparison_path = SUMMARY_ROOT / f'{summary_stem}_seed{SEED}_all.csv'
speed_path = SUMMARY_ROOT / f'{summary_stem}_speed_seed{SEED}_all.csv'
comparison_all.to_csv(comparison_path, index=False)
speed_all.to_csv(speed_path, index=False)

print('[7/7] Full run complete.')
print('Wrote:', comparison_path)
print('Wrote:', speed_path)
display(comparison_all)
display(speed_all)


[6/7] Starting full Scenario A run (3 methods x 3 sizes)...
  -> [1/3] Running N=100, seed=0 ...
Figure saved -> /Users/leyaotan/Desktop/bapc-finalproject/BayesianWorkflow-Final/results/simulations/method_comparison/A/n100/seed0/compare_posterior_beta_seed0.png
Figure saved -> /Users/leyaotan/Desktop/bapc-finalproject/BayesianWorkflow-Final/results/simulations/method_comparison/A/n100/seed0/compare_hyperparam_posteriors_seed0.png
     saved -> /Users/leyaotan/Desktop/bapc-finalproject/BayesianWorkflow-Final/results/simulations/method_comparison/A/n100/seed0
     times (s): M1=0.170, M2=6.722, M3=177.726
  -> [2/3] Running N=300, seed=0 ...
