# Compare Bayes runs: distance on vs off

This notebook is intentionally thin.

- It imports the typed pipeline in `src/`.
- It writes run artifacts into `data/runs/`.
- Then you can open `bayes_streamlit.py` and use **Compare two runs** to see deltas.



In [1]:
import asyncio
import os
import sys

import pandas as pd

# Ensure repo root is on sys.path so `import src...` works even when Jupyter's cwd is `notebooks/`.
sys.path.insert(0, os.path.abspath(".."))

from src.pipeline import DatasetBuildConfig, build_dataset, run_bayes, run_heuristic, write_run_artifacts

# --- Run configuration ---
# Common prefix for the labels of both runs written by this notebook.
LABEL_BASE = "nb-compare-distance"

# Artifact directory: <parent of the kernel's cwd>/data/runs, made absolute.
# That is the repo-root data/runs when the notebook's cwd is `notebooks/`.
RUNS_DIR = os.path.abspath(os.path.join(os.pardir, "data", "runs"))

# Sampler settings shared by both Bayes runs.
# Kept small for notebook iteration; increase for serious runs.
SEED = 0
DRAWS = 800
TUNE = 800
TARGET_ACCEPT = 0.95
HDI_PROB = 0.9

# Build dataset once (distance ON so uk_distance_km exists; the model can ignore it when use_distance=False)
df = await build_dataset(DatasetBuildConfig(dataset_csv=None, use_distance=True))

heur = run_heuristic(
    df,
    use_language_factor=False,
    language_english_factor=1.25,
    language_euro_latin_factor=1.0,
    language_other_factor=0.75,
)

def _run_bayes_and_save(use_distance: bool, label_suffix: str):
    """Fit one Bayes run, join it onto the heuristic table, and write the run artifacts.

    Both runs go through this helper so they share identical sampler settings
    (DRAWS, TUNE, TARGET_ACCEPT, SEED, HDI_PROB) and differ only in `use_distance`.

    Args:
        use_distance: Forwarded to `run_bayes` and recorded in the artifact metadata.
        label_suffix: Appended to `LABEL_BASE` after a hyphen to form the run label.

    Returns:
        A tuple `(bayes, merged, csv_path, meta)`: the `run_bayes` result, the
        heuristic table left-merged with it on `alpha_3`, and the two values
        returned by `write_run_artifacts` (the first is the CSV location that is
        printed and read back later; the second is presumably the metadata
        location -- confirm against `write_run_artifacts`).
    """
    bayes = run_bayes(
        df,
        use_distance=use_distance,
        draws=DRAWS,
        tune=TUNE,
        target_accept=TARGET_ACCEPT,
        seed=SEED,
        hdi_prob=HDI_PROB,
    )
    # Left merge: every heuristic row is kept, whether or not it has a Bayes match.
    merged = pd.merge(heur, bayes, on="alpha_3", how="left")
    csv_path, meta = write_run_artifacts(
        merged,
        runs_dir=RUNS_DIR,
        label=f"{LABEL_BASE}-{label_suffix}",
        meta={"kind": "bayes_run", "use_distance": use_distance, "source": "notebook"},
    )
    return bayes, merged, csv_path, meta


# Run A: with distance
bayes_dist, out_dist, csv_dist, meta_dist = _run_bayes_and_save(True, "with-distance")

# Run B: no distance
bayes_nodist, out_nodist, csv_nodist, meta_nodist = _run_bayes_and_save(False, "no-distance")

print("Wrote:")
print(" -", csv_dist)
print(" -", csv_nodist)

# Quick diff: biggest movers in Bayes rank (A = run with distance, B = run without).
# The CSV artifacts are read back from disk, so the comparison uses exactly what was written.
def _load_run_metrics(csv_path, suffix: str, id_cols: list) -> pd.DataFrame:
    """Read one run CSV and keep `id_cols` plus the Bayes metrics, renamed per run.

    Args:
        csv_path: Location of a run CSV, as returned by `write_run_artifacts`.
        suffix: Run tag appended to the short metric names (`rank_<suffix>`,
            `p1_<suffix>`, `mu_<suffix>`).
        id_cols: Identifier columns to keep ahead of the metrics, unrenamed.

    Returns:
        A DataFrame with `id_cols` followed by the three renamed metric columns.
    """
    renames = {
        "bayes_rank": f"rank_{suffix}",
        "bayes_p_one_mean": f"p1_{suffix}",
        "bayes_mu_mean": f"mu_{suffix}",
    }
    return pd.read_csv(csv_path)[[*id_cols, *renames]].rename(columns=renames)


A = _load_run_metrics(csv_dist, "a", ["alpha_3", "country_name"])
B = _load_run_metrics(csv_nodist, "b", ["alpha_3"])
cmp = A.merge(B, on="alpha_3", how="inner")
# Deltas are B minus A: a negative delta_rank means a lower rank number without distance.
cmp["delta_rank"] = cmp["rank_b"] - cmp["rank_a"]
cmp["delta_p1"] = cmp["p1_b"] - cmp["p1_a"]
# Order by the size of the rank change, not its sign, so large moves in either
# direction surface; a plain ascending sort would only show the most negative deltas.
cmp.sort_values("delta_rank", key=lambda s: s.abs(), ascending=False).head(25)


Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta]
Sampling 4 chains for 800 tune and 800 draw iterations (3_200 + 3_200 draws total) took 1 seconds.
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta]
Sampling 4 chains for 800 tune and 800 draw iterations (3_200 + 3_200 draws total) took 1 seconds.


Wrote:
 - /Users/joshuamason/git/renc/data/runs/nb-compare-distance-with-distance_20251221T105541Z.csv
 - /Users/joshuamason/git/renc/data/runs/nb-compare-distance-no-distance_20251221T105542Z.csv


Unnamed: 0,alpha_3,country_name,rank_a,p1_a,mu_a,rank_b,p1_b,mu_b,delta_rank,delta_p1
41,NZL,New Zealand,227,0.0,6250.303,47,0.320508,0.563956,-180,0.320508
60,URY,Uruguay,213,6.280825e-59,179.3101,38,0.332635,0.61708,-175,0.332635
43,SGP,Singapore,224,1.04101e-223,692.9194,56,0.304681,0.505646,-168,0.304681
34,HKG,Hong Kong,225,1.311506e-233,723.617,59,0.296004,0.47779,-166,0.296004
42,OMN,Oman,205,2.951367e-17,49.82317,49,0.317049,0.550206,-156,0.317049
31,KWT,Kuwait,184,7.701355e-08,22.02363,32,0.337504,0.642885,-152,0.337504
11,ARE,United Arab Emirates,217,6.583448e-81,272.2374,71,0.28226,0.437907,-146,0.28226
40,QAT,Qatar,168,4.70062e-06,16.67095,22,0.357346,0.783714,-146,0.357341
107,FJI,Fiji,157,2.358803e-05,14.39501,16,0.36267,0.84398,-141,0.362646
68,JAM,Jamaica,174,2.329318e-06,16.67458,34,0.337161,0.639723,-140,0.337159
