
# Supplementary Figures/Tables (S1-S5)

This notebook runs and exports supplementary materials:

- **S1** smoothness sensitivity (multi-lambda grid + C baseline)
- **S2** EM convergence (log-likelihood vs iteration)
- **S3** memory scaling (100/250/500/1000Mb; optional 2000Mb via env)
- **S4** Rust thread scaling (1/2/4/8 + ideal linear)
- **S5** format consistency (psmcfa/mhs/vcf)

Outputs are written to:
- `experiment/runs/supplementary/figures` (`png/svg/pdf`)
- `experiment/runs/supplementary/tables` (`csv/tsv/md`)
- `experiment/runs/supplementary/logs` (`commands.jsonl`)

> Fairness: by default, the Rust runs in this notebook enforce `smooth-lambda = 0` (no smoothing); S1 is the exception, explicitly comparing no-smooth against smooth runs.


In [None]:

from __future__ import annotations

import importlib.util
import os
from pathlib import Path

# Use IPython's rich display when it is importable; otherwise fall back to a
# plain print so the cell also works outside IPython.
try:
    from IPython.display import display
except ImportError:
    # Only a missing IPython should trigger the fallback; any other failure
    # is a real error and should surface instead of being hidden.
    def display(x):
        """Print *x* to stdout as a stand-in for IPython's display."""
        print(x)


def find_repo_root(start: Path) -> Path:
    """Return the closest directory, at or above *start*, that is the repo root.

    A directory qualifies when it contains both a ``Cargo.toml`` entry and a
    ``src`` entry. *start* itself is checked first, then each ancestor in
    order from nearest to farthest.

    Raises:
        RuntimeError: if neither *start* nor any of its ancestors qualifies.
    """
    lineage = (start, *start.parents)
    hit = next(
        (
            folder
            for folder in lineage
            if (folder / 'Cargo.toml').exists() and (folder / 'src').exists()
        ),
        None,
    )
    if hit is None:
        raise RuntimeError(f'Cannot locate psmc-rs root from {start}')
    return hit


# Locate the repository from the current working directory, then the driver
# script that implements the supplementary steps.
ROOT = find_repo_root(Path.cwd().resolve())
SCRIPT = ROOT / 'experiment' / 'scripts' / 'run_supplementary.py'
# Explicit raise rather than assert: asserts are stripped under `python -O`.
if not SCRIPT.exists():
    raise FileNotFoundError(f'missing script: {SCRIPT}')

# Load the script as a module named 'supp' directly from its file path.
spec = importlib.util.spec_from_file_location('supp', SCRIPT)
# Validate the spec BEFORE it is used: spec_from_file_location can return
# None, and module_from_spec would then fail with an unrelated error.
if spec is None or spec.loader is None:
    raise ImportError(f'cannot create an import spec for {SCRIPT}')
supp = importlib.util.module_from_spec(spec)
spec.loader.exec_module(supp)

# Echo the resolved locations so the run is easy to audit.
print('ROOT =', ROOT)
print('SCRIPT =', SCRIPT)
print('RUN_DIR =', supp.RUN_DIR)
print('PSMC_RS_BIN =', supp.PSMC_RS_BIN)
print('C_PSMC_BIN =', supp.C_PSMC_BIN)


In [None]:

# Optional environment overrides -- uncomment the ones you need.
# NOTE(review): `supp` is loaded in an earlier cell; if the script reads these
# variables at import time, they presumably must be set before that cell runs
# (or the module re-executed) -- confirm against run_supplementary.py.
# os.environ['SIM_LENGTH_BP'] = '500000000'
# os.environ['PERF_REPEATS'] = '10'
# os.environ['BOOTSTRAP_REPS'] = '100'
# os.environ['S3_INCLUDE_2000'] = '1'
# os.environ['S1_SMOOTH_LAMBDAS'] = '0,1e-5,1e-4,1e-3,1e-2'
# os.environ['S4_CONTIGS'] = '8'
# os.environ['S4_CONTIG_MB'] = '75'

# Steps to execute, in order, and the `force` flag handed to each step runner.
STEPS = ['S1', 'S2', 'S3', 'S4', 'S5']
FORCE = False

print('STEPS =', STEPS)
print('FORCE =', FORCE)

# Echo the effective settings exposed as attributes of the loaded script.
for setting in (
    'SIM_LENGTH_BP',
    'PERF_REPEATS',
    'BOOTSTRAP_REPS',
    'S3_LENGTHS_MB',
    'THREAD_LIST',
    'S1_SMOOTH_LAMBDAS',
):
    print(f'{setting} =', getattr(supp, setting))

print('S4_CONTIGS =', supp.S4_CONTIGS, 'S4_CONTIG_MB =', supp.S4_CONTIG_MB)


In [None]:

# Step label -> name of the `supp` function that runs it. Names are resolved
# lazily with getattr, so a step that is not requested never requires its
# runner to exist.
STEP_RUNNERS = {
    'S1': 'run_s1_smooth_ablation',
    'S2': 'run_s2_em_convergence',
    'S3': 'run_s3_memory_scaling',
    'S4': 'run_s4_thread_scaling',
    'S5': 'run_s5_format_consistency',
}

# Reject unknown labels before running anything; otherwise a typo in STEPS
# would be skipped silently and its outputs would simply be missing.
unknown_steps = [s for s in STEPS if s not in STEP_RUNNERS]
if unknown_steps:
    raise ValueError(
        f'unknown step(s) {unknown_steps}; expected one of {sorted(STEP_RUNNERS)}'
    )

# Run the requested steps in order, keeping each runner's return value.
results = {}
for s in STEPS:
    print(f'\n=== Running {s} ===')
    results[s] = getattr(supp, STEP_RUNNERS[s])(force=FORCE)

# Preview each result: the first rows when the object offers .head(),
# otherwise the object itself.
for k, v in results.items():
    print(f'\n[{k}]')
    display(v.head() if hasattr(v, 'head') else v)

# List what is currently present in the figure and table directories.
print('\nFigures:')
for p in sorted(supp.FIG_DIR.glob('*')):
    print(' -', p)

print('\nTables:')
for p in sorted(supp.TABLE_DIR.glob('*')):
    print(' -', p)

print('\nLog:', supp.LOG_DIR / 'commands.jsonl')
