# Batch 0 (0–10K): LOO-style pool 1–39 vs holdout-style pool 10–39

Predict the **same** batch (0–10K) twice:
1. **Pool 1–39 (LOO-style):** Pooled slope-model parameters (gamma, psi, epsilon) from batches 1,2,…,39 (39 batches; excludes batch 0). Fit delta on batch 0 → π₁.
2. **Pool 10–39 (holdout-style):** Pooled slope-model parameters (gamma, psi, epsilon) from batches 10,11,…,39 (30 batches; excludes 0–9). Fit delta on batch 0 → π₂.

Run **identical** static 10yr AUC evaluation on π₁ and π₂ (same Y, E, pce for 0–10K). Compare AUCs side by side.

## Setup paths and imports

In [None]:
import sys
import gc
from pathlib import Path

import numpy as np
import pandas as pd
import torch

# --- Notebook configuration ---------------------------------------------------
# Machine-specific locations; edit these when running on another machine.
CLAUDE_DIR = Path('/Users/sarahurbut/aladynoulli2/claudefile').resolve()
DATA_DIR = Path('/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/data_for_running')
PCE_PATH = '/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/pce_prevent_full.csv'
BATCH_SIZE = 10_000  # rows per batch
N_BATCH = 0  # batch 0 = 0-10K

# Put the project code directories at the front of sys.path so the project
# imports below resolve. Any earlier occurrence is dropped first, so re-running
# this cell does not pile up duplicate entries; the resulting precedence is the
# same as two plain insert(0, ...) calls (pyScripts first, then claudefile).
for _code_dir in (CLAUDE_DIR, CLAUDE_DIR.parent / 'pyScripts'):
    _code_dir = str(_code_dir)
    sys.path[:] = [entry for entry in sys.path if entry != _code_dir]
    sys.path.insert(0, _code_dir)
del _code_dir

from slope_holdout_auc import (
    load_data,
    load_and_pool_slope_params,
    fit_slope_delta_and_extract_pi,
    SLOPE_CKPT_DIR_1PHASE,
)
from fig5utils import evaluate_major_diseases_wsex_with_bootstrap_from_pi

# Confirm the project imports above succeeded and show the imported checkpoint-directory constant.
print('Imports OK. Slope checkpoints:', SLOPE_CKPT_DIR_1PHASE)

Imports OK. Slope checkpoints: /Users/sarahurbut/Library/CloudStorage/Dropbox/slope_model_nokappa_v3_single_phase


## Load data for batch 0 (0–10K)

In [None]:
# Load the full dataset once, then keep only the rows of batch N_BATCH.
Y_full, E_full, G_full, prevalence_t, signature_refs, disease_names, pce_df_full = load_data()

# All four per-row sources are cut with the same positional window, i.e. they
# are matched by row position rather than by an explicit ID.
start = N_BATCH * BATCH_SIZE
stop = start + BATCH_SIZE
Y_batch = Y_full[start:stop]
E_batch = E_full[start:stop]
G_batch = G_full[start:stop]
pce_batch = pce_df_full.iloc[start:stop].reset_index(drop=True)

# Slicing past the end never raises, it just returns fewer rows. Because the
# sources are paired by position, fail loudly if one of them came back shorter
# (or empty) instead of silently evaluating on misaligned data.
batch_lengths = {'Y': len(Y_batch), 'E': len(E_batch), 'G': len(G_batch), 'pce': len(pce_batch)}
assert len(set(batch_lengths.values())) == 1 and batch_lengths['Y'] > 0, (
    f'Batch {N_BATCH} rows [{start}:{stop}] are empty or misaligned: {batch_lengths}'
)

print(f'Batch {N_BATCH}: Y {Y_batch.shape}, E {E_batch.shape}, G {G_batch.shape}, pce {len(pce_batch)}')

Batch 0: Y torch.Size([10000, 348, 52]), E torch.Size([10000, 348]), G (10000, 47), pce 10000


## Prediction 1: Pool from batches 1–39 (LOO-style for batch 0)

In [None]:
# Pool 1-39: every batch index from 1 through 39, i.e. all except batch 0.
train_indices_1_39 = [*range(1, 40)]
print('Loading slope params pooled from batches 1-39...')
(
    level_1_39,
    slope_1_39,
    psi_w_1_39,
    epsilon_1_39,
    health_1_39,
) = load_and_pool_slope_params(train_indices_1_39, slope_ckpt_dir=SLOPE_CKPT_DIR_1PHASE)

# Fit on the batch-0 slices using the pooled parameters loaded above; no
# pretrained delta is supplied.
print('Fitting delta on batch 0 (pool 1-39)...')
pi_1_39, nll_1_39 = fit_slope_delta_and_extract_pi(
    Y_batch,
    E_batch,
    G_batch,
    prevalence_t,
    signature_refs,
    level_1_39,
    slope_1_39,
    psi_w_1_39,
    epsilon_1_39,
    gamma_health=health_1_39,
    pretrained_delta=None,
    n_epochs=200,
)
print('  NLL = {:.4f}'.format(nll_1_39))

# Only pi_1_39 / nll_1_39 are kept; drop the pooled parameters before the next fit.
del level_1_39, slope_1_39, psi_w_1_39, epsilon_1_39, health_1_39
gc.collect()

Loading slope params pooled from batches 1-39...
  Slope: pooled 39 checkpoints, |gamma_level|=0.1137, |gamma_slope|=0.0596, |epsilon|=0.1075
Fitting delta on batch 0 (pool 1-39)...
Warm-starting from pretrained gamma (torch.Size([47, 21])), psi (torch.Size([21, 348]))
Using provided gamma_health for alpha_i
  alpha_i: mean=0.0000, std=0.0009, range=[-0.0039, 0.0046]
Slope model init: gamma_level torch.Size([47, 21]), gamma_slope torch.Size([47, 21]) (zeros), delta torch.Size([10000, 21, 52]), epsilon torch.Size([21, 348, 52])
  NLL = 11.7463


0

## Prediction 2: Pool from batches 10–39 (holdout-style)

In [None]:
# Pool 10-39: batch indices 10 through 39 (30 batches), i.e. batches 0-9 are left out.
train_indices_10_39 = [*range(10, 40)]
print('Loading slope params pooled from batches 10-39...')
(
    level_10_39,
    slope_10_39,
    psi_w_10_39,
    epsilon_10_39,
    health_10_39,
) = load_and_pool_slope_params(train_indices_10_39, slope_ckpt_dir=SLOPE_CKPT_DIR_1PHASE)

# Same batch-0 inputs and fit settings as the pool 1-39 run; only the pooled
# parameters differ.
print('Fitting delta on batch 0 (pool 10-39)...')
pi_10_39, nll_10_39 = fit_slope_delta_and_extract_pi(
    Y_batch,
    E_batch,
    G_batch,
    prevalence_t,
    signature_refs,
    level_10_39,
    slope_10_39,
    psi_w_10_39,
    epsilon_10_39,
    gamma_health=health_10_39,
    pretrained_delta=None,
    n_epochs=200,
)
print('  NLL = {:.4f}'.format(nll_10_39))

# Only pi_10_39 / nll_10_39 are kept; drop the pooled parameters.
del level_10_39, slope_10_39, psi_w_10_39, epsilon_10_39, health_10_39
gc.collect()

Loading slope params pooled from batches 10-39...
  Slope: pooled 30 checkpoints, |gamma_level|=0.1134, |gamma_slope|=0.0611, |epsilon|=0.1076
Fitting delta on batch 0 (pool 10-39)...
Warm-starting from pretrained gamma (torch.Size([47, 21])), psi (torch.Size([21, 348]))
Using provided gamma_health for alpha_i
  alpha_i: mean=-0.0000, std=0.0010, range=[-0.0054, 0.0051]
Slope model init: gamma_level torch.Size([47, 21]), gamma_slope torch.Size([47, 21]) (zeros), delta torch.Size([10000, 21, 52]), epsilon torch.Size([21, 348, 52])
  NLL = 11.7284


0

## Identical AUC evaluation (static 10yr, same Y, E, pce)

In [None]:
N_BOOTSTRAPS = 100
FOLLOW_UP_YEARS = 10
EVAL_SEED = 0  # shared by both evaluations so their bootstrap draws can match


def _evaluate_static_auc(pi):
    """Run the static AUC evaluation for one set of predictions.

    Everything except ``pi`` (Y_batch, E_batch, disease_names, pce_batch,
    N_BOOTSTRAPS, FOLLOW_UP_YEARS) is taken from the notebook globals, so the
    two pools are scored with exactly the same arguments.

    The NumPy and torch global RNGs are reset to EVAL_SEED before the call so
    that both pools can be scored on the same bootstrap resamples, making
    differences in their confidence intervals reflect ``pi`` rather than
    resampling noise.
    NOTE(review): this assumes the evaluation helper draws its bootstrap
    samples from the global NumPy/torch RNGs - confirm in fig5utils. If it
    seeds itself or uses a private generator, the reseeding is a harmless no-op.

    Args:
        pi: predictions to evaluate (pi_1_39 or pi_10_39).

    Returns:
        Whatever evaluate_major_diseases_wsex_with_bootstrap_from_pi returns;
        downstream cells read it as a mapping disease -> metrics.
    """
    np.random.seed(EVAL_SEED)
    torch.manual_seed(EVAL_SEED)
    return evaluate_major_diseases_wsex_with_bootstrap_from_pi(
        pi=pi, Y_100k=Y_batch, E_100k=E_batch,
        disease_names=disease_names, pce_df=pce_batch,
        n_bootstraps=N_BOOTSTRAPS, follow_up_duration_years=FOLLOW_UP_YEARS)


print('Evaluating pool 1-39 (static 10yr)...')
res_1_39 = _evaluate_static_auc(pi_1_39)

print('\nEvaluating pool 10-39 (static 10yr)...')
res_10_39 = _evaluate_static_auc(pi_10_39)

Evaluating pool 1-39 (static 10yr)...

Evaluating ASCVD (10-Year Outcome, 1-Year Score)...
AUC: 0.758 (0.744-0.777) (calculated on 10000 individuals)
Events (10-Year in Eval Cohort): 831 (8.3%) (from 10000 individuals)
Excluded 0 prevalent cases for ASCVD.

   Sex-stratified analysis:
   Female: AUC = 0.728, Events = 273/5409
   Male: AUC = 0.723, Events = 558/4591

   ASCVD risk in patients with pre-existing conditions:
   RA: AUC = 0.823, Events = 8/37
   Breast_Cancer: AUC = 0.759, Events = 13/170

Evaluating Diabetes (10-Year Outcome, 1-Year Score)...
AUC: 0.699 (0.675-0.724) (calculated on 10000 individuals)
Events (10-Year in Eval Cohort): 581 (5.8%) (from 10000 individuals)
Excluded 0 prevalent cases for Diabetes.

   Sex-stratified analysis:
   Female: AUC = 0.694, Events = 264/5409
   Male: AUC = 0.701, Events = 317/4591

Evaluating Atrial_Fib (10-Year Outcome, 1-Year Score)...
AUC: 0.717 (0.694-0.746) (calculated on 9864 individuals)
Events (10-Year in Eval Cohort): 376 (3.8%

## Comparison table

In [None]:
# Build one row per disease with the AUC and CI bounds from each pool.
# Diseases are taken from BOTH result dicts (pool 1-39 order first), so a
# disease reported by only one run shows up with NaNs instead of vanishing.
rows = []
for disease in dict.fromkeys([*res_1_39, *res_10_39]):
    m1 = res_1_39.get(disease) or {}
    m2 = res_10_39.get(disease) or {}
    rows.append({
        'disease': disease,
        'auc_pool_1_39': m1.get('auc', np.nan),
        'ci_lower_1_39': m1.get('ci_lower', np.nan),
        'ci_upper_1_39': m1.get('ci_upper', np.nan),
        'auc_pool_10_39': m2.get('auc', np.nan),
        'ci_lower_10_39': m2.get('ci_lower', np.nan),
        'ci_upper_10_39': m2.get('ci_upper', np.nan),
    })

# Explicit columns keep the frame well-formed (and sortable) even when there
# are no rows at all.
df = pd.DataFrame(rows, columns=[
    'disease',
    'auc_pool_1_39', 'ci_lower_1_39', 'ci_upper_1_39',
    'auc_pool_10_39', 'ci_lower_10_39', 'ci_upper_10_39',
]).sort_values('auc_pool_1_39', ascending=False)
# Computed once, vectorized; NaN wherever either pool has no AUC.
df['diff_auc'] = df['auc_pool_1_39'] - df['auc_pool_10_39']
display(df)

# Summarize only diseases with an AUC in both pools, so the two means are over
# the same diseases and the "better" count has the matching denominator.
df_paired = df.dropna(subset=['auc_pool_1_39', 'auc_pool_10_39'])

print('Summary (batch 0, 10k):')
print(f'  Mean AUC pool 1-39:   {df_paired["auc_pool_1_39"].mean():.4f}')
print(f'  Mean AUC pool 10-39:  {df_paired["auc_pool_10_39"].mean():.4f}')
print(f'  Mean diff (1-39 − 10-39): {df_paired["diff_auc"].mean():.4f}')
print(f'  N better with 1-39:   {(df_paired["diff_auc"] > 0).sum()} / {len(df_paired)}')

Unnamed: 0,disease,auc_pool_1_39,ci_lower_1_39,ci_upper_1_39,auc_pool_10_39,ci_lower_10_39,ci_upper_10_39,diff_auc
15,Bladder_Cancer,0.803666,0.739964,0.853036,0.803315,0.752888,0.856371,0.000351
0,ASCVD,0.75799,0.744151,0.776985,0.757846,0.742,0.776431,0.000144
3,CKD,0.755575,0.731128,0.785194,0.755872,0.726437,0.785918,-0.000297
6,Heart_Failure,0.745388,0.706419,0.780646,0.745607,0.715366,0.780376,-0.00022
4,All_Cancers,0.73382,0.712185,0.749374,0.734022,0.71424,0.759037,-0.000202
7,Pneumonia,0.7223,0.696048,0.749325,0.721927,0.699513,0.748989,0.000373
2,Atrial_Fib,0.717288,0.693854,0.745826,0.716976,0.692894,0.739545,0.000312
8,COPD,0.715313,0.692881,0.740001,0.716315,0.694494,0.742038,-0.001002
1,Diabetes,0.699067,0.674746,0.72371,0.699139,0.676353,0.720697,-7.1e-05
14,Lung_Cancer,0.696307,0.642931,0.745619,0.696993,0.651205,0.759525,-0.000686


Summary (batch 0, 10k):
  Mean AUC pool 1-39:   0.6449
  Mean AUC pool 10-39:  0.6453
  Mean diff (1-39 − 10-39): -0.0003
  N better with 1-39:   9 / 28


## Optional: save results

In [None]:
# Write the comparison table (without its index) to a CSV under CLAUDE_DIR,
# creating the results folder first if it does not exist yet.
out_dir = CLAUDE_DIR.joinpath('results_holdout_auc')
out_path = out_dir.joinpath('batch0_pool1_39_vs_10_39_auc.csv')
out_dir.mkdir(exist_ok=True, parents=True)
df.to_csv(out_path, index=False)
print('Saved: {}'.format(out_path))

Saved: /Users/sarahurbut/aladynoulli2/claudefile/results_holdout_auc/batch0_pool1_39_vs_10_39_auc.csv


In [None]:
%%bash
# Launch the 400k evaluation script in the background. These are shell
# commands, so the cell needs the %%bash magic; without it the kernel tries to
# parse the nohup line as Python and fails with a SyntaxError.
# stdout and stderr both go to slope_loo_like_400k_eval.log in this directory.
# Abort if the directory is missing rather than launching from the wrong place.
cd /Users/sarahurbut/aladynoulli2/claudefile || exit 1

nohup python slope_loo_like_400k_eval.py \
  --n_patients 400000 \
  --n_bootstraps 100 \
  > slope_loo_like_400k_eval.log 2>&1 &