# ============================================================================
# RESULTS GENERATION DOCUMENTATION
# ============================================================================
"""
This notebook orchestrates all performance evaluation analyses for Aladynoulli.

STEP 0: ASSEMBLE FULL PI TENSORS
---------------------------------
- Concatenates batch pi tensors (0-10000, 10000-20000, ..., 390000-400000) 
  into full pi tensors for 0-400K patients
- Creates pi_enroll_fixedphi_sex_FULL.pt files needed by generation scripts
- Run ONCE before running generation cells, then mark as "not evaluated"
- Script: assemble_full_pi_tensor.py

STEP 1: GENERATE TIME HORIZON PREDICTIONS
------------------------------------------
- Generates predictions for multiple time horizons: 5yr, 10yr, 30yr, static 10yr
- Processes ALL 400K patients at once using pre-computed pi tensors
- Uses evaluate_major_diseases_wsex_with_bootstrap_dynamic_from_pi() for dynamic predictions
- Uses evaluate_major_diseases_wsex_with_bootstrap_from_pi() for static predictions
- Computes AUC on pooled predictions (statistically better than batch-averaging)
- Approaches: pooled_enrollment and pooled_retrospective
- Run ONCE, then mark as "not evaluated"
- Script: generate_time_horizon_predictions.py
- Results saved to: results/time_horizons/{approach}/

STEP 2: GENERATE WASHOUT PREDICTIONS
------------------------------------
- Generates 1-year predictions with different washout periods: 0yr, 1yr, 2yr offsets
- Processes ALL 400K patients at once using pre-computed pi tensors
- Uses evaluate_major_diseases_wsex_with_bootstrap_dynamic_1year_different_start_end_numeric_sex()
- Evaluates predictions made at enrollment_age + offset to predict events in subsequent year
- Computes AUC on pooled predictions
- Approaches: pooled_enrollment and pooled_retrospective
- Run ONCE, then mark as "not evaluated"
- Script: generate_washout_predictions.py
- Results saved to: results/washout/{approach}/

STEP 3: GENERATE AGE OFFSET PREDICTIONS
---------------------------------------
- Generates rolling 1-year predictions using models trained at different time offsets
- Evaluates models trained at enrollment + 0, 1, 2, ..., 9 years (offsets 0-9)
- Uses pre-computed pi batches from AWS run (downloaded to ~/Downloads/age_offset_files/)
- Currently evaluates on batch 0-10000 (first 10K patients)
- Shows how model performance changes when predictions are made at different time points
  after enrollment, using models retrained with additional follow-up data
- Approaches: pooled_enrollment and pooled_retrospective
- Run ONCE, then mark as "not evaluated"
- Script: generate_age_offset_predictions.py
- Results saved to: results/age_offset/{approach}/

STEP 4: LOAD GENERATED RESULTS
--------------------------------
- Loads all generated CSV files into dictionaries for analysis
- Safe to run multiple times (reloads results without regenerating)
- Loads: time_horizon_results, washout_results, age_offset_results

STEP 5: COMPARE WITH EXTERNAL SCORES
-------------------------------------
- Compares Aladynoulli predictions with external risk scores on same 400K population
- External scores: PCE (10-year ASCVD), PREVENT (30-year ASCVD), 
                   Gail (10-year Breast Cancer), QRISK3 (10-year ASCVD)
- Uses unified file: ukb_pce_prevent_gail_qrisk3_combined.csv
- Main comparison uses pooled_retrospective approach (clinically implementable)
- Run ONCE, then mark as "not evaluated"
- Script: compare_with_external_scores.py
- Results saved to: results/comparisons/{approach}/external_scores_comparison.csv

MAIN APPROACH: Pooled Retrospective
-------------------------------------
All comparisons use the pooled_retrospective approach by default, which:
- Uses phi trained externally and validated with LOO tests
- Represents clinically implementable performance
- Uses pi from: enrollment_predictions_fixedphi_RETROSPECTIVE_pooled/pi_enroll_fixedphi_sex_FULL.pt
"""

In [9]:

# ============================================================================
# STEP 0: ASSEMBLE FULL PI TENSORS (RUN ONCE, THEN MARK AS "NOT EVALUATED")
# ============================================================================
"""
IMPORTANT: This cell assembles batch pi tensors into full pi tensors.
- Run this ONCE before running the generation cells
- After assembly is complete, mark this cell as "not evaluated"
- This creates pi_enroll_fixedphi_sex_FULL.pt files needed by the generation scripts
"""

import subprocess
import sys
from pathlib import Path

# Set script directory
script_dir = Path('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks')

print("="*80)
print("ASSEMBLING FULL PI TENSORS FROM BATCH FILES")
print("="*80)
print("\nThis will concatenate all batch pi tensors (0-10000, 10000-20000, ..., 390000-400000)")
print("into single full tensors for 0-400K patients.")
print("\nNOTE: Run once, then mark this cell as 'not evaluated'.")
print("="*80)

# Assemble retrospective pooled pi tensor
print("\n1. Assembling pooled_retrospective pi tensor...")
result1 = subprocess.run([
    sys.executable,
    str(script_dir / 'assemble_full_pi_tensor.py'),
    '--approach', 'pooled_retrospective',
    '--max_patients', '400000'
], capture_output=True, text=True)
print(result1.stdout)
if result1.stderr:
    print("STDERR:", result1.stderr)
if result1.returncode != 0:
    print(f"ERROR: Assembly failed with return code {result1.returncode}")

# Assemble enrollment pooled pi tensor
print("\n2. Assembling pooled_enrollment pi tensor...")
result2 = subprocess.run([
    sys.executable,
    str(script_dir / 'assemble_full_pi_tensor.py'),
    '--approach', 'pooled_enrollment',
    '--max_patients', '400000'
], capture_output=True, text=True)
print(result2.stdout)
if result2.stderr:
    print("STDERR:", result2.stderr)
if result2.returncode != 0:
    print(f"ERROR: Assembly failed with return code {result2.returncode}")

print("\n" + "="*80)
print("PI TENSOR ASSEMBLY COMPLETE")
print("="*80)
print("\nFull pi tensors should now be available at:")
print("  - enrollment_predictions_fixedphi_RETROSPECTIVE_pooled/pi_enroll_fixedphi_sex_FULL.pt")
print("  - enrollment_predictions_fixedphi_ENROLLMENT_pooled/pi_enroll_fixedphi_sex_FULL.pt")



ASSEMBLING FULL PI TENSORS FROM BATCH FILES

This will concatenate all batch pi tensors (0-10000, 10000-20000, ..., 390000-400000)
into single full tensors for 0-400K patients.

NOTE: Run once, then mark this cell as 'not evaluated'.

1. Assembling pooled_retrospective pi tensor...
ASSEMBLING FULL PI TENSOR: POOLED_RETROSPECTIVE
Base directory: /Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/enrollment_predictions_fixedphi_RETROSPECTIVE_pooled
Max patients: 400000
Batch size: 10000

Will assemble 40 batches (0-400000)
Loading batch 1/40: 0-10000... ✓ Shape: torch.Size([10000, 348, 52])
Loading batch 2/40: 10000-20000... ✓ Shape: torch.Size([10000, 348, 52])
Loading batch 3/40: 20000-30000... ✓ Shape: torch.Size([10000, 348, 52])
Loading batch 4/40: 30000-40000... ✓ Shape: torch.Size([10000, 348, 52])
Loading batch 5/40: 40000-50000... ✓ Shape: torch.Size([10000, 348, 52])
Loading batch 6/40: 50000-60000... ✓ Shape: torch.Size([10000, 348, 52])
Loading batch 7/40: 60000-70000...

In [10]:
# ============================================================================
# STEP 1: GENERATE RESULTS (RUN ONCE, THEN MARK AS "NOT EVALUATED")
# ============================================================================
"""
IMPORTANT: These cells generate the results CSV files.
- Run them ONCE to generate all results
- After results are generated, mark these cells as "not evaluated" to prevent re-running
- The results will be saved to results/time_horizons/ and results/washout/
"""

import subprocess
import sys
from pathlib import Path

# Set script directory
script_dir = Path('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks')

print("="*80)
print("GENERATING TIME HORIZON PREDICTIONS")
print("="*80)
print("\nThis will generate 5yr, 10yr, 30yr, and static 10yr predictions")
print("for both pooled_enrollment and pooled_retrospective approaches.")
print("\nNOTE: This takes a while! Run once, then mark this cell as 'not evaluated'.")
print("="*80)

# Generate time horizon predictions for pooled retrospective (main approach)
print("\n1. Generating pooled_retrospective time horizons...")
result1 = subprocess.run([
    sys.executable, 
    str(script_dir / 'generate_time_horizon_predictions.py'),
    '--approach', 'pooled_retrospective',
    '--horizons', '5,10,30,static10',
    '--n_bootstraps', '100'
], capture_output=True, text=True)
print(result1.stdout)
if result1.stderr:
    print("STDERR:", result1.stderr)
if result1.returncode != 0:
    print(f"\n⚠️  WARNING: Script exited with return code {result1.returncode}")
else:
    print("✓ pooled_retrospective completed successfully")

# Generate time horizon predictions for pooled enrollment (for comparison)
print("\n2. Generating pooled_enrollment time horizons...")
result2 = subprocess.run([
    sys.executable,
    str(script_dir / 'generate_time_horizon_predictions.py'),
    '--approach', 'pooled_enrollment',
    '--horizons', '5,10,30,static10',
    '--n_bootstraps', '100'
], capture_output=True, text=True)
print(result2.stdout)
if result2.stderr:
    print("STDERR:", result2.stderr)
if result2.returncode != 0:
    print(f"\n⚠️  WARNING: Script exited with return code {result2.returncode}")
else:
    print("✓ pooled_enrollment completed successfully")

print("\n" + "="*80)
print("TIME HORIZON PREDICTIONS COMPLETE")
print("="*80)


GENERATING TIME HORIZON PREDICTIONS

This will generate 5yr, 10yr, 30yr, and static 10yr predictions
for both pooled_enrollment and pooled_retrospective approaches.

NOTE: This takes a while! Run once, then mark this cell as 'not evaluated'.

1. Generating pooled_retrospective time horizons...
GENERATING TIME HORIZON PREDICTIONS: POOLED_RETROSPECTIVE
Pi tensor: /Users/sarahurbut/Downloads/pi_full_400k.pt
Horizons: ['5', '10', '30', 'static10']
Output directory: /Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks/results/time_horizons/pooled_retrospective

Loading data...
Loaded pi tensor: torch.Size([400000, 348, 52])
Loaded Y tensor: torch.Size([407878, 348, 52])
Loaded E tensor: torch.Size([407878, 348])
Loaded pce_df: 400000 patients

Subsetting to first 400000 patients...
After subsetting: pi: 400000, Y: 400000, E: 400000, pce_df: 400000

PROCESSING HORIZON: 5
Evaluating dynamic 5-year predictions...

Evaluating ASCVD (Dynamic 10-Year Risk)...
AUC: 0.761 (0.757-

In [11]:
# ============================================================================
# STEP 2: GENERATE WASHOUT PREDICTIONS (RUN ONCE, THEN MARK AS "NOT EVALUATED")
# ============================================================================

print("="*80)
print("GENERATING WASHOUT PREDICTIONS")
print("="*80)
print("\nThis will generate 1-year predictions with 0yr, 1yr, 2yr washout")
print("for both pooled_enrollment and pooled_retrospective approaches.")
print("\nNOTE: This takes a while! Run once, then mark this cell as 'not evaluated'.")
print("="*80)

# Generate washout predictions for pooled retrospective (main approach)
print("\n1. Generating pooled_retrospective washout predictions...")
result1 = subprocess.run([
    sys.executable,
    str(script_dir / 'generate_washout_predictions.py'),
    '--approach', 'pooled_retrospective',
    '--n_bootstraps', '100'
], capture_output=True, text=True)
print(result1.stdout)
if result1.stderr:
    print("STDERR:", result1.stderr)

# Generate washout predictions for pooled enrollment (for comparison)
print("\n2. Generating pooled_enrollment washout predictions...")
result2 = subprocess.run([
    sys.executable,
    str(script_dir / 'generate_washout_predictions.py'),
    '--approach', 'pooled_enrollment',
    '--n_bootstraps', '100'
], capture_output=True, text=True)
print(result2.stdout)
if result2.stderr:
    print("STDERR:", result2.stderr)

print("\n" + "="*80)
print("WASHOUT PREDICTIONS COMPLETE")
print("="*80)

GENERATING WASHOUT PREDICTIONS

This will generate 1-year predictions with 0yr, 1yr, 2yr washout
for both pooled_enrollment and pooled_retrospective approaches.

NOTE: This takes a while! Run once, then mark this cell as 'not evaluated'.

1. Generating pooled_retrospective washout predictions...
GENERATING WASHOUT PREDICTIONS: POOLED_RETROSPECTIVE
Pi tensor: /Users/sarahurbut/Downloads/pi_full_400k.pt
Washout periods: 0yr, 1yr, 2yr
Output directory: /Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks/results/washout/pooled_retrospective

Loading data...
Loaded pi tensor: torch.Size([400000, 348, 52])
Loaded Y tensor: torch.Size([407878, 348, 52])
Loaded E tensor: torch.Size([407878, 348])
Loaded pce_df: 400000 patients

Subsetting to first 400000 patients...
After subsetting: pi: 400000, Y: 400000, E: 400000, pce_df: 400000

PROCESSING WASHOUT: 0yr
Evaluating 1-year predictions with 0-year washout...
Filtering for 1: Found 182542 individuals in cohort

Summary of Re

In [27]:
# ============================================================================
# STEP 2B: GENERATE RETROSPECTIVE WASHOUT ALL HORIZONS (RUN ONCE, THEN MARK AS "NOT EVALUATED")
# ============================================================================
"""
IMPORTANT: This cell generates 10-year, 30-year, and static 10-year predictions with washout for pooled_retrospective.
- Generates dynamic 10-year and 30-year predictions with 1-year washout
- Generates static 10-year predictions (1-year score) with 1-year washout
- Processes ALL 400K patients at once using pre-computed pi tensors
- Uses _from_pi versions: evaluate_major_diseases_wsex_with_bootstrap_dynamic_withwashout_from_pi() and 
  evaluate_major_diseases_wsex_with_bootstrap_withwashout_from_pi()
- Only runs for pooled_retrospective approach (main clinically implementable approach)
- Run once, then mark as "not evaluated"
- Script: generate_retrospective_washout_all_horizons.py
- Results saved to: results/washout_time_horizons/pooled_retrospective/
"""

import subprocess
import sys
from pathlib import Path

# Set script directory
script_dir = Path('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks')

print("="*80)
print("GENERATING RETROSPECTIVE WASHOUT ALL HORIZONS")
print("="*80)
print("\nThis will generate:")
print("  - 10-year dynamic predictions with 1-year washout")
print("  - 30-year dynamic predictions with 1-year washout")
print("  - Static 10-year predictions (1-year score) with 1-year washout")
print("Approach: pooled_retrospective only")
print("Processing: ALL 400K patients at once")
print("\nNOTE: Run once, then mark this cell as 'not evaluated'.")
print("="*80)

# Generate washout predictions for all horizons
print("\nGenerating pooled_retrospective washout predictions for all horizons...")
result = subprocess.run([
    sys.executable,
    str(script_dir / 'generate_retrospective_washout_all_horizons.py'),
    '--n_bootstraps', '100',
    '--washout_years', '1'
], capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr)
if result.returncode != 0:
    print(f"\n⚠️  WARNING: Script exited with return code {result.returncode}")
else:
    print("✓ pooled_retrospective washout predictions completed successfully")

print("\n" + "="*80)
print("RETROSPECTIVE WASHOUT PREDICTIONS COMPLETE")
print("="*80)
print("\nResults saved to: results/washout_time_horizons/pooled_retrospective/")
print("  - washout_1yr_10yr_dynamic_results.csv")
print("  - washout_1yr_30yr_dynamic_results.csv")
print("  - washout_1yr_10yr_static_results.csv")
print("  - washout_1yr_comparison_all_horizons.csv")


GENERATING RETROSPECTIVE WASHOUT ALL HORIZONS

This will generate:
  - 10-year dynamic predictions with 1-year washout
  - 30-year dynamic predictions with 1-year washout
  - Static 10-year predictions (1-year score) with 1-year washout
Approach: pooled_retrospective only
Processing: ALL 400K patients at once

NOTE: Run once, then mark this cell as 'not evaluated'.

Generating pooled_retrospective washout predictions for all horizons...
GENERATING RETROSPECTIVE WASHOUT PREDICTIONS: POOLED_RETROSPECTIVE
Pi tensor: /Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/enrollment_predictions_fixedphi_RETROSPECTIVE_pooled/pi_enroll_fixedphi_sex_FULL.pt
Washout period: 1 years
Horizons: 10yr (dynamic), 30yr (dynamic), static 10yr
Output directory: /Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks/results/washout_time_horizons/pooled_retrospective

Loading data...

Capping at 400,000 patients (from 400,000 available)
✓ Loaded 400,000 patients
  Pi shape: torch.Size([4

In [22]:
# ============================================================================
# STEP 3: GENERATE AGE OFFSET PREDICTIONS (RUN ONCE, THEN MARK AS "NOT EVALUATED")
# ============================================================================
"""
IMPORTANT: This cell generates rolling 1-year predictions using models trained at different time offsets.
- Evaluates 1-year predictions using models trained at enrollment + 0, 1, 2, ..., 9 years
- Uses pre-computed pi batches from AWS run (downloaded to ~/Downloads/age_offset_files/)
- Currently runs on batch 0-10000 (first 10K patients)
- Run once, then mark as "not evaluated"

This analysis shows how model performance changes when predictions are made at different time points
after enrollment, using models that have been retrained with additional follow-up data.
"""

import subprocess
import sys
from pathlib import Path

# Set script directory
script_dir = Path('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks')

print("="*80)
print("GENERATING AGE OFFSET PREDICTIONS")
print("="*80)
print("\nThis will generate rolling 1-year predictions using models trained at")
print("enrollment + 0, 1, 2, ..., 9 years (offsets 0-9).")
print("\nUses pre-computed pi batches from AWS run.")
print("Currently evaluates on batch 0-10000 (first 10K patients).")
print("\nNOTE: Run once, then mark this cell as 'not evaluated'.")
print("="*80)

# Generate age offset predictions for pooled retrospective (main approach)
print("\n1. Generating pooled_retrospective age offset predictions...")
result1 = subprocess.run([
    sys.executable,
    str(script_dir / 'generate_age_offset_predictions.py'),
    '--approach', 'pooled_retrospective',
    '--max_offset', '9',
    '--start_idx', '0',
    '--end_idx', '10000'
], capture_output=True, text=True)
print(result1.stdout)
if result1.stderr:
    print("STDERR:", result1.stderr)
if result1.returncode != 0:
    print(f"\n⚠️  WARNING: Script exited with return code {result1.returncode}")
else:
    print("✓ pooled_retrospective completed successfully")

# Generate age offset predictions for pooled retrospective local version
print("\n2. Generating pooled_retrospective_local (local) age offset predictions...")
result2 = subprocess.run([
    sys.executable,
    str(script_dir / 'generate_age_offset_predictions.py'),
    '--approach', 'pooled_retrospective_local',
    '--max_offset', '9',
    '--start_idx', '0',
    '--end_idx', '10000'
], capture_output=True, text=True)
print(result2.stdout)
if result2.stderr:
    print("STDERR:", result2.stderr)
if result2.returncode != 0:
    print(f"\n⚠️  WARNING: Script exited with return code {result2.returncode}")
else:
    print("✓ pooled_retrospective_local (local) completed successfully")

print("\n" + "="*80)
print("AGE OFFSET PREDICTIONS COMPLETE")
print("="*80)
print("\nResults saved to: results/age_offset/{approach}/")
print("  - age_offset_aucs_batch_0_10000.csv (AUCs by disease and offset)")
print("  - age_offset_aucs_pivot_batch_0_10000.csv (pivot table for easy viewing)")
print("\nROC curves are plotted for ASCVD by default (saved as PDF).")

GENERATING AGE OFFSET PREDICTIONS

This will generate rolling 1-year predictions using models trained at
enrollment + 0, 1, 2, ..., 9 years (offsets 0-9).

Uses pre-computed pi batches from AWS run.
Currently evaluates on batch 0-10000 (first 10K patients).

NOTE: Run once, then mark this cell as 'not evaluated'.

1. Generating pooled_retrospective age offset predictions...
GENERATING AGE OFFSET PREDICTIONS
Approach: pooled_retrospective
Batch: 0-10000
Max offset: 9
Loading full data tensors...
Subsetting to batch 0-10000...

Loading pi batches for offsets 0-9...
  Loading offset 0: pi_enroll_fixedphi_age_offset_0_sex_0_10000_try2_withpcs_newrun.pt
  Loading offset 1: pi_enroll_fixedphi_age_offset_1_sex_0_10000_try2_withpcs_newrun.pt
  Loading offset 2: pi_enroll_fixedphi_age_offset_2_sex_0_10000_try2_withpcs_newrun.pt
  Loading offset 3: pi_enroll_fixedphi_age_offset_3_sex_0_10000_try2_withpcs_newrun.pt
  Loading offset 4: pi_enroll_fixedphi_age_offset_4_sex_0_10000_try2_withpcs_newru

In [12]:
# ============================================================================
# STEP 4: LOAD GENERATED RESULTS
# ============================================================================
"""
After running the generation cells above, load the results here for analysis.
"""

import pandas as pd
import numpy as np
from pathlib import Path

results_base = Path('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks/results')

# Load time horizon results
print("Loading time horizon results...")
time_horizon_results = {}
for approach in ['pooled_retrospective', 'pooled_enrollment']:
    approach_dir = results_base / 'time_horizons' / approach
    if approach_dir.exists():
        time_horizon_results[approach] = {}
        for horizon_file in approach_dir.glob('*_results.csv'):
            horizon_name = horizon_file.stem.replace('_results', '')
            time_horizon_results[approach][horizon_name] = pd.read_csv(horizon_file, index_col=0)
            print(f"  ✓ Loaded {approach}/{horizon_name}")
        # Also load comparison file if exists
        comparison_file = approach_dir / 'comparison_all_horizons.csv'
        if comparison_file.exists():
            time_horizon_results[approach]['comparison'] = pd.read_csv(comparison_file, index_col=0)
            print(f"  ✓ Loaded {approach}/comparison")


# Load washout time horizons results (10yr/30yr dynamic with 1yr washout)
print("\nLoading washout time horizons results...")
washout_time_horizons_results = {}
washout_time_horizons_dir = results_base / 'washout_time_horizons' / 'pooled_retrospective'
if washout_time_horizons_dir.exists():
    # Load individual horizon results
    for horizon_file in washout_time_horizons_dir.glob('washout_1yr_*_results.csv'):
        horizon_name = horizon_file.stem.replace('washout_1yr_', '').replace('_results', '')
        washout_time_horizons_results[horizon_name] = pd.read_csv(horizon_file, index_col=0)
        print(f"  ✓ Loaded washout_time_horizons/{horizon_name}")
    # Load comparison file
    comparison_file = washout_time_horizons_dir / 'washout_1yr_comparison_all_horizons.csv'
    if comparison_file.exists():
        washout_time_horizons_results['comparison'] = pd.read_csv(comparison_file, index_col=0)
        print(f"  ✓ Loaded washout_time_horizons/comparison")


# Load washout results
print("\nLoading washout results...")
washout_results = {}
for approach in ['pooled_retrospective', 'pooled_enrollment']:
    approach_dir = results_base / 'washout' / approach
    if approach_dir.exists():
        washout_results[approach] = {}
        for washout_file in approach_dir.glob('washout_*_results.csv'):
            washout_name = washout_file.stem.replace('washout_', '').replace('_results', '')
            washout_results[approach][washout_name] = pd.read_csv(washout_file, index_col=0)
            print(f"  ✓ Loaded {approach}/{washout_name}")
        # Also load comparison file if exists
        comparison_file = approach_dir / 'washout_comparison_all_offsets.csv'
        if comparison_file.exists():
            washout_results[approach]['comparison'] = pd.read_csv(comparison_file, index_col=0)
            print(f"  ✓ Loaded {approach}/comparison")

print("\nLoading age offset results...")
age_offset_results = {}
for approach in ['pooled_retrospective', 'pooled_retrospective_local']:
    approach_dir = results_base / 'age_offset' / approach
    if approach_dir.exists():
        age_offset_results[approach] = {}
        # Load AUC summary
        auc_file = approach_dir / 'age_offset_aucs_batch_0_10000.csv'
        if auc_file.exists():
            age_offset_results[approach]['aucs'] = pd.read_csv(auc_file)
            print(f"  ✓ Loaded {approach}/aucs")
        # Load pivot table
        pivot_file = approach_dir / 'age_offset_aucs_pivot_batch_0_10000.csv'
        if pivot_file.exists():
            age_offset_results[approach]['pivot'] = pd.read_csv(pivot_file, index_col=0)
            print(f"  ✓ Loaded {approach}/pivot")




print("\n" + "="*80)
print("RESULTS LOADED - READY FOR ANALYSIS")
print("="*80)


Loading time horizon results...
  ✓ Loaded pooled_retrospective/static_10yr
  ✓ Loaded pooled_retrospective/5yr
  ✓ Loaded pooled_retrospective/10yr
  ✓ Loaded pooled_retrospective/30yr
  ✓ Loaded pooled_retrospective/comparison
  ✓ Loaded pooled_enrollment/static_10yr
  ✓ Loaded pooled_enrollment/5yr
  ✓ Loaded pooled_enrollment/10yr
  ✓ Loaded pooled_enrollment/30yr
  ✓ Loaded pooled_enrollment/comparison

Loading washout time horizons results...

Loading washout results...
  ✓ Loaded pooled_retrospective/0yr
  ✓ Loaded pooled_retrospective/2yr
  ✓ Loaded pooled_retrospective/1yr
  ✓ Loaded pooled_retrospective/comparison
  ✓ Loaded pooled_enrollment/0yr
  ✓ Loaded pooled_enrollment/2yr
  ✓ Loaded pooled_enrollment/1yr
  ✓ Loaded pooled_enrollment/comparison

Loading age offset results...
  ✓ Loaded pooled_retrospective/aucs
  ✓ Loaded pooled_retrospective/pivot
  ✓ Loaded pooled_retrospective_local/aucs
  ✓ Loaded pooled_retrospective_local/pivot

RESULTS LOADED - READY FOR ANALYSIS

In [16]:
# ============================================================================
# STEP 5: COMPARE WITH EXTERNAL SCORES (RUN ONCE, THEN MARK AS "NOT EVALUATED")
# ============================================================================
"""
IMPORTANT: This cell compares Aladynoulli predictions with external risk scores.
- Compares with PCE (10-year ASCVD), PREVENT (30-year ASCVD), Gail (10-year Breast Cancer), QRISK3
- Evaluates on the same 400K population
- Run once, then mark as "not evaluated"

External score files needed:
- pce_prevent_full.csv (PCE and PREVENT scores)
- gail_dat_ordered.csv (Gail model scores)
- ukb_qrisk3 (QRISK3 scores, optional)
"""

import subprocess
import sys
from pathlib import Path

# Set script directory
script_dir = Path('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks')

print("="*80)
print("COMPARING WITH EXTERNAL RISK SCORES")
print("="*80)
print("\nThis will compare Aladynoulli with:")
print("  - PCE (10-year ASCVD)")
print("  - PREVENT (30-year ASCVD)")
print("  - Gail Model (10-year Breast Cancer)")
print("  - QRISK3 (10-year ASCVD, if available)")
print("\nNOTE: Run once, then mark this cell as 'not evaluated'.")
print("="*80)

# Compare for pooled retrospective (main approach)
print("\n1. Comparing pooled_retrospective with external scores...")
result1 = subprocess.run([
    sys.executable,
    str(script_dir / 'compare_with_external_scores.py'),
    '--approach', 'pooled_retrospective',
    '--n_bootstraps', '100'
], capture_output=True, text=True)
print(result1.stdout)
if result1.stderr:
    print("STDERR:", result1.stderr)
if result1.returncode != 0:
    print(f"\n⚠️  WARNING: Script exited with return code {result1.returncode}")
else:
    print("✓ pooled_retrospective comparison completed successfully")



print("\n" + "="*80)
print("EXTERNAL SCORE COMPARISON COMPLETE")
print("="*80)
print("\nResults saved to: results/comparisons/{approach}/external_scores_comparison.csv")

COMPARING WITH EXTERNAL RISK SCORES

This will compare Aladynoulli with:
  - PCE (10-year ASCVD)
  - PREVENT (30-year ASCVD)
  - Gail Model (10-year Breast Cancer)
  - QRISK3 (10-year ASCVD, if available)

NOTE: Run once, then mark this cell as 'not evaluated'.

1. Comparing pooled_retrospective with external scores...
Set random seed to 42 for reproducibility
COMPARING WITH EXTERNAL SCORES: POOLED_RETROSPECTIVE

Loading data...
Loaded external scores file: 400000 patients
Loaded pi tensor: torch.Size([400000, 348, 52])
Loaded Y tensor: torch.Size([407878, 348, 52])
Loaded pce_df: 400000 patients

ASCVD COMPARISON: Aladynoulli vs PCE (10yr) and PREVENT (30yr)

Applying LOESS calibration to predictions...
  Calibrating predictions in batches...
✓ Calibration applied

Computing Aladynoulli predictions...
Processing 400000 patients (this may take 10-20 minutes)...
  Processed 50000/400000 patients (12.5%)...
  Processed 100000/400000 patients (25.0%)...
  Processed 150000/400000 patients 

In [None]:
# ============================================================================
# COMPARE WITH DELPHI (1-YEAR PREDICTIONS WITH 0-YEAR AND 1-YEAR WASHOUT)
# ============================================================================

%run compare_delphi_1yr_import.py



ALADYNOULLI vs DELPHI: 1-YEAR PREDICTION COMPARISON

Loading Aladynoulli results...
Loaded Aladynoulli results for 28 diseases

Extracting Delphi results from supplementary table...
Extracted Delphi results for 28 diseases

Creating comparison...

ALADYNOULLI vs DELPHI: DISEASES WHERE ALADYNOULLI WINS (1-YEAR, 0-YEAR GAP)

Total wins: 15 out of 28 diseases
Win rate: 53.6%

Disease                    Aladynoulli       Delphi    Advantage    Percent
----------------------------------------------------------------------------------------------------
Parkinsons                      0.8217       0.6108       0.2109      34.5%
Prostate_Cancer                 0.8451       0.6636       0.1814      27.3%
Multiple_Sclerosis              0.8343       0.6545       0.1798      27.5%
ASCVD                           0.8949       0.7370       0.1579      21.4%
Atrial_Fib                      0.8162       0.6721       0.1442      21.4%
Breast_Cancer                   0.8182       0.6985       0.1197   

## condition on one model, which events are correlated with the prediction of the model (what events explain the association between the model rpediction and the truth the best) -- there should be a a good way to ablate/reproduce

* wha'ts happenign in the dropout with the washout analysis : look at diseases associated with delta_0-1
diseases 

In [9]:
# ============================================================================
# COMPARE WITH DELPHI (1-YEAR PREDICTIONS WITH 0-YEAR AND 1-YEAR WASHOUT)
# ============================================================================

%run compare_delphi_1yr_import.py



ALADYNOULLI vs DELPHI: 1-YEAR PREDICTION COMPARISON

Loading Aladynoulli results...
Loaded Aladynoulli results for 28 diseases

Extracting Delphi results from supplementary table...
Extracted Delphi results for 28 diseases

Creating comparison...

ALADYNOULLI vs DELPHI: DISEASES WHERE ALADYNOULLI WINS (1-YEAR, 0-YEAR GAP)

Total wins: 15 out of 28 diseases
Win rate: 53.6%

Disease                    Aladynoulli       Delphi    Advantage    Percent
----------------------------------------------------------------------------------------------------
Parkinsons                      0.8217       0.6108       0.2109      34.5%
Prostate_Cancer                 0.8451       0.6636       0.1814      27.3%
Multiple_Sclerosis              0.8343       0.6545       0.1798      27.5%
ASCVD                           0.8949       0.7370       0.1579      21.4%
Atrial_Fib                      0.8162       0.6721       0.1442      21.4%
Breast_Cancer                   0.8182       0.6985       0.1197   



#### COMPARISON VIA COX

# vs cox (in cox baseline without noulli, calculated in R script tdccdoe20.R) # Compare Fixed_Retrospective_Pooled vs Cox Baseline (Age + Sex only) on UK Biobank
import pandas as pd
import numpy as np

# Load Cox baseline results (age + sex only, no Aladyn)
cox_df = pd.read_csv('/Users/sarahurbut/Library/CloudStorage/Dropbox/auc_results_cox_20000_30000train_0_10000test_1121.csv')
# Take first occurrence of each disease (remove duplicates)
cox_baseline = cox_df.groupby('disease_group')['auc'].first().to_dict()

# Load your Fixed_Retrospective_Pooled results (maybe replace 10 year with static?)
df_10yr = pd.read_csv('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks/comparison_all_approaches_10yr.csv', index_col=0)
df_30yr = pd.read_csv('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks/comparison_all_approaches_30yr.csv', index_col=0)

# Create comparison
comparison_data = []
for disease in df_10yr.index:
    if disease in cox_baseline:
        comparison_data.append({
            'Disease': disease,
            'Cox_Baseline_AUC': cox_baseline[disease],
            'Your_10yr_AUC': df_10yr.loc[disease, 'Fixed_Retrospective_Pooled'],
            'Your_30yr_AUC': df_30yr.loc[disease, 'Fixed_Retrospective_Pooled'],
            'Improvement_10yr': df_10yr.loc[disease, 'Fixed_Retrospective_Pooled'] - cox_baseline[disease],
            'Improvement_30yr': df_30yr.loc[disease, 'Fixed_Retrospective_Pooled'] - cox_baseline[disease],
        })

comparison_df = pd.DataFrame(comparison_data).set_index('Disease').sort_values('Improvement_10yr', ascending=False)

print("="*100)
print("COMPARISON: Your Fixed_Retrospective_Pooled vs Cox Baseline (Age + Sex only) on UK Biobank")
print("="*100)
print("\n10-YEAR PREDICTIONS:")
print("-"*100)
print(comparison_df[['Cox_Baseline_AUC', 'Your_10yr_AUC', 'Improvement_10yr']].round(4))
print(f"\nMean improvement: {comparison_df['Improvement_10yr'].mean():.4f}")
print(f"Median improvement: {comparison_df['Improvement_10yr'].median():.4f}")
print(f"Diseases with improvement >0.05: {(comparison_df['Improvement_10yr'] > 0.05).sum()} / {len(comparison_df)}")
print(f"Diseases with improvement >0.10: {(comparison_df['Improvement_10yr'] > 0.10).sum()} / {len(comparison_df)}")

print("\n" + "="*100)
print("30-YEAR PREDICTIONS:")
print("-"*100)
print(comparison_df[['Cox_Baseline_AUC', 'Your_30yr_AUC', 'Improvement_30yr']].round(4))
print(f"\nMean improvement: {comparison_df['Improvement_30yr'].mean():.4f}")
print(f"Median improvement: {comparison_df['Improvement_30yr'].median():.4f}")
print(f"Diseases with improvement >0.05: {(comparison_df['Improvement_30yr'] > 0.05).sum()} / {len(comparison_df)}")
print(f"Diseases with improvement >0.10: {(comparison_df['Improvement_30yr'] > 0.10).sum()} / {len(comparison_df)}")

print("\n" + "="*100)
print("TOP IMPROVEMENTS (10-year):")
print("-"*100)
print(comparison_df.nlargest(10, 'Improvement_10yr')[['Cox_Baseline_AUC', 'Your_10yr_AUC', 'Improvement_10yr']].round(4))

# Save comparison
comparison_df.to_csv('comparison_vs_cox_baseline.csv')
print("\n✓ Saved to comparison_vs_cox_baseline.csv")


In [14]:
# ============================================================================
# STEP 6: RUN LOO AND AWS VALIDATION (RUN ONCE, THEN MARK AS "NOT EVALUATED")
# ============================================================================
"""
IMPORTANT: This cell runs validation comparisons:
- LOO vs Full Pooled: Compares leave-one-out validation (phi trained excluding one batch) 
  vs full pooled (phi trained on all batches) for excluded batches
- AWS vs Local: Compares AWS retrospective models vs local retrospective models for batches 0-10
- Run once, then mark as "not evaluated"
"""

import subprocess
import sys
from pathlib import Path

# Set script directory
script_dir = Path('/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks')

print("="*80)
print("RUNNING LOO AND AWS VALIDATION")
print("="*80)
print("\nThis will compare:")
print("  - LOO validation vs Full Pooled (for excluded batches)")
print("  - AWS retrospective models vs Local retrospective models (batches 0-10)")
print("\nNOTE: This takes a while! Run once, then mark this cell as 'not evaluated'.")
print("="*80)

result = subprocess.run([
    sys.executable,
    str(script_dir / 'run_loo_aws_validation.py'),
    '--n_bootstraps', '100'
], capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr)
if result.returncode != 0:
    print(f"\n⚠️  WARNING: Script exited with return code {result.returncode}")
else:
    print("✓ Validation completed successfully")

print("\n" + "="*80)
print("VALIDATION COMPLETE")
print("="*80)
print("\nResults saved to: results/validation/")

RUNNING LOO AND AWS VALIDATION

This will compare:
  - LOO validation vs Full Pooled (for excluded batches)
  - AWS retrospective models vs Local retrospective models (batches 0-10)

NOTE: This takes a while! Run once, then mark this cell as 'not evaluated'.
LOO AND AWS VALIDATION

Loading model and essentials...
Loading components...
Loaded all components successfully!
torch.Size([10000, 47])

Cluster Sizes:
Cluster 0: 14 diseases
Cluster 1: 7 diseases
Cluster 2: 21 diseases
Cluster 3: 15 diseases
Cluster 4: 17 diseases
Cluster 5: 16 diseases
Cluster 6: 57 diseases
Cluster 7: 18 diseases
Cluster 8: 13 diseases
Cluster 9: 11 diseases
Cluster 10: 18 diseases
Cluster 11: 12 diseases
Cluster 12: 26 diseases
Cluster 13: 7 diseases
Cluster 14: 9 diseases
Cluster 15: 8 diseases
Cluster 16: 7 diseases
Cluster 17: 11 diseases
Cluster 18: 6 diseases
Cluster 19: 55 diseases

Calculating gamma for k=0:
Number of diseases in cluster: 14
Base value (first 5): tensor([-13.8155, -13.8155, -13.1095, -

Validation:
    loo and aws

* LOO: also in lifetime.ipynb # Compare Leave-One-Out vs Full Pooled results
# For batches that were excluded in LOO validation

# Batches excluded in LOO (from the folder list)
excluded_batches = [0, 6, 15, 17, 18, 20, 24, 34, 35, 37]

# Storage for results
loo_10yr_results = []
loo_30yr_results = []
loo_static_10yr_results = []

full_pooled_10yr_results = []
full_pooled_30yr_results = []
full_pooled_static_10yr_results = []

# Load full tensors once
if 'Y_full' not in globals():
    Y_full = torch.load('/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/data_for_running/Y_tensor.pt')
if 'E_full' not in globals():
    E_full = torch.load('/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/data_for_running/E_enrollment_full.pt')

# Loop through excluded batches
for batch_idx in excluded_batches:
    start_idx = batch_idx * 10000
    end_idx = (batch_idx + 1) * 10000
    
    print(f"\n{'='*80}")
    print(f"Processing batch {batch_idx}: {start_idx} to {end_idx}")
    print(f"{'='*80}")
    
    # Get pce_df subset for this batch
    pce_df_subset = pce_df_full[start_idx:end_idx].copy().reset_index(drop=True)
    
    # Extract batch from full tensors
    Y_batch = Y_full[start_idx:end_idx]
    E_batch = E_full[start_idx:end_idx]
    
    # ===== LEAVE-ONE-OUT RESULTS =====
    loo_ckpt_path = f'/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/leave_one_out_validation/batch_{batch_idx}/model_enroll_fixedphi_sex_{start_idx}_{end_idx}.pt'
    
    try:
        print(f"\n--- Leave-One-Out (excluded batch {batch_idx}) ---")
        loo_ckpt = torch.load(loo_ckpt_path, weights_only=False)
        model.load_state_dict(loo_ckpt['model_state_dict'])
        
        model.Y = torch.tensor(Y_batch, dtype=torch.float32)
        model.N = Y_batch.shape[0]
       
        # 10-year predictions
        print(f"LOO - 10 year predictions...")
        loo_10yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=10, patient_indices=None
        )
        loo_10yr['batch_idx'] = batch_idx
        loo_10yr['analysis_type'] = 'leave_one_out'
        loo_10yr_results.append(loo_10yr)
        
        # 30-year predictions
        print(f"LOO - 30 year predictions...")
        loo_30yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=30, patient_indices=None
        )
        loo_30yr['batch_idx'] = batch_idx
        loo_30yr['analysis_type'] = 'leave_one_out'
        loo_30yr_results.append(loo_30yr)
        
        # Static 10-year predictions
        print(f"LOO - Static 10 year predictions...")
        loo_static_10yr = evaluate_major_diseases_wsex_with_bootstrap(
            model=model,
            Y_100k=Y_batch,
            E_100k=E_batch,
            disease_names=disease_names,
            pce_df=pce_df_subset,
            n_bootstraps=100,
            follow_up_duration_years=10,
        )
        loo_static_10yr['batch_idx'] = batch_idx
        loo_static_10yr['analysis_type'] = 'leave_one_out'
        loo_static_10yr_results.append(loo_static_10yr)
        
    except FileNotFoundError:
        print(f"LOO checkpoint not found: {loo_ckpt_path}")
    except Exception as e:
        print(f"Error processing LOO checkpoint {batch_idx}: {e}")
        import traceback
        traceback.print_exc()
    
    # ===== FULL POOLED RESULTS =====
    full_pooled_ckpt_path = f'/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/enrollment_predictions_fixedphi_RETROSPECTIVE_pooled/model_enroll_fixedphi_sex_{start_idx}_{end_idx}.pt'
    
    try:
        print(f"\n--- Full Pooled (all 40 batches) ---")
        full_ckpt = torch.load(full_pooled_ckpt_path, weights_only=False)
        model.load_state_dict(full_ckpt['model_state_dict'])
        
        model.Y = torch.tensor(Y_batch, dtype=torch.float32)
        model.N = Y_batch.shape[0]
       
        # 10-year predictions
        print(f"Full Pooled - 10 year predictions...")
        full_10yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=10, patient_indices=None
        )
        full_10yr['batch_idx'] = batch_idx
        full_10yr['analysis_type'] = 'full_pooled'
        full_pooled_10yr_results.append(full_10yr)
        
        # 30-year predictions
        print(f"Full Pooled - 30 year predictions...")
        full_30yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=30, patient_indices=None
        )
        full_30yr['batch_idx'] = batch_idx
        full_30yr['analysis_type'] = 'full_pooled'
        full_pooled_30yr_results.append(full_30yr)
        
        # Static 10-year predictions
        print(f"Full Pooled - Static 10 year predictions...")
        full_static_10yr = evaluate_major_diseases_wsex_with_bootstrap(
            model=model,
            Y_100k=Y_batch,
            E_100k=E_batch,
            disease_names=disease_names,
            pce_df=pce_df_subset,
            n_bootstraps=100,
            follow_up_duration_years=10,
        )
        full_static_10yr['batch_idx'] = batch_idx
        full_static_10yr['analysis_type'] = 'full_pooled'
        full_pooled_static_10yr_results.append(full_static_10yr)
        
    except FileNotFoundError:
        print(f"Full pooled checkpoint not found: {full_pooled_ckpt_path}")
    except Exception as e:
        print(f"Error processing full pooled checkpoint {batch_idx}: {e}")
        import traceback
        traceback.print_exc()

print(f"\n{'='*80}")
print("Completed processing!")
print(f"{'='*80}")
print(f"LOO - 10yr: {len(loo_10yr_results)} batches")
print(f"LOO - 30yr: {len(loo_30yr_results)} batches")
print(f"Full Pooled - 10yr: {len(full_pooled_10yr_results)} batches")
print(f"Full Pooled - 30yr: {len(full_pooled_30yr_results)} batches")

# Extract AUCs and compare
def extract_aucs_from_results(results_list):
    aucs_by_batch = {}
    for result in results_list:
        batch_idx = result['batch_idx']
        if batch_idx not in aucs_by_batch:
            aucs_by_batch[batch_idx] = {}
        for disease, metrics in result.items():
            if disease not in ['batch_idx', 'analysis_type'] and isinstance(metrics, dict):
                if 'auc' in metrics:
                    aucs_by_batch[batch_idx][disease] = metrics['auc']
    return aucs_by_batch

loo_10yr_aucs = extract_aucs_from_results(loo_10yr_results)
loo_30yr_aucs = extract_aucs_from_results(loo_30yr_results)
loo_static_10yr_aucs = extract_aucs_from_results(loo_static_10yr_results)

full_10yr_aucs = extract_aucs_from_results(full_pooled_10yr_results)
full_30yr_aucs = extract_aucs_from_results(full_pooled_30yr_results)
full_static_10yr_aucs = extract_aucs_from_results(full_pooled_static_10yr_results)

# Compare using the same function
compare_results(loo_10yr_aucs, full_10yr_aucs, "LEAVE-ONE-OUT vs FULL POOLED - 10-YEAR PREDICTIONS")
compare_results(loo_30yr_aucs, full_30yr_aucs, "LEAVE-ONE-OUT vs FULL POOLED - 30-YEAR PREDICTIONS")
compare_results(loo_static_10yr_aucs, full_static_10yr_aucs, "LEAVE-ONE-OUT vs FULL POOLED - STATIC 10-YEAR PREDICTIONS")


### and then AWS validation (versus AWS)for ten orso baches ... 


from fig5utils import *
import pandas as pd
import numpy as np
import torch

# Load full pce_df
pce_df_full = pd.read_csv('/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/pce_prevent_full.csv')
disease_names = essentials['disease_names']

# Storage for results - SEPARATE variables for each analysis type
# Fixed phi from retrospective AWS data
aws_10yr_results = []
aws_30yr_results = []
aws_static_10yr_results = []

# Fixed phi from RETROSPECTIVE data run locally
fixed_retrospective_10yr_results = []
fixed_retrospective_30yr_results = []
fixed_retrospective_static_10yr_results = []

# Load full tensors once (shared across both analyses)
if 'Y_full' not in globals():
    Y_full = torch.load('/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/data_for_running/Y_tensor.pt')
if 'E_full' not in globals():
    E_full = torch.load('/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/data_for_running/E_enrollment_full.pt')

# Loop through checkpoints 0-10 (10 batches)
for batch_idx in range(11):
    start_idx = batch_idx * 10000
    end_idx = (batch_idx + 1) * 10000
    
    print(f"\n{'='*80}")
    print(f"Processing batch {batch_idx}: {start_idx} to {end_idx}")
    print(f"{'='*80}")
    
    # Get pce_df subset for this batch
    pce_df_subset = pce_df_full[start_idx:end_idx].copy().reset_index(drop=True)
    
    # Extract batch from full tensors (shared for both analyses)
    Y_batch = Y_full[start_idx:end_idx]
    E_batch = E_full[start_idx:end_idx]
    
    # ===== FIXED PHI FROMAWS POOLED DATA =====
    fixed_enrollment_ckpt_path = f'/Users/sarahurbut/Downloads/aws_first_10_batches_models/model_enroll_fixedphi_sex_{start_idx}_{end_idx}.pt'
    
    try:
        print(f"\n--- Fixed Phi (retrospective AWS) ---")
        fixed_ckpt = torch.load(fixed_enrollment_ckpt_path, weights_only=False)
        model.load_state_dict(fixed_ckpt['model_state_dict'])
        
        # Update model.Y and model.N so forward() uses correct patients
        model.Y = torch.tensor(Y_batch, dtype=torch.float32)
        model.N = Y_batch.shape[0]
       
        # 10-year predictions
        print(f"Fixed Phi (retrospective AWS) - 10 year predictions...")
        aws_10yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=10, patient_indices=None
        )
        aws_10yr['batch_idx'] = batch_idx
        aws_10yr['analysis_type'] = 'fixed_enrollment'
        aws_10yr_results.append(aws_10yr)
        
        # 30-year predictions
        print(f"Fixed Phi (retrospective AWS) - 30 year predictions...")
        aws_30yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=30, patient_indices=None
        )
        aws_30yr['batch_idx'] = batch_idx
        aws_30yr['analysis_type'] = 'fixed_enrollment'
        aws_30yr_results.append(aws_30yr)
        
        # Static 10-year predictions (using 1-year score)
        print(f"Fixed Phi (retrospective AWS) - Static 10 year predictions...")
        aws_static_10yr = evaluate_major_diseases_wsex_with_bootstrap(
            model=model,
            Y_100k=Y_batch,
            E_100k=E_batch,
            disease_names=disease_names,
            pce_df=pce_df_subset,
            n_bootstraps=100,
            follow_up_duration_years=10,
        )
        aws_static_10yr['batch_idx'] = batch_idx
        aws_static_10yr['analysis_type'] = 'fixed_enrollment'
        aws_static_10yr_results.append(aws_static_10yr)
        
    except FileNotFoundError:
        print(f"Fixed phi (ENROLLMENT) checkpoint not found: {fixed_enrollment_ckpt_path}")
    except Exception as e:
        print(f"Error processing fixed phi (ENROLLMENT) checkpoint {batch_idx}: {e}")
        import traceback
        traceback.print_exc()
    
    # ===== FIXED PHI FROM RETROSPECTIVE DATA =====
    fixed_retrospective_ckpt_path = f'/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/enrollment_predictions_fixedphi_RETROSPECTIVE_pooled/model_enroll_fixedphi_sex_{start_idx}_{end_idx}.pt'
    
    try:
        print(f"\n--- Fixed Phi (RETROSPECTIVE) ---")
        fixed_ckpt = torch.load(fixed_retrospective_ckpt_path, weights_only=False)
        model.load_state_dict(fixed_ckpt['model_state_dict'])
        
        # Update model.Y and model.N so forward() uses correct patients
        model.Y = torch.tensor(Y_batch, dtype=torch.float32)
        model.N = Y_batch.shape[0]
       
        # 10-year predictions
        print(f"Fixed Phi (RETROSPECTIVE) - 10 year predictions...")
        fixed_10yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=10, patient_indices=None
        )
        fixed_10yr['batch_idx'] = batch_idx
        fixed_10yr['analysis_type'] = 'fixed_retrospective'
        fixed_retrospective_10yr_results.append(fixed_10yr)
        
        # 30-year predictions
        print(f"Fixed Phi (RETROSPECTIVE) - 30 year predictions...")
        fixed_30yr = evaluate_major_diseases_wsex_with_bootstrap_dynamic(
            model, Y_batch, E_batch, disease_names, pce_df_subset, 
            n_bootstraps=100, follow_up_duration_years=30, patient_indices=None
        )
        fixed_30yr['batch_idx'] = batch_idx
        fixed_30yr['analysis_type'] = 'fixed_retrospective'
        fixed_retrospective_30yr_results.append(fixed_30yr)
        
        # Static 10-year predictions (using 1-year score)
        print(f"Fixed Phi (RETROSPECTIVE) - Static 10 year predictions...")
        fixed_static_10yr = evaluate_major_diseases_wsex_with_bootstrap(
            model=model,
            Y_100k=Y_batch,
            E_100k=E_batch,
            disease_names=disease_names,
            pce_df=pce_df_subset,
            n_bootstraps=100,
            follow_up_duration_years=10,
        )
        fixed_static_10yr['batch_idx'] = batch_idx
        fixed_static_10yr['analysis_type'] = 'fixed_retrospective'
        fixed_retrospective_static_10yr_results.append(fixed_static_10yr)
        
    except FileNotFoundError:
        print(f"Fixed phi (RETROSPECTIVE) checkpoint not found: {fixed_retrospective_ckpt_path}")
    except Exception as e:
        print(f"Error processing fixed phi (RETROSPECTIVE) checkpoint {batch_idx}: {e}")
        import traceback
        traceback.print_exc()

print(f"\n{'='*80}")
print("Completed processing all checkpoints!")
print(f"{'='*80}")
print(f"Fixed Enrollment - 10yr: {len(fixed_enrollment_10yr_results)} batches")
print(f"Fixed Enrollment - 30yr: {len(fixed_enrollment_30yr_results)} batches")
print(f"Fixed Retrospective - 10yr: {len(fixed_retrospective_10yr_results)} batches")
print(f"Fixed Retrospective - 30yr: {len(fixed_retrospective_30yr_results)} batches")
)