In [None]:
# Load model predictions (pi) for an example patient with myocardial infarction (MI) followed by cancer
# This demonstrates how Aladynoulli can predict subsequent diseases after a first diagnosis

import torch
from pathlib import Path
import pandas as pd
import numpy as np

print("="*80)
print("MODEL PREDICTIONS: USING PI AND LAMBDA TO PREDICT SUBSEQUENT DISEASES")
print("="*80)

# Encoding this cell relies on: an entry of E below NO_EVENT_CUTOFF is treated
# as a diagnosis, and adding AGE_OFFSET turns an event time into an age.
# NOTE(review): presumably E == 51 means "never diagnosed/censored" and time 0
# is age 30 -- confirm against the code that builds E_matrix.pt.
NO_EVENT_CUTOFF = 51
AGE_OFFSET = 30
EXAMPLE_PATIENT_IDX = 5565  # hardcoded showcase patient
BATCH_SIZE = 10000          # rows used when only batch predictions are available

DATA_DIR = Path("/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/data_for_running")


def find_disease_indices(disease_names):
    """Locate the MI column and the cancer columns by substring matching.

    A name counts as cancer when it contains a cancer indicator word and either
    names a "serious" organ/haematological term or contains "malignant" /
    "carcinoma"; names containing an exclusion term (skin, benign, in situ, ...)
    are dropped.

    Returns:
        (mi_idx, cancer_indices, serious_cancer_indices) where mi_idx is the
        first name containing "myocardial infarction" (or None) and the two
        lists hold positions into disease_names, in ascending order.
    """
    # Terms to exclude (skin cancers, benign, precancerous, etc.)
    exclude_terms = (
        'skin', 'melanoma', 'basal cell', 'squamous cell', 'benign',
        'precancerous', 'pre-cancerous', 'premalignant', 'pre-malignant',
        'in situ', 'carcinoma in situ', 'dysplasia', 'hyperplasia',
    )
    # Serious cancer terms (only count when combined with a cancer indicator)
    serious_cancer_organs = (
        'lung', 'bronchus', 'breast', 'colon', 'colorectal', 'pancreas', 'liver',
        'stomach', 'esophagus', 'ovary', 'prostate', 'bladder', 'kidney',
        'brain', 'leukemia', 'lymphoma', 'myeloma', 'thyroid',
    )
    cancer_indicators = ('malignant', 'carcinoma', 'cancer', 'neoplasm')

    mi_idx = None
    cancer_indices = []
    serious_cancer_indices = []
    for i, name in enumerate(disease_names):
        name_str = str(name).lower()
        if mi_idx is None and 'myocardial infarction' in name_str:
            mi_idx = i

        if not any(indicator in name_str for indicator in cancer_indicators):
            continue
        is_serious = any(organ in name_str for organ in serious_cancer_organs)
        # Without an organ match, bare "cancer"/"neoplasm" is not enough.
        is_cancer = is_serious or 'malignant' in name_str or 'carcinoma' in name_str
        if not is_cancer or any(term in name_str for term in exclude_terms):
            continue

        cancer_indices.append(i)
        if is_serious:
            serious_cancer_indices.append(i)
    return mi_idx, cancer_indices, serious_cancer_indices


def diagnosed_cancers(event_times, cancer_indices):
    """Return [(disease_idx, event_time), ...] for the cancers in one patient's row of E."""
    return [
        (d_idx, event_times[d_idx].item())
        for d_idx in cancer_indices
        if event_times[d_idx] < NO_EVENT_CUTOFF
    ]


def first_mi_first_patient(E, mi_idx, n_patients):
    """Return the first row index (< n_patients) whose earliest diagnosis is, or ties with, MI.

    Vectorised replacement for a per-patient Python loop. Returns None when no
    such patient exists.
    """
    E = E[:n_patients]
    mi_times = E[:, mi_idx]
    # MI was diagnosed AND no other disease has a strictly earlier event time.
    is_mi_first = (mi_times < NO_EVENT_CUTOFF) & (mi_times == E.min(dim=1).values)
    hits = torch.nonzero(is_mi_first).flatten()
    return int(hits[0]) if len(hits) > 0 else None


def risks_at_time(pi, patient_idx, event_time):
    """Return (patient risks, population mean risks) across diseases at one time index.

    event_time comes from Tensor.item() and may be a float, so it is cast to
    int before being used as an index into the last axis of pi.
    """
    t = int(event_time)
    return pi[patient_idx, :, t], pi[:, :, t].mean(dim=0)


def check_mi_then_cancer(patient_idx, event_times, mi_idx, cancer_indices,
                         serious_cancer_indices, disease_names):
    """Check whether one patient shows MI followed by (or concurrent with) cancer.

    Prints what it finds. Serious cancers are preferred over other cancers when
    picking the cancer to report.

    Returns:
        (mi_age, cancer_idx, cancer_age) as raw event times, or None when the
        patient has no MI, no cancer, or cancer strictly before MI.
    """
    if not (event_times[mi_idx] < NO_EVENT_CUTOFF):
        print(f"⚠️  Patient {patient_idx} does not have MI")
        return None
    mi_age = event_times[mi_idx].item()
    print(f"  MI at age {mi_age + AGE_OFFSET}")

    cancer_ages = diagnosed_cancers(event_times, cancer_indices)
    if not cancer_ages:
        print(f"  ⚠️  Patient {patient_idx} does not have cancer")
        return None

    serious = set(serious_cancer_indices)
    serious_cancer_ages = [c for c in cancer_ages if c[0] in serious]
    if serious_cancer_ages:
        earliest_cancer_idx, earliest_cancer_age = min(serious_cancer_ages, key=lambda x: x[1])
        print(f"  Serious cancer found: {disease_names[earliest_cancer_idx]} at age {earliest_cancer_age + AGE_OFFSET}")
    else:
        earliest_cancer_idx, earliest_cancer_age = min(cancer_ages, key=lambda x: x[1])
        print(f"  Cancer found: {disease_names[earliest_cancer_idx]} at age {earliest_cancer_age + AGE_OFFSET}")

    if mi_age > earliest_cancer_age:
        print(f"  ⚠️  Cancer occurs before MI (MI: age {mi_age + AGE_OFFSET}, Cancer: age {earliest_cancer_age + AGE_OFFSET})")
        print("  This patient doesn't fit the MI → Cancer progression pattern")
        return None

    print(f"  ✓ MI occurs before cancer (MI: age {mi_age + AGE_OFFSET}, Cancer: age {earliest_cancer_age + AGE_OFFSET})")
    return mi_age, earliest_cancer_idx, earliest_cancer_age


def report_subsequent_diseases(event_times, mi_idx, mi_age, pi_at_mi, population_baseline,
                               disease_names, cancer_indices, has_cancer):
    """Print predicted-vs-actual tables for diseases following MI, then the key insight.

    Args:
        event_times: the patient's row of E.
        mi_idx: column of MI (excluded from both tables).
        mi_age: raw MI event time for this patient.
        pi_at_mi: the patient's predicted risks at the MI time, one per disease.
        population_baseline: mean predicted risks at the same time, one per disease.
        disease_names: names aligned with the disease axis.
        cancer_indices: columns considered cancer (used for highlighting).
        has_cancer: whether to print the cancer-specific conclusions.
    """
    cancer_set = set(cancer_indices)

    def risks(d_idx):
        pred_risk = pi_at_mi[d_idx].item()
        pop_risk = population_baseline[d_idx].item()
        risk_ratio = pred_risk / pop_risk if pop_risk > 0 else float('inf')
        return pred_risk, pop_risk, risk_ratio

    # Find top predicted subsequent diseases (excluding MI)
    other_diseases = [i for i in range(len(disease_names)) if i != mi_idx]
    top_indices = torch.argsort(pi_at_mi[other_diseases], descending=True)[:10]
    top_diseases = [other_diseases[i] for i in top_indices]

    print(f"\nTop 10 Predicted Subsequent Diseases (at MI diagnosis):")
    print("  Disease                          Predicted  Population  Risk Ratio")
    print("  " + "-"*70)
    for d_idx in top_diseases:
        pred_risk, pop_risk, risk_ratio = risks(d_idx)
        marker = " ⭐" if d_idx in cancer_set and risk_ratio > 1.2 else ""
        print(f"  {disease_names[d_idx][:30]:30s} {pred_risk:.4f}    {pop_risk:.4f}     {risk_ratio:.2f}x{marker}")

    print(f"\n  Risk Ratio = Predicted Risk / Population Risk")
    print(f"  Values > 1.0 indicate elevated risk relative to population average")
    print(f"  ⭐ = Cancer disease with elevated risk")

    # Check what actually happened
    print(f"\nActual Subsequent Diseases (after MI diagnosis):")
    subsequent_diseases = []
    for d_idx in other_diseases:
        if event_times[d_idx] < NO_EVENT_CUTOFF:
            subsequent_age = event_times[d_idx].item() + AGE_OFFSET
            if subsequent_age > mi_age + AGE_OFFSET:  # strictly after MI diagnosis
                subsequent_diseases.append((d_idx, subsequent_age) + risks(d_idx))

    if subsequent_diseases:
        subsequent_diseases.sort(key=lambda x: x[1])  # Sort by age
        print("  Disease                          Age    Predicted  Population  Risk Ratio")
        print("  " + "-"*75)
        for d_idx, age, pred_risk, pop_risk, risk_ratio in subsequent_diseases[:15]:
            marker = " ⭐" if d_idx in cancer_set and risk_ratio > 1.2 else ""
            print(f"  {disease_names[d_idx][:30]:30s} {age:3.0f}   {pred_risk:.4f}    {pop_risk:.4f}     {risk_ratio:.2f}x{marker}")

        # Highlight Cancer specifically
        if cancer_set:
            print(f"\n  Key Subsequent Diseases: Cancer")
            for d_idx, age, pred_risk, pop_risk, risk_ratio in subsequent_diseases:
                if d_idx in cancer_set:
                    print(f"    {disease_names[d_idx][:40]:40s} Age {age:3.0f}  Pred: {pred_risk:.4f}  Pop: {pop_risk:.4f}  RR: {risk_ratio:.2f}x")

        # Summary statistics
        elevated_risk = [s for s in subsequent_diseases if s[4] > 1.5]  # Risk ratio > 1.5
        print(f"\n  Summary:")
        print(f"  - Total subsequent diseases: {len(subsequent_diseases)}")
        print(f"  - Diseases with elevated risk (RR > 1.5x): {len(elevated_risk)}")
        if elevated_risk:
            avg_rr = sum(s[4] for s in elevated_risk) / len(elevated_risk)
            print(f"  - Average risk ratio for elevated diseases: {avg_rr:.2f}x")

        print(f"\n✓ Aladynoulli predicted {len([s for s in subsequent_diseases if s[2] > 0.01])} subsequent diseases (predicted risk > 0.01)")
        print(f"✓ Patient actually developed {len(subsequent_diseases)} subsequent diseases")
        if has_cancer:
            print(f"✓ Patient developed CANCER after MI, demonstrating that 'competing risks' can both occur")
    else:
        print("  (No subsequent diseases yet)")

    print("\n" + "="*80)
    print("KEY INSIGHT:")
    print("="*80)
    print("Traditional competing risk models assume MI and Cancer are EXCLUSIVE - you die from one or the other.")
    if has_cancer:
        print("But this patient developed BOTH - MI first, then Cancer.")
        print("Aladynoulli can predict cancer risk EVEN AFTER MI diagnosis.")
    print("This demonstrates that 'competing risks' are not truly exclusive - patients can develop multiple serious conditions.")
    print("Aladynoulli's multi-disease approach correctly models this clinical reality.")


# ---------------------------------------------------------------------------
# 1. Pi predictions. Loaded first because the placeholder disease names below
#    are sized from its disease axis.
# ---------------------------------------------------------------------------
pi_full_path = Path.home() / "Downloads" / "pi_full_400k.pt"
# Alternative source:
#pi_full_path = Path("/Users/sarahurbut/Library/CloudStorage/Dropbox-Personal/enrollment_predictions_fixedphi_RETROSPECTIVE_pooled/pi_enroll_fixedphi_sex_FULL.pt")
if pi_full_path.exists():
    print(f"\nLoading full 400K pi predictions from: {pi_full_path}")
    # NOTE: weights_only=False unpickles arbitrary objects -- only load trusted files.
    pi_predictions = torch.load(str(pi_full_path), weights_only=False)
    print(f"✓ Loaded full pi predictions: {pi_predictions.shape}")
    use_full_dataset = True
else:
    use_full_dataset = False
    # Reuse batch predictions left in the session by an earlier cell, if any.
    pi_predictions = globals().get("pi_predictions")
    if pi_predictions is not None:
        print(f"\n⚠️  pi_full_400k.pt not found, using batch file")
    else:
        print(f"\n⚠️  Pi predictions file not found: {pi_full_path}")
        print("   This example requires age offset predictions to be generated first.")
        print("   The concept still applies: Aladynoulli uses pi and lambda to predict")
        print("   subsequent diseases after the first diagnosis.")

# ---------------------------------------------------------------------------
# 2. Disease names
# ---------------------------------------------------------------------------
disease_names_path = DATA_DIR / "disease_names.csv"
if disease_names_path.exists():
    disease_names_df = pd.read_csv(disease_names_path)
    # Disease names are in column 1 (the "x" column); column 0 is the row number/ID.
    disease_names = disease_names_df.iloc[:, 1].tolist()
    # Remove header value "x" if it's the first element
    if len(disease_names) > 0 and str(disease_names[0]).lower() == 'x':
        disease_names = disease_names[1:]
    # Convert all disease names to strings (they might be integers or have NaN)
    disease_names = [str(name) if pd.notna(name) else f"Disease_{i}" for i, name in enumerate(disease_names)]
    print(f"✓ Loaded {len(disease_names)} disease names")
    print(f"  First few: {disease_names[:5]}")
    print(f"  Looking for myocardial infarction and cancer...")
elif pi_predictions is not None:
    disease_names = [f"Disease_{i}" for i in range(pi_predictions.shape[1])]
    print("⚠️  Using placeholder disease names")
else:
    disease_names = []
    print("⚠️  No disease names available (disease_names.csv and pi predictions both missing)")

# ---------------------------------------------------------------------------
# 3. Y and E, used to find patients with MI → Cancer progression
# ---------------------------------------------------------------------------
Y_path = DATA_DIR / "Y_tensor.pt"
E_path = DATA_DIR / "E_matrix.pt"

Y_batch = None
E_batch = None
if pi_predictions is not None:
    if Y_path.exists() and E_path.exists():
        Y_full = torch.load(str(Y_path), weights_only=False)
        E_full = torch.load(str(E_path), weights_only=False)
        if use_full_dataset:
            Y_batch = Y_full
            E_batch = E_full
            print(f"✓ Using full dataset: {len(Y_batch)} patients")
        else:
            # Subset to the first batch (matching batch pi predictions)
            Y_batch = Y_full[0:BATCH_SIZE]
            E_batch = E_full[0:BATCH_SIZE]
            print(f"✓ Using subset: {len(Y_batch)} patients")
    else:
        print("\n⚠️  Could not load Y/E tensors for patient identification")

# ---------------------------------------------------------------------------
# 4. Example patient analysis
# ---------------------------------------------------------------------------
if E_batch is not None:
    # E and pi can have different numbers of rows; only patients present in
    # both can be analysed.
    n_patients = min(len(E_batch), len(pi_predictions))
    if n_patients < len(E_batch):
        print(f"⚠️  pi predictions cover {len(pi_predictions)} patients but E has {len(E_batch)}; using the first {n_patients}")

    mi_idx, cancer_indices, serious_cancer_indices = find_disease_indices(disease_names)
    # Use first cancer index for display
    cancer_idx = cancer_indices[0] if cancer_indices else None

    print(f"\nDisease indices:")
    if mi_idx is not None:
        print(f"  Myocardial Infarction: index {mi_idx} ({disease_names[mi_idx]})")
    if cancer_indices:
        print(f"  Cancer diseases found: {len(cancer_indices)} (excluding skin cancers)")
        print(f"  Serious cancers: {len(serious_cancer_indices)}")
        print(f"  Sample serious cancers:")
        for idx in serious_cancer_indices[:10]:  # Show first 10 serious cancers
            print(f"    - index {idx}: {disease_names[idx]}")

    if mi_idx is None or not cancer_indices:
        print("\n⚠️  Could not find required disease indices (MI, Cancer)")
    else:
        patient_idx = EXAMPLE_PATIENT_IDX
        print(f"\nUsing hardcoded Patient {patient_idx}")

        progression = None
        if patient_idx < n_patients:
            progression = check_mi_then_cancer(
                patient_idx, E_batch[patient_idx], mi_idx,
                cancer_indices, serious_cancer_indices, disease_names,
            )
        else:
            print(f"⚠️  Patient {patient_idx} is out of range ({n_patients} patients available)")

        if progression is not None:
            mi_age, cancer_idx, cancer_age = progression
            event_times = E_batch[patient_idx]

            print(f"\nCalculating population baseline risks...")
            pi_at_mi, population_baseline = risks_at_time(pi_predictions, patient_idx, mi_age)

            cancer_pred_final = pi_at_mi[cancer_idx].item()
            cancer_pop_final = population_baseline[cancer_idx].item()
            cancer_rr_final = cancer_pred_final / cancer_pop_final if cancer_pop_final > 0 else 0

            print(f"\n✓ Patient {patient_idx} (Cancer RR={cancer_rr_final:.2f}x)")
            print(f"\nExample Patient: Patient {patient_idx}")
            print(f"  Disease progression:")
            print(f"    1. Myocardial Infarction at age {mi_age + AGE_OFFSET}")
            print(f"    2. Cancer ({disease_names[cancer_idx]}) at age {cancer_age + AGE_OFFSET}")
            print(f"  Total diseases: {(event_times < NO_EVENT_CUTOFF).sum().item()}")

            report_subsequent_diseases(
                event_times, mi_idx, mi_age, pi_at_mi, population_baseline,
                disease_names, cancer_indices, has_cancer=True,
            )
        else:
            print("\n⚠️  Could not find a patient with MI → Cancer progression")
            print("   Falling back to any patient with MI as first disease...")
            patient_idx = first_mi_first_patient(E_batch, mi_idx, n_patients)

            if patient_idx is None:
                print("\n⚠️  Could not find any patient with MI as first disease")
            else:
                event_times = E_batch[patient_idx]
                mi_age = event_times[mi_idx].item()
                print(f"\nUsing Patient {patient_idx} with MI at age {mi_age + AGE_OFFSET}")

                # Check if this patient also has cancer
                cancer_ages = diagnosed_cancers(event_times, cancer_indices)
                if cancer_ages:
                    cancer_idx, cancer_age = min(cancer_ages, key=lambda x: x[1])
                    print(f"  Also has Cancer ({disease_names[cancer_idx]}) at age {cancer_age + AGE_OFFSET}")
                else:
                    print("  Does not have cancer")
                    cancer_idx = None

                print(f"  Total diseases: {(event_times < NO_EVENT_CUTOFF).sum().item()}")

                print(f"\nCalculating population baseline risks...")
                pi_at_mi, population_baseline = risks_at_time(pi_predictions, patient_idx, mi_age)

                report_subsequent_diseases(
                    event_times, mi_idx, mi_age, pi_at_mi, population_baseline,
                    disease_names, cancer_indices, has_cancer=cancer_idx is not None,
                )


MODEL PREDICTIONS: USING PI AND LAMBDA TO PREDICT SUBSEQUENT DISEASES

Loading pi predictions from: /Users/sarahurbut/Library/CloudStorage/Dropbox/age_offset_files/pi_enroll_fixedphi_age_offset_0_sex_0_10000_try2_withpcs_newrun.pt
✓ Loaded pi predictions: torch.Size([10000, 348, 52])
✓ Loaded 348 disease names
  First few: ['Bacterial enteritis', 'Viral Enteritis', 'Gram negative septicemia', 'Bacterial infection NOS', 'Staphylococcus infections']
  Looking for diabetes...

Loading full 400K pi predictions from: /Users/sarahurbut/Downloads/pi_full_400k.pt
✓ Loaded full pi predictions: torch.Size([400000, 348, 52])
✓ Using full dataset: 407878 patients

Disease indices:
  Myocardial Infarction: index 112 (Myocardial infarction)
  Cancer diseases found: 14 (excluding skin cancers)
  Serious cancers: 9
  Sample serious cancers:
    - index 10: Colon cancer
    - index 13: Cancer of bronchus; lung
    - index 16: Breast cancer [female]
    - index 17: Malignant neoplasm of female breast
  

In [1]:
# Compare AWS vs Local pi files
# Executes the external analysis script with IPython's %run magic (notebook-only
# syntax, not plain Python). Per this cell's captured output, the script loads
# the first 10000 patients of both pi files and tabulates patient 5565's
# prostate-cancer pi, baseline and risk ratio per time point for each file.
%run "/Users/sarahurbut/aladynoulli2/pyScripts/new_oct_revision/new_notebooks/analyze_patient5565_risk_ratio.py"

ANALYZING PATIENT 5565 RISK RATIO DIFFERENCE

1. Loading pi files (first 10000 patients for patient 5565 analysis)...
   (This may take a moment to load the files...)
   ✓ Loaded AWS: torch.Size([10000, 348, 52])
   ✓ Loaded Local: torch.Size([10000, 348, 52])

2. Found prostate cancer: index 21 - Cancer of prostate

3. Patient 5565 prostate cancer analysis:

   Time Point  AWS_pi      Local_pi    AWS_baseline  Local_baseline  AWS_RR    Local_RR   RR_Diff
   -----------------------------------------------------------------------------------------------
    0         0.000000  0.000000  0.000000  0.000000  1.0000x  1.0000x  0.0000x
    1         0.000000  0.000000  0.000000  0.000000  1.0000x  1.0000x  0.0000x
    2         0.000000  0.000000  0.000000  0.000000  0.9998x  0.9998x  0.0000x
    3         0.000000  0.000000  0.000000  0.000000  1.0422x  1.0462x  0.0040x
    4         0.000000  0.000000  0.000000  0.000000  1.0685x  1.0723x  0.0038x
    5         0.000000  0.000000  0.00000

## 4. Explanation: Decreasing Hazards at Old Age

The reviewer expressed concern about decreasing hazards at old age. This is **NOT a model failure** but reflects real phenomena:


In [23]:
# Static reviewer-facing explanation of why estimated hazards decline at old
# age. Emitted with a single print call: each argument is one original message
# and sep="\n" reproduces the line breaks of one-print-per-message.
print(
    "="*80,
    "EXPLANATION: DECREASING HAZARDS AT OLD AGE",
    "="*80,
    "\nThis is NOT a model failure but reflects:",
    # Reason 1
    "\n1. ADMINISTRATIVE CENSORING:",
    "   - All individuals censored at age 80 (standard in biobank analyses)",
    "   - Creates interval censoring that appears as declining hazard",
    "   - Limited follow-up beyond age 80 in UK Biobank",
    # Reason 2
    "\n2. COMPETING RISK OF DEATH:",
    "   - Individuals at age 75+ face high mortality risk",
    "   - Those who survive to 80 are SELECTED HEALTHY SURVIVORS",
    "   - Creates apparent risk reduction (survival bias)",
    "   - This is a REAL PHENOMENON, not a model artifact",
    # Reason 3
    "\n3. HEALTHY SURVIVOR EFFECT:",
    "   - Patients who survive to old age without disease are genuinely lower risk",
    "   - The model correctly captures this selection effect",
    "   - This is clinically meaningful: older patients without disease are healthier",
    # Take-away
    "\nINTERPRETATION: The decreasing hazards at old age reflect both",
    "administrative censoring and the competing risk of death.",
    "This is EXPECTED and does not indicate model failure.",
    sep="\n",
)


EXPLANATION: DECREASING HAZARDS AT OLD AGE

This is NOT a model failure but reflects:

1. ADMINISTRATIVE CENSORING:
   - All individuals censored at age 80 (standard in biobank analyses)
   - Creates interval censoring that appears as declining hazard
   - Limited follow-up beyond age 80 in UK Biobank

2. COMPETING RISK OF DEATH:
   - Individuals at age 75+ face high mortality risk
   - Those who survive to 80 are SELECTED HEALTHY SURVIVORS
   - Creates apparent risk reduction (survival bias)
   - This is a REAL PHENOMENON, not a model artifact

3. HEALTHY SURVIVOR EFFECT:
   - Patients who survive to old age without disease are genuinely lower risk
   - The model correctly captures this selection effect
   - This is clinically meaningful: older patients without disease are healthier

INTERPRETATION: The decreasing hazards at old age reflect both
administrative censoring and the competing risk of death.
This is EXPECTED and does not indicate model failure.
