## Imports

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# project files stored on Drive are reachable as ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
# 📦 Core Imports
import os
import sys
import json
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts
from sklearn.metrics import mean_absolute_error, r2_score

# 🧠 Project Root
project_root = "/content/drive/MyDrive/BrainAgeRegression"

# 🛣️ Add Project Modules to Path
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
for _module_dir in (project_root, os.path.join(project_root, "models")):
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

# 🛠️ Custom Utilities
from utils.utils import (
    BrainAgeDataset, set_seed, count_parameters,
    split_dataframe, brain_mri_augment, stratified_split
)
from utils.train_utils import (
    BrainAgeTrainer, compute_age_weights, compute_balanced_age_weights
)
from utils.eval_utils import BrainAgeEvaluator
from utils.resnet import ResNet3D
from utils.brain_age_analysis import BrainAgeAnalysis

# ⚙️ Device Setup
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# 🔁 Reproducibility
set_seed(42)

# 💾 Directory Setup
# Model artefacts live under saved_models/; metrics and plots under results/.
results_dir = os.path.join(project_root, "results")
save_dir = os.path.join(project_root, "saved_models", "healthy_model")
metrics_dir = os.path.join(results_dir, "metrics")
plots_dir = os.path.join(results_dir, "plots")

# 📁 Ensure All Output Directories Exist
# makedirs also creates the intermediate results/ and saved_models/ folders.
for _output_dir in (save_dir, metrics_dir, plots_dir):
    os.makedirs(_output_dir, exist_ok=True)



Using device: cuda


# Data / Model Loading

In [3]:
# 🧠 1. Load Metadata
# Data locations are derived from project_root so that relocating the project
# only requires changing that one path.
data_dir = os.path.join(project_root, "data")
df = pd.read_csv(os.path.join(data_dir, "matched_metadata.csv"))
df['CDR'] = pd.to_numeric(df['CDR'], errors='coerce')  # Convert blanks to NaN

# ✅ 2. Define Healthy and Unhealthy Groups
# Healthy = CDR == 0.0 OR (CDR is missing AND Age < 65)
healthy_df = df[(df['CDR'] == 0.0) | (df['CDR'].isna() & (df['Age'] < 65))].copy()
unhealthy_df = df[df['CDR'] > 0].copy()

# 🧾 Summary
# "Unknown" = rows whose CDR could not be parsed as a number. Those that meet
# the Age < 65 rule are counted in healthy_df; the remainder are excluded.
num_unknown_total = df['CDR'].isna().sum()
num_unknown_used = healthy_df['CDR'].isna().sum()
num_unknown_excluded = num_unknown_total - num_unknown_used
percent_unknown = (num_unknown_total / len(df)) * 100

print(f"🧠 Healthy individuals: {len(healthy_df)}")
print(f"⚠️ Unhealthy individuals: {len(unhealthy_df)}")
print(f"❓ Unknown CDR (excluded): {num_unknown_excluded}")
print(f"📊 Total unknown CDR: {num_unknown_total} ({percent_unknown:.2f}%)")

# 📂 3. Stratified Split of Healthy Data
train_df, val_df, test_df = stratified_split(healthy_df, bins=8)

# 🧪 4. Compute Age Weights with Custom Boost (on training set only)
# One boost factor per age bin (8 bins -> 8 factors).
custom_boost = [1.0, 1.0, 1.0, 1.4, 1.7, 1.5, 1.2, 1.1]
age_weights = compute_balanced_age_weights(train_df, bins=8, custom_boost=custom_boost)

# 🧠 5. Initialize Model
model = ResNet3D(layers=[1, 2, 2]).to(device)
print(f"🔢 Total trainable parameters: {count_parameters(model):,}")

# 🧾 6. Dataset & DataLoader Setup
nifti_dir = os.path.join(data_dir, "nifti")

# Only the training dataset receives age weights.
train_dataset = BrainAgeDataset(train_df, nifti_dir=nifti_dir, transform=None, age_weights=age_weights)
val_dataset = BrainAgeDataset(val_df, nifti_dir=nifti_dir, transform=None)
test_dataset = BrainAgeDataset(test_df, nifti_dir=nifti_dir, transform=None)

# Settings shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=4, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# 📊 7. Age Bin Distribution (for reference)
# .assign builds a new frame instead of adding a column in place, so the frame
# already handed to train_dataset is left untouched; train_df['age_bin'] is
# still available to later cells.
train_df = train_df.assign(age_bin=pd.qcut(train_df['Age'], q=8, duplicates='drop'))
print(train_df['age_bin'].value_counts().sort_index())


🧠 Healthy individuals: 288
⚠️ Unhealthy individuals: 87
❓ Unknown CDR (excluded): 0
📊 Total unknown CDR: 172 (45.87%)

📊 Age Bin Weights:
(17.999, 20.0]: 0.4744
(20.0, 22.0]: 0.5447
(22.0, 25.0]: 0.6684
(25.0, 34.0]: 0.9804
(34.0, 51.0]: 1.0000
(51.0, 68.0]: 0.8824
(68.0, 78.0]: 0.7059
(78.0, 93.0]: 0.6471
🔢 Total trainable parameters: 8,000,771
age_bin
(17.999, 20.0]    31
(20.0, 22.0]      27
(22.0, 25.0]      22
(25.0, 34.0]      21
(34.0, 51.0]      25
(51.0, 68.0]      25
(68.0, 78.0]      25
(78.0, 93.0]      25
Name: count, dtype: int64


# Loss, Optimizer, Scheduler, Age Weights

In [4]:
# ⚙️ 1. Loss, Optimizer, Scheduler Setup
# Mean-squared-error criterion on the predicted age.
# NOTE(review): the trainer is later created with use_weighted_loss=True, so
# how this criterion is combined with the age weights is decided inside
# BrainAgeTrainer — confirm there.
criterion = nn.MSELoss()
# Adam over all model parameters: base learning rate 1e-4, weight decay 5e-4.
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=5e-4)
# Cosine annealing with warm restarts: the first cycle lasts T_0=10 scheduler
# steps and each following cycle is T_mult=2 times longer. Whether a "step" is
# an epoch or a batch depends on how the trainer calls scheduler.step() —
# TODO confirm.
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

In [5]:
# Re-execute utils/train_utils.py so edits made to the file after its first
# import are picked up without restarting the kernel. Names bound earlier via
# `from utils.train_utils import ...` keep pointing at the old objects until
# they are imported again.
import importlib
import utils.train_utils
importlib.reload(utils.train_utils)

<module 'utils.train_utils' from '/content/drive/MyDrive/BrainAgeRegression/utils/train_utils.py'>

In [6]:
from utils.train_utils import (
    BrainAgeTrainer, compute_age_weights, compute_balanced_age_weights
)

# Train The Model

In [7]:
# 🧠 2. Initialize Trainer
# All trainer options are collected in one mapping so they can be inspected
# before the trainer is built.
trainer_config = {
    "model": model,
    "train_loader": train_loader,
    "val_loader": val_loader,
    "criterion": criterion,
    "optimizer": optimizer,
    "device": device,
    "scheduler": scheduler,
    "augment": True,                # Apply data augmentation
    "use_weighted_loss": True,      # Use age-weighted loss
    "early_stopping_patience": 12,  # Stop if no val improvement
}
trainer = BrainAgeTrainer(**trainer_config)

# 🚀 3. Train the Model
# track_predictions=True is what makes trainer.get_predictions() usable later.
# NOTE(review): presumed from the argument name — confirm in BrainAgeTrainer.
trainer.train(epochs=40, track_predictions=True)



🔁 Epoch 1/40
📉 Train Loss: 31.1154 | 🧪 Val Loss: 2031.8498
🧮 Calibration → age_scale: 1.0060, age_bias: 0.0051
💾 Best model updated
📉 Current Learning Rate: 3.53e-05

🔁 Epoch 2/40
📉 Train Loss: 28.5655 | 🧪 Val Loss: 1931.3053
🧮 Calibration → age_scale: 1.0084, age_bias: 0.0069
💾 Best model updated
📉 Current Learning Rate: 4.74e-05

🔁 Epoch 3/40
📉 Train Loss: 26.8510 | 🧪 Val Loss: 1710.7570
🧮 Calibration → age_scale: 1.0118, age_bias: 0.0093
💾 Best model updated
📉 Current Learning Rate: 7.35e-05

🔁 Epoch 4/40
📉 Train Loss: 23.5310 | 🧪 Val Loss: 1282.2415
🧮 Calibration → age_scale: 1.0176, age_bias: 0.0130
💾 Best model updated
📉 Current Learning Rate: 1.00e-04

🔁 Epoch 5/40
📉 Train Loss: 18.5781 | 🧪 Val Loss: 857.8227
🧮 Calibration → age_scale: 1.0255, age_bias: 0.0175
💾 Best model updated
📉 Current Learning Rate: 7.19e-05

🔁 Epoch 6/40
📉 Train Loss: 15.5434 | 🧪 Val Loss: 1113.5365
🧮 Calibration → age_scale: 1.0293, age_bias: 0.0195
📉 Current Learning Rate: 1.40e-05

🔁 Epoch 7/40
📉 Trai

# Evaluate Performance

In [8]:
# 📊 4. Evaluate Performance
# Fetch the tracked predictions once; each split is a (predictions, targets) pair.
predictions = trainer.get_predictions()
train_pred, train_true = predictions['train']
val_pred, val_true = predictions['val']

# Preview a few predictions
# Slicing (rather than indexing range(10)) keeps this cell working when a
# split holds fewer than PREVIEW_COUNT samples.
PREVIEW_COUNT = 10
for split_label, split_true, split_pred in (
    ("[Train]", train_true, train_pred),
    ("[Val]  ", val_true, val_pred),
):
    for true_age, pred_age in zip(split_true[:PREVIEW_COUNT], split_pred[:PREVIEW_COUNT]):
        print(f"{split_label} True: {true_age:.1f}, Predicted: {pred_age:.1f}")

# 📈 5. Compute Metrics
def evaluate(y_true, y_pred, label="Set"):
    """Print and return MAE, RMSE and R² for one set of predictions.

    Args:
        y_true: Sequence of ground-truth values.
        y_pred: Sequence of predicted values, aligned with ``y_true``.
        label: Prefix used in the printed summary line.

    Returns:
        dict with keys ``"mae"``, ``"rmse"`` and ``"r2"``, each a Python float
        (so the result can be written with ``json.dump``).
    """
    mae = mean_absolute_error(y_true, y_pred)
    residuals = np.array(y_true) - np.array(y_pred)
    rmse = np.sqrt(np.mean(residuals**2))
    r2 = r2_score(y_true, y_pred)

    summary = f"{label} | MAE: {mae:.2f} | RMSE: {rmse:.2f} | R²: {r2:.3f}"
    print(summary)

    return dict(mae=float(mae), rmse=float(rmse), r2=float(r2))


# Report fit quality on both splits. Because the second call is the last
# expression in the cell, its returned metrics dict is also displayed.
evaluate(train_true, train_pred, "Train")
evaluate(val_true, val_pred, "Validation")

[Train] True: 93.0, Predicted: 87.5
[Train] True: 26.0, Predicted: 25.8
[Train] True: 20.0, Predicted: 22.9
[Train] True: 25.0, Predicted: 26.3
[Train] True: 65.0, Predicted: 40.8
[Train] True: 80.0, Predicted: 80.0
[Train] True: 22.0, Predicted: 15.4
[Train] True: 22.0, Predicted: 31.7
[Train] True: 81.0, Predicted: 51.7
[Train] True: 80.0, Predicted: 53.0
[Val]   True: 19.0, Predicted: 19.8
[Val]   True: 23.0, Predicted: 30.4
[Val]   True: 87.0, Predicted: 59.1
[Val]   True: 90.0, Predicted: 93.0
[Val]   True: 74.0, Predicted: 67.6
[Val]   True: 20.0, Predicted: 20.4
[Val]   True: 23.0, Predicted: 29.2
[Val]   True: 82.0, Predicted: 66.9
[Val]   True: 28.0, Predicted: 40.4
[Val]   True: 20.0, Predicted: 18.8
Train | MAE: 11.95 | RMSE: 15.71 | R²: 0.582
Validation | MAE: 8.19 | RMSE: 11.51 | R²: 0.767


{'mae': 8.190351486206055,
 'rmse': 11.507155418395996,
 'r2': 0.7671310632785737}

# Plot Loss + Predictions

In [9]:
# Create results folders
# These paths are relative to the notebook's current working directory (not
# to project_root); later cells open files under "results/metrics/".
for _relative_dir in ("results/plots", "results/metrics"):
    os.makedirs(_relative_dir, exist_ok=True)

In [10]:
# Retrieve training history
# The plotting cell reads the 'train_loss' and 'val_loss' entries from this object.
history = trainer.get_history()

In [27]:
# 📉 Loss Curves
# One line per tracked loss series, saved to plots_dir and closed so the
# figure is not rendered inline or kept in memory.
fig, ax = plt.subplots()
for _history_key, _legend_label in (('train_loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    ax.plot(history[_history_key], label=_legend_label)
ax.set(xlabel="Epoch", ylabel="MSE Loss", title="Loss Curve")
ax.legend()
ax.grid(True)
fig.tight_layout()
loss_curve_path = os.path.join(plots_dir, "loss_curve.png")
fig.savefig(loss_curve_path)
plt.close(fig)

# 🎯 Predicted vs True (Validation)
# The dashed red diagonal marks perfect prediction (predicted == true) over
# the range of true ages in the validation set.
fig, ax = plt.subplots()
ax.scatter(val_true, val_pred, alpha=0.6)
age_lo, age_hi = min(val_true), max(val_true)
ax.plot([age_lo, age_hi], [age_lo, age_hi], 'r--')
ax.set(xlabel="True Age", ylabel="Predicted Age", title="Predicted vs True Age")
ax.grid(True)
fig.tight_layout()
scatter_path = os.path.join(plots_dir, "predicted_vs_true_val.png")
fig.savefig(scatter_path)
plt.close(fig)


# Save Model + Results

In [21]:
# Save model weights
# Persists the parameters currently held by `model`.
# NOTE(review): the training log prints "Best model updated"; whether `model`
# holds the best or the last-epoch weights at this point depends on
# BrainAgeTrainer — confirm before relying on this checkpoint.
torch.save(model.state_dict(), os.path.join(save_dir, "resnet3d_brain_age.pth"))

# Save predictions
# One .npz archive with four named arrays (train/val predictions and targets).
np.savez(
    os.path.join(save_dir, "resnet3d_predictions.npz"),
    train_pred=train_pred,
    train_true=train_true,
    val_pred=val_pred,
    val_true=val_true
)


# Save the training history next to the model.
# NOTE(review): if `history` is a dict (it is indexed by string keys when
# plotting), np.save stores it as a pickled object and it must be read back
# with np.load(..., allow_pickle=True).
history = trainer.get_history()
np.save(os.path.join(save_dir, "training_history.npy"), history)


In [22]:
# Save metrics
train_metrics = evaluate(train_true, train_pred, "Train")
val_metrics = evaluate(val_true, val_pred, "Validation")

# Convert NumPy floats to native Python floats
train_metrics = {k: float(v) for k, v in train_metrics.items()}
val_metrics = {k: float(v) for k, v in val_metrics.items()}

# Write into metrics_dir (under project_root on Drive) rather than a path
# relative to the current working directory, so the files land next to the
# other results and survive the Colab session.
os.makedirs(metrics_dir, exist_ok=True)
for _split_name, _split_metrics in (("train", train_metrics), ("val", val_metrics)):
    _metrics_path = os.path.join(metrics_dir, f"healthy_model_{_split_name}_metrics.json")
    with open(_metrics_path, "w") as f:
        json.dump(_split_metrics, f, indent=2)

Train | MAE: 11.95 | RMSE: 15.71 | R²: 0.582
Validation | MAE: 8.19 | RMSE: 11.51 | R²: 0.767


In [25]:
# Re-execute utils/eval_utils.py to pick up edits made since it was first
# imported, then re-bind BrainAgeEvaluator so the name refers to the reloaded
# class rather than the one imported earlier.
import importlib
import utils.eval_utils  # Make sure it's already imported
importlib.reload(utils.eval_utils)
from utils.eval_utils import BrainAgeEvaluator


In [26]:
# Evaluate on Test Set
test_dataset = BrainAgeDataset(test_df, nifti_dir=nifti_dir, transform=None)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=4, pin_memory=True)

# Rebuild the architecture and restore the saved weights. map_location lets
# the checkpoint load even when the current device differs from the one it
# was saved on (e.g. saved on GPU, evaluated on CPU).
model = ResNet3D(layers=[1, 2, 2]).to(device)
checkpoint_path = os.path.join(save_dir, "resnet3d_brain_age.pth")
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()

evaluator = BrainAgeEvaluator(model, device)

test_metrics, test_pred, test_true = evaluator.evaluate(test_loader)

# Diagnostic Plots (saved)
# Each entry is (output filename, zero-argument callable returning a figure).
# The figures are produced, saved to plots_dir and closed one at a time, in
# this order: predictions, prediction distribution, residuals, prediction
# bias, stratified MAE.
diagnostic_plots = [
    ("predicted_vs_true_test.png",
     lambda: evaluator.plot_predictions(test_true, test_pred, title="Predicted vs. True Age (Test Set)")),
    ("prediction_distribution.png",
     lambda: evaluator.plot_prediction_distribution(test_pred)),
    ("residuals.png",
     lambda: evaluator.plot_residuals(test_true, test_pred)),
    ("prediction_bias.png",
     lambda: evaluator.plot_prediction_bias(test_true, test_pred, bins=10, method='qcut')),
    ("stratified_mae.png",
     lambda: evaluator.stratified_mae(test_true, test_pred, bins=10, method='qcut')),
]
for plot_filename, make_figure in diagnostic_plots:
    fig = make_figure()
    fig.savefig(os.path.join(plots_dir, plot_filename))
    plt.close(fig)

# Post-Hoc Bias Correction
# NOTE(review): the correction is called with the test labels themselves; if
# it is fitted on them, the corrected metrics are optimistic — confirm in
# BrainAgeEvaluator and consider fitting on the validation set instead.
corrected_pred, corrected_metrics = evaluator.apply_posthoc_bias_correction(test_true, test_pred)

print("📉 Post-hoc Bias-Corrected Metrics")
for k, v in corrected_metrics.items():
    print(f"{k.upper()}: {v:.2f}")

# Save metrics
# Test results get their own files inside metrics_dir. Previously they were
# written to the *train* and *val* metrics filenames, overwriting those
# results under misleading names.
os.makedirs(metrics_dir, exist_ok=True)
test_metric_files = {
    "healthy_model_test_metrics.json": test_metrics,
    "healthy_model_test_metrics_bias_corrected.json": corrected_metrics,
}
for metrics_filename, metrics_values in test_metric_files.items():
    with open(os.path.join(metrics_dir, metrics_filename), "w") as f:
        # default=float converts NumPy scalar values that json cannot encode natively.
        json.dump(metrics_values, f, indent=2, default=float)

# Save Predictions
evaluator.save_predictions(
    os.path.join(save_dir, "resnet3d_test_predictions.npz"),
    test_pred=test_pred,
    test_true=test_true,
    corrected_pred=corrected_pred
)


  bias = df.groupby('age_bin')['error'].mean().reset_index()
  ax.set_xticklabels(bias['age_bin'].astype(str), rotation=45, ha='right')
  fig.tight_layout()
  fig.savefig(os.path.join(plots_dir, "prediction_bias.png"))
  stratified = df.groupby('age_bin').apply(lambda g: mean_absolute_error(g['true'], g['pred']))
  ax.set_xticklabels(stratified['Age Group'].astype(str), rotation=45, ha='right')
  fig.tight_layout()


📉 Post-hoc Bias-Corrected Metrics
MAE: 6.02
RMSE: 8.28
R2: 0.89
✅ Saved predictions to /content/drive/MyDrive/BrainAgeRegression/saved_models/healthy_model/resnet3d_test_predictions.npz


  fig.savefig(os.path.join(plots_dir, "stratified_mae.png"))


### ✅ Summary

A 3D ResNet model trained on cognitively healthy individuals only (CDR = 0, or missing CDR with age < 65; **N = 201** training, **N = 43** validation) demonstrated strong age-predictive performance:

- **Train MAE**: `11.95` years  
- **Validation MAE**: `8.19` years  
- **Validation R²**: `0.767`

After applying post-hoc bias correction on the test set, performance improved further:

- **Test MAE**: `6.02` years  
- **Test RMSE**: `8.28` years  
- **Test R²**: `0.89`

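This indicates that, after bias correction, the model explains roughly 89% of the variance in age on the held-out healthy test set, suggesting that structural brain features carry a strong age-predictive signal. Because the training, validation, and test sets all contain only cognitively healthy individuals, these results do not yet show how the model behaves in the presence of cognitive impairment.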

---

### 🧠 Model Architecture

| Component        | Description |
|------------------|-------------|
| **Architecture** | Custom ResNet3D with `[1, 2, 2]` residual blocks |
| **Input**        | 3D structural MRI volumes |
| **Output**       | Scalar age prediction |
| **Calibration**  | Learnable `age_scale` and `age_bias` parameters |
| **Regularization** | Dropout `p = 0.5` in FC head |
| **Clamping**     | ❌ Not used |

---

### 📊 Performance Summary

| Dataset     | MAE (↓) | RMSE (↓) | R² (↑) |
|-------------|---------|----------|--------|
| Train       | 11.95   | 15.71    | 0.582  |
| Validation  | 8.19    | 11.51    | 0.767  |
| Test (Bias-Corrected) | 6.02    | 8.28     | 0.89   |

---

### 🧠 Interpretation

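- The model performs reasonably on the training and validation sets, and the test-set error is lower still after post-hoc correction.
- The bias correction step reduces test error. However, it is applied using the test set's own true ages, so the corrected metrics are likely optimistic and should not be read as evidence of improved generalization until the correction is fitted on separate data (e.g., the validation set).
- This suggests that while the model captures broad age-related structural patterns, calibration is key for robust deployment.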

---

### 🧠 Conclusion

The model demonstrates strong predictive accuracy on cognitively healthy individuals, and post-hoc bias correction further reduces its error on the test set. This reinforces the value of brain age modeling in clinical research, while also highlighting the importance of interpretability and calibration when applying such models to heterogeneous populations — for example, the cognitively impaired (CDR > 0) group that was set aside in this notebook and has not yet been evaluated.

> Brain age models can estimate age with high accuracy, but understanding deviations (e.g., brain age gap) requires careful modeling and validation.


In [28]:
# Switch the notebook's working directory to the project folder on Drive so
# that the shell/git commands in the following cells run there.
%cd /content/drive/MyDrive/BrainAgeRegression

/content/drive/MyDrive/BrainAgeRegression


In [29]:
# Create a new git repository in the current working directory.
!git init


[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/drive/MyDrive/BrainAgeRegression/.git/


In [30]:
# Register the GitHub repository as the `origin` remote.
# NOTE(review): re-running this cell fails once `origin` already exists.
!git remote add origin https://github.com/lincolndibler/BrainAgeRegression.git


In [31]:
# Set the author identity used for commits. --global writes to the user-level
# git config of the machine running the notebook, not to this repository.
# NOTE(review): a personal e-mail address is hard-coded here and will be
# visible to anyone the notebook is shared with.
!git config --global user.email "lincoln.dibler1@gmail.com"
!git config --global user.name "lincolndibler"


In [34]:
!git add .
!git commit -m "Intial Commit"
!git branch -M main
!git push -u origin main


error: open("BrainAgeRegression.gdoc"): Operation not supported
error: unable to index file 'BrainAgeRegression.gdoc'
fatal: adding files failed
On branch main

Initial commit

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31mBrainAgeRegression.gdoc[m
	[31mdata/[m
	[31mmain.ipynb[m
	[31mmodels/[m
	[31mnotebooks/[m
	[31mresults/[m
	[31msaved_models/[m
	[31mutils/[m

nothing added to commit but untracked files present (use "git add" to track)
error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/lincolndibler/BrainAgeRegression.git'
[m

In [None]:
# Archive the whole project folder recursively, skipping notebook checkpoint
# directories. The archive is written to the current working directory.
# NOTE(review): the log shows __pycache__/ being added, and nothing excludes
# .git/, data/ or saved_models/, so the archive may be large; the per-file log
# also floods the cell output (zip's -q flag would silence it).
!zip -r brain_age_project.zip /content/drive/MyDrive/BrainAgeRegression -x "*.ipynb_checkpoints/*"


Scanning files 
  adding: content/drive/MyDrive/BrainAgeRegression/ (stored 0%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/ (stored 0%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/02_model_v1.ipynb (deflated 30%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/04_model_v3.ipynb (deflated 83%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/05_transfermodel_v1.ipynb (deflated 42%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/Investigation.ipynb (deflated 56%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/03_model_v2.ipynb (deflated 80%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/01_download_extract.ipynb (deflated 93%)
  adding: content/drive/MyDrive/BrainAgeRegression/notebooks/06_transfermodel_v2.ipynb (deflated 73%)
  adding: content/drive/MyDrive/BrainAgeRegression/models/ (stored 0%)
  adding: content/drive/MyDrive/BrainAgeRegression/models/__pycache__/ (stored 0%)
  adding

In [None]:
!git commit -m "Intial Commit"