In [1]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# two levels up from the notebook to project root, then append to sys.path
sys.path.append(str(Path().resolve().parents[1]))

PROJECT_ROOT = Path().resolve().parents[1]
RESULTS_PATH = PROJECT_ROOT / "results"


from utils.data_loading import load_datasets
from utils.data_preparation import get_spo2_to_po2_interpolator, add_shift_raw_column, add_engineered_features
from utils.evaluation import evaluate_macro_patient_level, print_evaluation, bland_altman_plots, bland_altman_pct_comparison
from utils.modeling import fit_cv_models, predict_cv_ensemble, average_linear_coeffs
from utils.logging import log_run_json


train_df, test_df, val_df, odc = load_datasets()
spo2_to_po2 = get_spo2_to_po2_interpolator(odc)




train_df_engineered = add_engineered_features(train_df, spo2_to_po2)
test_df_engineered = add_engineered_features(test_df, spo2_to_po2)
val_df_engineered = add_engineered_features(val_df, spo2_to_po2)

features = ['shift_raw', 'PiO2(kPa)', 'SpO2(%)', 'Hb', 'log_PiO2', 'log_SpO2', 'SpO2_over_PiO2', 
                 'SpO2_squared', 'Hb_SpO2', 'saturation_deficit', 'CaO2_estimate']



In [2]:
from xgboost import XGBRegressor

features = ['PiO2(kPa)', 'SpO2(%)']

cv_models = fit_cv_models(
    train_df,
    features=features,
    target_col='shift',
    model_class=XGBRegressor,
    k=10,
    model_kwargs={'n_estimators': 100, 'max_depth': 3, 'random_state': 42}
)


test_df['y_pred'] = predict_cv_ensemble(test_df, features, cv_models)

# Evaluate macro patient-level metrics
test_summary = evaluate_macro_patient_level(test_df, y_true_col='shift', y_pred_col='y_pred')
print_evaluation(test_summary)


Macro-averaged per-patient metrics:
MAE  = 0.922
MSE  = 2.331
RMSE = 1.122
Mean Bias Error = 0.039
MAPE = 8.718%
nRMSE = 5.967%


In [3]:
val_df['y_pred'] = predict_cv_ensemble(val_df, features, cv_models)

val_summary = evaluate_macro_patient_level(val_df, y_true_col='shift', y_pred_col='y_pred')
print_evaluation(val_summary)

Macro-averaged per-patient metrics:
MAE  = 0.978
MSE  = 2.656
RMSE = 1.189
Mean Bias Error = -0.058
MAPE = 7.894%
nRMSE = 6.098%


In [4]:
description = f'''
Model: Baseline XGBoost Regressor
Description: This is a model that is trained on the two base features and on the full train dataset and evaluated on the full test/val sets.
Features: {', '.join(features)}
Target: shift
Notes: patient-level macro metrics, ODC from neonatal table
'''

# Filter only scalar (JSON-serializable) entries
json_test_metrics = {
    k: float(v) if isinstance(v, (np.generic, np.float64, np.float32)) else v
    for k, v in test_summary.items()
    if not isinstance(v, pd.Series)
}
json_val_metrics = {
    k: float(v) if isinstance(v, (np.generic, np.float64, np.float32)) else v
    for k, v in val_summary.items()
    if not isinstance(v, pd.Series)
}

json_path = RESULTS_PATH / "single_point_model_metrics_log.json"

log_run_json(
    identifier="Baseline XGBoost Regressor",
    model_type="XGBoost",
    features=features,
    train_subset="full train set",
    test_subset="full test set",
    val_subset="full validation set",  
    description=description,
    test_metrics=json_test_metrics,
    val_metrics=json_val_metrics,

    json_path=json_path
)


✅ Logged run #5 ➜ /Users/sarah/Code/neonatal-odc-shift/results/single_point_model_metrics_log.json


In [6]:
features = ['shift_raw', 'log_PiO2',  'SpO2_over_PiO2','SpO2_squared']

#  Filter subsets where SpO₂ < 92.5%
train_sub = train_df_engineered[train_df_engineered['SpO2(%)'] < 92.5]
test_sub = test_df_engineered[test_df_engineered['SpO2(%)'] < 92.5]
val_sub = val_df_engineered[val_df_engineered['SpO2(%)'] < 92.5]

cv_models = fit_cv_models(
    train_df,
    features=features,
    target_col='shift',
    model_class=XGBRegressor,
    k=10,
    model_kwargs={'n_estimators': 100, 'max_depth': 3, 'random_state': 42}
)


# Predict using ensemble average
test_sub = test_sub.copy()
val_sub = val_sub.copy()

test_sub['y_pred'] = predict_cv_ensemble(test_sub, features, cv_models)
val_sub['y_pred'] = predict_cv_ensemble(val_sub, features, cv_models)

# Evaluate using patient-level macro averaging
test_summary = evaluate_macro_patient_level(test_sub, y_true_col='shift', y_pred_col='y_pred')
val_summary = evaluate_macro_patient_level(val_sub, y_true_col='shift', y_pred_col='y_pred')

# Print evaluation
print("Test set evaluation:")
print_evaluation(test_summary)

print("Validation set evaluation:")
print_evaluation(val_summary)

Test set evaluation:
Macro-averaged per-patient metrics:
MAE  = 0.636
MSE  = 1.290
RMSE = 0.728
Mean Bias Error = 0.103
MAPE = 6.034%
nRMSE = 4.246%
Validation set evaluation:
Macro-averaged per-patient metrics:
MAE  = 0.560
MSE  = 0.838
RMSE = 0.594
Mean Bias Error = -0.074
MAPE = 4.380%
nRMSE = 3.044%


In [7]:
description = f'''
Model: XGBoost Regressor Subset
Description: This is a model that is trained on all engineered features on the subset of patients with SpO₂ < 92.5%.
Features: {', '.join(features)}
Target: shift
Notes: patient-level macro metrics, ODC from neonatal table
'''

# Filter only scalar (JSON-serializable) entries
json_test_metrics = {
    k: float(v) if isinstance(v, (np.generic, np.float64, np.float32)) else v
    for k, v in test_summary.items()
    if not isinstance(v, pd.Series)
}
json_val_metrics = {
    k: float(v) if isinstance(v, (np.generic, np.float64, np.float32)) else v
    for k, v in val_summary.items()
    if not isinstance(v, pd.Series)
}

json_path = RESULTS_PATH / "single_point_model_metrics_log.json"

log_run_json(
    identifier="XBoost Regressor Subset",
    model_type="XGBoost",
    features=features,
    train_subset="full train set",
    test_subset="full test set",
    val_subset="full validation set",  
    description=description,
    test_metrics=json_test_metrics,
    val_metrics=json_val_metrics,

    json_path=json_path
)


✅ Logged run #6 ➜ /Users/sarah/Code/neonatal-odc-shift/results/single_point_model_metrics_log.json
