# ICU Risk Model Training Notebook

This notebook documents the training process for the `RandomForestRegressor` model used in the AI Hospital ICU Monitoring System.

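The production model is saved via `train_model.py`; this notebook is for exploration and documentation. Note that the final cell of this notebook also writes a model file to `../models/icu_risk_model.pkl`, so skip that cell if you do not want to replace an existing file at that path.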

In [None]:
import sys, os
# Put the parent of the current working directory first on the import path.
# NOTE(review): presumably this is what lets `utils.simulator` (imported below)
# resolve when the kernel starts in the notebook's own folder — confirm.
sys.path.insert(0, os.path.join(os.getcwd(), '..'))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
import joblib

from utils.simulator import generate_patient_dataset, FEATURE_COLUMNS

In [None]:
# Build the patient table from the project's simulator module.
# NOTE(review): the fixed seed presumably makes the generated data
# repeatable across runs — confirm against utils.simulator.
df = generate_patient_dataset(seed=42, n_patients=500)

# Report (rows, columns) on stdout, then show the first rows as the cell output.
print(df.shape)
df.head()

In [None]:
# Summary statistics for the generated table, displayed as the cell output.
df.describe()

In [None]:
# Regression target and feature matrix, taken from the frame via `.values`.
y = df['risk_score'].values
X = df[FEATURE_COLUMNS].values

# Hold out 20% of the rows for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit the forest on the training portion only (`fit` returns the estimator).
model = RandomForestRegressor(
    random_state=42,
    n_jobs=-1,
    n_estimators=200,
    min_samples_leaf=4,
    max_depth=8,
).fit(X_train, y_train)

# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)
print(
    f'R² Score (test): {r2_score(y_test, y_pred):.4f}',
    f'MAE (test): {mean_absolute_error(y_test, y_pred):.4f}',
    sep='\n',
)

In [None]:
# 5-fold cross-validated R² computed over the full X / y arrays, i.e. including
# the rows that the train/test split above held out.
# NOTE(review): per the scikit-learn docs the estimator is cloned for each fold,
# so the already-fitted `model` should be left unchanged — confirm.
cv_scores = cross_val_score(estimator=model, X=X, y=y, scoring='r2', cv=5)

# Per-fold scores first, then their mean and standard deviation.
print('CV R² scores:', cv_scores.round(4))
print(f'Mean: {cv_scores.mean():.4f}  Std: {cv_scores.std():.4f}')

In [None]:
# Label the fitted forest's importances with the feature names and rank them
# from largest to smallest.
importances = (
    pd.Series(model.feature_importances_, index=FEATURE_COLUMNS)
    .sort_values(ascending=False)
)

# Bar chart of the ranking; the axis label and title are set on the current axes.
importances.plot.bar(figsize=(8, 4), color='steelblue')
plt.ylabel('Importance')
plt.title('Feature Importances')
plt.tight_layout()
plt.show()

In [None]:
# Save model
# The output location is defined once so that the directory that gets created,
# the file that gets written and the path that gets reported cannot drift apart.
# Both paths are relative to the kernel's current working directory.
# NOTE(review): joblib.dump replaces any existing file at this path — confirm
# this is not the artifact produced by train_model.py before running the cell.
model_dir = '../models'
model_path = f'{model_dir}/icu_risk_model.pkl'

os.makedirs(model_dir, exist_ok=True)
joblib.dump(model, model_path)
print(f'Model saved to {model_path}')