# 02 — Modeling

> ⚠️ **DISCLAIMER**: This is an educational prototype. Output is NOT medical advice.

In [None]:
import sys

# Put ../src at the front of the import path so the diabetes_explainer
# package imported below can be found when running from this directory.
sys.path.insert(0, '../src')

from diabetes_explainer.synth_data import generate
from diabetes_explainer import data_schema
from diabetes_explainer.features import build_features, FEATURE_DESCRIPTIONS

# Generate 14 days of data with a fixed seed and run it straight through
# schema validation; only the validated frame is kept.
df = data_schema.validate(generate(n_days=14, seed=42))

# X holds the model inputs and y the values the model is later fitted against.
X, y = build_features(df)

print(f'Features: {X.shape[1]}, Samples: {X.shape[0]}')

# Last expression of the cell: the notebook renders the first rows of X.
X.head()

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

# Hold out the final 20% of rows as the test set. shuffle=False keeps the
# original row order, so the test rows are the ones that come last.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Hyperparameters gathered in one place; random_state fixes the subsampling
# so repeated runs of this cell give the same model.
gbr_params = {
    'n_estimators': 200,
    'max_depth': 4,
    'learning_rate': 0.05,
    'subsample': 0.8,
    'random_state': 42,
}
model = GradientBoostingRegressor(**gbr_params)
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)
mae, rmse = (
    mean_absolute_error(y_test, y_pred),
    root_mean_squared_error(y_test, y_pred),
)
print(
    f'Test MAE:  {mae:.2f} mg/dL',
    f'Test RMSE: {rmse:.2f} mg/dL',
    sep='\n',
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Map every feature column to the importance score of the fitted model.
importances = dict(zip(X.columns, model.feature_importances_))

# Rank by score, highest first, then keep the ten strongest features.
ranked_items = sorted(
    importances.items(),
    key=lambda item: item[1],
    reverse=True,
)
sorted_items = ranked_items[:10]

# Use the human-readable description when one exists; otherwise fall back
# to the raw column name.
feat_labels = [FEATURE_DESCRIPTIONS.get(name, name) for name, _ in sorted_items]
feat_vals = [score for _, score in sorted_items]

fig, ax = plt.subplots(figsize=(9, 5))
# barh draws the first entry at the bottom, so feed the bars in reverse to
# put the most important feature at the top of the chart.
ax.barh(
    list(reversed(feat_labels)),
    list(reversed(feat_vals)),
    color='steelblue',
)
ax.set_xlabel('Feature Importance')
ax.set_title('Top 10 Feature Importances\n(Synthetic data — educational only)')
ax.tick_params(axis='y', labelsize=9)
plt.tight_layout()
plt.show()

In [None]:
# Overlay the model's predictions on the actual values for the leading part
# of the test set.
#
# n_plot is clamped to the test-set length so that the count shown in the
# title always equals the number of points actually drawn, and the title is
# built from n_plot instead of repeating the number by hand.
n_plot = min(200, len(y_test))

fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(y_test.values[:n_plot], label='Actual', alpha=0.8)
ax.plot(y_pred[:n_plot], label='Predicted (30 min ahead)', alpha=0.8)
ax.set_xlabel('Time steps (5 min each)')
ax.set_ylabel('Glucose (mg/dL)')
ax.set_title(
    f'Predicted vs Actual Glucose (test set, first {n_plot} points)\n'
    '(Synthetic data — educational only)'
)
ax.legend()
plt.tight_layout()
plt.show()