# 50 – Model Evaluation & Explainability

This notebook demonstrates:

- Comparing baseline vs advanced models.
- Visualizing ROC curves.
- Using SHAP (if installed) for local & global explanations.

For speed in a live talk, you may pre-run this notebook and show the plots.

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

# Root folder of the runbook. Defaults to the original location; set the
# CARDIFF_AI_TALK_BASE_DIR environment variable to point the notebook at a
# different checkout without editing this cell.
base_dir = os.environ.get("CARDIFF_AI_TALK_BASE_DIR", "/mnt/data/cardiff_ai_talk_runbook")

# For simplicity, reload data and re-train a single model for each use case.
wf_path = os.path.join(base_dir, "data", "raw", "wildfire_synthetic.csv")
mh_path = os.path.join(base_dir, "data", "raw", "mental_health_mobile_sensing_synthetic.csv")

# One raw DataFrame per use case; the modelling cells select their columns from these.
wf_df = pd.read_csv(wf_path)
mh_df = pd.read_csv(mh_path)

In [None]:
# Wildfire use case: fit a Random Forest and score it on a held-out split
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Predictor columns taken from the wildfire frame.
wf_features = [
    "temp_c", "humidity", "wind_speed", "rain_mm_last_7d",
    "vegetation_index", "population_density", "month", "year",
]

X_wf, y_wf = wf_df[wf_features], wf_df["fire_occurred"]

# Hold out 25% of the rows, stratifying on the label; seed fixed for repeatability.
wf_split = train_test_split(X_wf, y_wf, test_size=0.25, random_state=42, stratify=y_wf)
X_wf_train, X_wf_test, y_wf_train, y_wf_test = wf_split

rf_wf = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1).fit(
    X_wf_train, y_wf_train
)

# Second predict_proba column is used as the ranking score
# (presumably the fire_occurred == 1 class -- confirm the labels are 0/1).
y_wf_prob = rf_wf.predict_proba(X_wf_test)[:, 1]
auc_wf = roc_auc_score(y_wf_test, y_wf_prob)
fpr_wf, tpr_wf, _ = roc_curve(y_wf_test, y_wf_prob)

print("Wildfire ROC-AUC:", auc_wf)

In [None]:
# Mental health use case: Random Forest classifier scored on a held-out split
from sklearn.ensemble import RandomForestClassifier

# Mobile-sensing predictors, grouped by the kind of behaviour they describe.
mh_features = [
    # mobility
    "avg_daily_steps", "avg_daily_distance_km", "time_at_home_hours", "num_unique_locations",
    # communication
    "calls_per_day", "texts_per_day",
    # sleep and device use
    "avg_sleep_duration_hours", "sleep_onset_variability_hours", "screen_time_hours",
]

X_mh, y_mh = mh_df[mh_features], mh_df["high_risk"]

# NOTE: train_test_split is not imported in this cell; it must already be in
# the kernel namespace when the cell runs.
mh_split = train_test_split(X_mh, y_mh, test_size=0.25, random_state=42, stratify=y_mh)
X_mh_train, X_mh_test, y_mh_train, y_mh_test = mh_split

rf_mh = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1).fit(
    X_mh_train, y_mh_train
)

# Second predict_proba column is used as the ranking score
# (presumably the high_risk == 1 class -- confirm the labels are 0/1).
y_mh_prob = rf_mh.predict_proba(X_mh_test)[:, 1]
auc_mh = roc_auc_score(y_mh_test, y_mh_prob)
fpr_mh, tpr_mh, _ = roc_curve(y_mh_test, y_mh_prob)

print("Mental health ROC-AUC:", auc_mh)

In [None]:
# Overlay both ROC curves on one set of axes, plus the chance diagonal
roc_ax = plt.gca()  # same axes the pyplot helpers would draw on

roc_ax.plot(fpr_wf, tpr_wf, label=f"Wildfire (AUC={auc_wf:.2f})")
roc_ax.plot(fpr_mh, tpr_mh, label=f"Mental Health (AUC={auc_mh:.2f})")
# Diagonal reference line, labelled as random chance in the legend.
roc_ax.plot([0, 1], [0, 1], linestyle="--", label="Random chance")

roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")
roc_ax.set_title("ROC Curves – Wildfire vs Mental Health Models")
roc_ax.legend()
plt.show()

In [None]:
# Optional: SHAP explainability (if available)
def positive_class_shap_values(shap_values, class_index=1):
    """Return the 2-D (samples x features) SHAP matrix for a single class.

    ``TreeExplainer.shap_values`` has returned different containers for
    classifiers depending on the installed SHAP release:

    * a list holding one ``(n_samples, n_features)`` array per class,
    * a single ``(n_samples, n_features, n_classes)`` array, or
    * a single ``(n_samples, n_features)`` array.

    Indexing the 3-D form with ``[class_index]`` would select a *sample*
    rather than a class, so each layout is handled explicitly.

    Parameters
    ----------
    shap_values : list of arrays or array-like
        Raw output of ``explainer.shap_values(...)``.
    class_index : int, default 1
        Which class to extract when per-class values are present.

    Returns
    -------
    numpy.ndarray
        Array with one row per sample and one column per feature.
    """
    if isinstance(shap_values, list):
        return shap_values[class_index]
    values = np.asarray(shap_values)
    if values.ndim == 3:
        return values[:, :, class_index]
    # Already a single 2-D matrix: nothing to select.
    return values


try:
    import shap
except ImportError:
    # Only the import is guarded, so failures inside SHAP itself are not
    # misreported as "not installed".
    print("SHAP not installed. Run `pip install shap` to enable explainability plots.")
else:
    # Use a small sample for speed; cap at the test-set size so .sample() does
    # not raise when fewer than 200 rows are available.
    X_wf_shap = X_wf_test.sample(min(200, len(X_wf_test)), random_state=42)

    explainer_wf = shap.TreeExplainer(rf_wf)
    shap_values_wf = explainer_wf.shap_values(X_wf_shap)

    # Plot against the exact rows that were explained (sampled once above).
    shap.summary_plot(positive_class_shap_values(shap_values_wf), X_wf_shap)