ML TRAINING + TUNING + MODEL SELECTION

In [None]:
# =====================================================
# ML IMPLEMENTATION – 5 MODELS + PERFORMANCE COMPARISON
# =====================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import LeaveOneOut, cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor
)

# -----------------------------------------------------
# 1. ML DATASET
# -----------------------------------------------------
# Predictor columns, in the order the estimators receive them. Keep the
# order stable: anything that pairs per-feature values with this list
# positionally depends on it.
# NOTE(review): several of these look like engineered load/risk indices --
# confirm none of them is computed from the target column, otherwise the
# cross-validated scores will be optimistic.
ml_features = [
    "op_demand",
    "total_beds", "icu_beds",
    "ventilators_invasive", "ventilators_non_invasive",
    "doctor_load", "nurse_load",
    "ambulance_risk", "ventilator_risk", "demand_risk_index",
]

# Column the models are trained to predict.
target = "ip_demand"

# Work on a copy so the upstream feature table is never modified from here.
df_ml = df_features.copy()
X, y = df_ml[ml_features], df_ml[target]

print("Number of samples:", len(X))

# -----------------------------------------------------
# 2. DEFINE MODELS (5 TOTAL)
# -----------------------------------------------------
# Random Forest and Extra Trees are configured identically, so their
# settings live in one place. Every stochastic estimator gets a fixed seed
# so repeated runs produce the same scores.
forest_params = {"n_estimators": 200, "max_depth": 5, "random_state": 42}

# Display name -> unfitted estimator. Insertion order is the order in which
# the evaluation loop visits the models.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(alpha=1.0),
    "Random Forest": RandomForestRegressor(**forest_params),
    "Gradient Boosting": GradientBoostingRegressor(
        n_estimators=200, learning_rate=0.05, max_depth=3, random_state=42
    ),
    "Extra Trees": ExtraTreesRegressor(**forest_params),
}

# -----------------------------------------------------
# 3. LOOCV EVALUATION
# -----------------------------------------------------
# Each model is wrapped in the same scaler -> estimator pipeline and scored
# on leave-one-out predictions: every sample is predicted by a pipeline
# that was fitted on all the other samples, so the scaler never sees the
# held-out row.
loo = LeaveOneOut()
results = []
oof_predictions = {}  # model name -> out-of-fold predictions, aligned with y

for name, model in models.items():

    pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("model", model)
    ])

    model_pred = cross_val_predict(pipeline, X, y, cv=loo)
    oof_predictions[name] = model_pred

    # Metrics are computed on the pooled out-of-fold predictions.
    results.append({
        "Model": name,
        "MAE": mean_absolute_error(y, model_pred),
        "RMSE": np.sqrt(mean_squared_error(y, model_pred)),
        "R2": r2_score(y, model_pred)
    })

# Rank models from best (lowest RMSE) to worst.
results_df = (
    pd.DataFrame(results)
    .sort_values("RMSE")
    .reset_index(drop=True)
)

# Bind `y_pred` explicitly to the top-ranked model's out-of-fold
# predictions. Previously `y_pred` was the loop variable and therefore held
# the predictions of whichever model happened to be iterated last, which is
# not necessarily the selected one.
best_model_name = results_df.loc[0, "Model"]
y_pred = oof_predictions[best_model_name]

print("\nMODEL PERFORMANCE SUMMARY")
results_df


In [None]:
# =====================================================
# PERFORMANCE COMPARISON PLOT
# =====================================================

# One bar per model showing its cross-validated RMSE; bars appear in the
# row order of `results_df`.
fig, ax = plt.subplots(figsize=(10, 5))

ax.bar(results_df["Model"], results_df["RMSE"])

ax.set_xlabel("ML Model")
ax.set_ylabel("RMSE (Lower is Better)")
ax.set_title("ML Model Performance Comparison (LOOCV)")

# Tilt the model names so longer labels do not collide.
ax.tick_params(axis="x", labelrotation=30)
ax.grid(axis="y", linestyle="--", alpha=0.5)

fig.tight_layout()
plt.show()


In [None]:
# =====================================================
# RIDGE COEFFICIENT INTERPRETATION
# =====================================================

import pandas as pd
from sklearn.base import clone

# Fit the Ridge pipeline explicitly on the full dataset instead of reading
# an unbound `best_pipeline`: cross-validation only fits clones, so no
# fitted estimator exists after the evaluation loop. Cloning the entry from
# `models` keeps the hyperparameters identical to the evaluated Ridge model
# and leaves the dict's own instance unfitted.
ridge_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", clone(models["Ridge Regression"]))
])
ridge_pipeline.fit(X, y)

ridge_model = ridge_pipeline.named_steps["model"]

# The features pass through StandardScaler before reaching the model, so
# each coefficient is the change in the target per one standard deviation
# of that feature. Rows are ordered from most positive to most negative.
coef_df = pd.DataFrame({
    "Feature": ml_features,
    "Coefficient": ridge_model.coef_
}).sort_values("Coefficient", ascending=False)

coef_df


In [None]:
# =====================================================
# BLOCK 1: PREPARE ML-BASED STRESS DATA
# =====================================================

# Fractions of predicted inpatient demand used as baseline ICU and
# ventilator demand.
icu_share = 0.15
vent_share = 0.05

# NOTE(review): `y_pred` is whatever the evaluation cell last bound to that
# name -- confirm it holds the predictions of the model that is meant to
# drive the stress test, and that its row order matches `df_features`.
df_ml_stress = (
    df_features.copy()
    # ML-predicted inpatient demand, one value per row
    .assign(ip_demand_pred_ml=y_pred)
    .assign(
        # Baseline demand per resource, derived from the prediction
        bed_demand_base_ml=lambda d: d["ip_demand_pred_ml"],
        icu_demand_base_ml=lambda d: d["ip_demand_pred_ml"] * icu_share,
        vent_demand_base_ml=lambda d: d["ip_demand_pred_ml"] * vent_share,
        # Available capacity per resource; ventilator capacity pools the
        # invasive and non-invasive counts
        bed_capacity=lambda d: d["total_beds"],
        icu_capacity=lambda d: d["icu_beds"],
        vent_capacity=lambda d: (
            d["ventilators_invasive"] + d["ventilators_non_invasive"]
        ),
    )
)

In [None]:
# =====================================================
# BLOCK 2: ML-BASED SURGE STRESS TEST
# =====================================================

# Scenario label -> multiplier applied to every baseline demand column.
SURGE_SCENARIOS = {
    "Baseline (1.0x)": 1.0,
    "Mild Surge (1.2x)": 1.2,
    "Moderate Surge (1.4x)": 1.4,
    "Severe Surge (1.6x)": 1.6,
    "Extreme Surge (1.8x)": 1.8,
    "Collapse Zone (2.0x)": 2.0,
}


def _shortfall(demand_col, capacity_col, multiplier):
    """Return per-row unmet demand: scaled demand above capacity, floored at 0."""
    scaled_demand = df_ml_stress[demand_col] * multiplier
    return (scaled_demand - df_ml_stress[capacity_col]).clip(lower=0)


def _scenario_summary(label, multiplier):
    """Aggregate unmet bed/ICU/ventilator demand for one surge scenario."""
    beds_short = _shortfall("bed_demand_base_ml", "bed_capacity", multiplier)
    icu_short = _shortfall("icu_demand_base_ml", "icu_capacity", multiplier)
    vent_short = _shortfall("vent_demand_base_ml", "vent_capacity", multiplier)
    return {
        "Scenario": label,
        "Surge_Factor": multiplier,
        "Total_Bed_Unmet_ML": beds_short.sum(),
        "Total_ICU_Unmet_ML": icu_short.sum(),
        "Total_Vent_Unmet_ML": vent_short.sum(),
        # Number of rows with any bed shortfall (ICU and ventilator
        # shortfalls are not part of this count).
        "High_Risk_Districts_ML": (beds_short > 0).sum(),
    }


# One summary row per scenario, in the order the scenarios are declared.
ml_results = [
    _scenario_summary(label, multiplier)
    for label, multiplier in SURGE_SCENARIOS.items()
]

stress_ml_df = pd.DataFrame(ml_results)

stress_ml_df


In [None]:
# =====================================================
# BLOCK 3: COMPARISON TABLE (ACTUAL vs ML)
# =====================================================

# Put the two stress-test summaries side by side, matching rows on the
# scenario label and its multiplier. Inner join: a scenario that is missing
# from either table is dropped from the comparison.
join_keys = ["Scenario", "Surge_Factor"]
comparison_df = pd.merge(stress_df, stress_ml_df, how="inner", on=join_keys)

comparison_df


In [None]:
# =====================================================
# BLOCK 4: FAILURE CURVE – ACTUAL vs ML
# =====================================================

# (table, unmet-bed column, line style) for each curve, drawn in this order.
curves = [
    (
        stress_df,
        "Total_Bed_Unmet",
        {"marker": "o", "label": "Beds (Actual Demand)"},
    ),
    (
        stress_ml_df,
        "Total_Bed_Unmet_ML",
        {"marker": "s", "linestyle": "--", "label": "Beds (ML-Predicted Demand)"},
    ),
]

fig, ax = plt.subplots(figsize=(10, 5))

# Total unmet bed demand against the surge multiplier, one line per source.
for table, unmet_col, line_style in curves:
    ax.plot(table["Surge_Factor"], table[unmet_col], **line_style)

ax.set_xlabel("Surge Multiplier")
ax.set_ylabel("Total Unmet Bed Demand")
ax.set_title("Actual vs ML-Based Healthcare Stress Test (Beds)")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.5)

fig.tight_layout()
plt.show()
