# Title: Statistical Validation and Confidence Intervals
# Description: Performs bootstrapped AUC confidence intervals and produces thesis tables.

In [None]:
import numpy as np
import pandas as pd
from src.evaluate import bootstrap_ci
from sklearn.metrics import roc_auc_score

# Load the test-set metrics saved earlier to experiments/results/test_metrics.json.
import json

# JSON is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's locale default (e.g. cp1252 on Windows).
with open("experiments/results/test_metrics.json", encoding="utf-8") as f:
    metrics = json.load(f)

# Point estimate of the AUC, for comparison with the bootstrap interval.
print("Base AUC:", metrics["auc"])

In [None]:
# Bootstrap a confidence interval for the AUC from the arrays saved earlier
# (y_true.npy and y_prob.npy under experiments/results/).
y_true = np.load("experiments/results/y_true.npy")
y_prob = np.load("experiments/results/y_prob.npy")

# 1000 bootstrap resamples, scored with roc_auc_score.
# NOTE(review): the "95%" label below assumes bootstrap_ci's default
# confidence level -- confirm against src.evaluate.
ci_bounds = bootstrap_ci(roc_auc_score, y_true, y_prob, n_boot=1000)
lo, hi = ci_bounds
print(f"95% CI for AUC: [{lo:.3f}, {hi:.3f}]")

# Build the thesis results table: display label -> key in the metrics dict.
# Row order follows the insertion order of this mapping.
label_to_key = {
    "AUC": "auc",
    "F1": "f1",
    "Recall": "recall",
    "Precision": "precision",
    "Brier": "brier",
}
summary = pd.DataFrame(
    {
        "Metric": list(label_to_key),
        "Score": [metrics[key] for key in label_to_key.values()],
    }
)

# Persist the table without the index column so it can be included as-is.
summary.to_csv("experiments/results/statistical_summary.csv", index=False)

# Bare expression so the notebook renders the table as the cell output.
summary
