# Temperature Scaling and Selective Conformal Classification (Binary)
We train a classifier, inspect its calibration, apply **temperature scaling**, and then build a **selective classifier** with conformal-style risk control: the model abstains on uncertain cases so that the validation error among auto-decisions is guaranteed to be ≤ α.

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.calibration import calibration_curve
from src.data.loaders import load_classification_breast_cancer
from src.features.pipelines import build_leakage_safe_preprocessor
from src.models.gbm import lgbm_classifier
from src.calibration.temperature import TemperatureScaler
from src.calibration.conformal import selective_threshold, selective_predict

# Default figure size (width, height in inches) for every plot below.
plt.rcParams['figure.figsize'] = (6, 4)
# Target error level; passed as `alpha` to the selective-threshold step.
ALPHA = 0.1


In [None]:
# Load the data and carve out three stratified partitions:
# first hold out 30% as test, then 30% of the remainder as validation.
X, y = load_classification_breast_cancer()
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)
X_tr, X_va, y_tr, y_va = train_test_split(
    X_tr, y_tr, test_size=0.3, stratify=y_tr, random_state=42
)

# The preprocessor is built from the final training partition only and is
# fitted inside the pipeline, so validation/test rows never reach `fit`.
pre = build_leakage_safe_preprocessor(X_tr)
gbm = lgbm_classifier()
pipe = Pipeline(steps=[('prep', pre), ('model', gbm)])
pipe.fit(X_tr, y_tr)

# Keep the second column of predict_proba (score for `classes_[1]`).
p_va = pipe.predict_proba(X_va)[:, 1]
p_te = pipe.predict_proba(X_te)[:, 1]

# Report discrimination (AUC) and probabilistic accuracy (Brier) per split.
for auc_label, y_true, p_hat in (
    ('Valid AUC:', y_va, p_va),
    ('Test  AUC:', y_te, p_te),
):
    print(auc_label, roc_auc_score(y_true, p_hat),
          '| Brier:', brier_score_loss(y_true, p_hat))


In [None]:
# Reliability diagram on the validation split, before temperature scaling.
# Quantile binning puts roughly the same number of samples in each of the 10 bins.
prob_true, prob_pred = calibration_curve(
    y_va, p_va, n_bins=10, strategy='quantile'
)
plt.plot(prob_pred, prob_true, marker='o')
plt.plot([0, 1], [0, 1], '--')  # diagonal = perfectly calibrated reference
plt.title('Calibration (valid) before temp scaling')
plt.xlabel('Predicted')
plt.ylabel('Fraction positive')
plt.show()


In [None]:
# Temperature scaling: fit the scaler on the validation split, then apply the
# fitted transform to both validation and test probabilities.
ts = TemperatureScaler().fit(y_va, p_va)
p_va_cal = ts.transform(p_va)
p_te_cal = ts.transform(p_te)

print('Temp T:', ts.T_)
# NOTE: the validation Brier score is in-sample for the scaler (it was fitted
# on y_va / p_va above); the test score is the out-of-sample figure.
print('Valid (cal): Brier', brier_score_loss(y_va, p_va_cal))
print(' Test (cal): Brier', brier_score_loss(y_te, p_te_cal))

# Reliability diagram on the validation split, after temperature scaling.
prob_true, prob_pred = calibration_curve(
    y_va, p_va_cal, n_bins=10, strategy='quantile'
)
plt.plot(prob_pred, prob_true, marker='o')
plt.plot([0, 1], [0, 1], '--')  # diagonal = perfectly calibrated reference
plt.title('Calibration (valid) after temp scaling')
plt.xlabel('Predicted')
plt.ylabel('Fraction positive')
plt.show()


In [None]:
# Selective conformal threshold on validation (risk control)
# The threshold is chosen on the calibrated validation probabilities for the
# target level ALPHA, then applied unchanged to the calibrated test probabilities.
tau, summary = selective_threshold(y_va, p_va_cal, alpha=ALPHA)
print('Selective threshold tau:', tau, '| summary:', summary)
pred_te, abstain = selective_predict(p_te_cal, tau)

# Compare on plain NumPy arrays so masking and comparison are positional.
# y_te may be a pandas object whose index was shuffled by train_test_split;
# mixing it with a list/Series returned by selective_predict could otherwise
# raise on index alignment or fail to build an element-wise mask.
pred_arr = np.asarray(pred_te)
y_te_arr = np.asarray(y_te)

# The code treats -1 in the predictions as "abstained"; everything else is an
# automatic decision.
# NOTE(review): `abstain` is presumably the same information as a mask — confirm.
auto_mask = (pred_arr != -1)
coverage = np.mean(auto_mask)

# Error rate among auto-decisions only. When nothing was auto-decided the rate
# is reported as 0.0 (vacuous), so read it together with the coverage figure.
if auto_mask.any():
    err_rate = np.mean(pred_arr[auto_mask] != y_te_arr[auto_mask])
else:
    err_rate = 0.0
print(f'Auto-decision coverage on test: {coverage:.3f}; empirical error among auto decisions: {err_rate:.3f} (target <= {ALPHA})')
