# Split Conformal Prediction for Regression
We fit a model on a training set, use a **calibration (validation) set** to estimate residual quantiles,
and form **(1 - α)** prediction intervals with guaranteed marginal coverage under exchangeability.

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from src.data.loaders import load_regression_synthetic
from src.features.pipelines import build_leakage_safe_preprocessor
from src.models.gbm import lgbm_regressor
from src.evaluation.metrics import regression_metrics

# Default size (inches) for every figure drawn in this notebook.
plt.rc('figure', figsize=(6, 4))

# Miscoverage level: intervals target 1 - ALPHA = 90% coverage.
ALPHA = 0.1


In [None]:
# Split the data 60/20/20 into train / calibration / test. The model is fit on
# the training part only; the calibration part is held out so that its
# residuals are exchangeable with those of future (test) points.
X, y = load_regression_synthetic(n_samples=1500)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# The preprocessor is built from X_train alone (presumably so that no
# calibration/test statistics leak into it -- confirm in src.features.pipelines).
pre = build_leakage_safe_preprocessor(X_train)
model = lgbm_regressor()
pipe = Pipeline([('prep', pre), ('model', model)])
pipe.fit(X_train, y_train)

# Conformity scores: absolute residuals on the calibration set. Both sides are
# converted to plain arrays so the subtraction is positional and cannot be
# affected by a pandas index on y_cal.
y_cal_pred = pipe.predict(X_cal)
cal_resid = np.abs(np.asarray(y_cal) - np.asarray(y_cal_pred))

# Finite-sample-corrected conformal quantile: the ceil((n + 1) * (1 - ALPHA))-th
# smallest calibration score. The plain (1 - ALPHA) empirical quantile is
# slightly too small and does not give the >= 1 - ALPHA marginal coverage
# guarantee. If the required rank exceeds n (too few calibration points for
# the requested level), the only valid interval is the whole real line.
n_cal = cal_resid.size
rank = int(np.ceil((n_cal + 1) * (1 - ALPHA)))
if rank > n_cal:
    q = np.inf
else:
    q = np.sort(cal_resid, axis=None)[rank - 1]
print('Calibrated residual quantile q:', q)


In [None]:
# Build symmetric intervals of half-width q around each test prediction.
y_test_pred = pipe.predict(X_test)
lower, upper = y_test_pred - q, y_test_pred + q

# Fraction of test targets that land inside their interval (bounds inclusive).
inside = np.logical_and(lower <= y_test, y_test <= upper)
coverage = np.mean(inside)
print(f'Empirical coverage: {coverage:.3f} (target {(1-ALPHA):.2f})')


In [None]:
# Predicted vs. true values on the test set.
plt.scatter(y_test_pred, y_test, s=8, alpha=0.6)

# Dashed identity line spanning the range of the predictions.
pred_lo, pred_hi = y_test_pred.min(), y_test_pred.max()
diag = [pred_lo, pred_hi]
plt.plot(diag, diag, linestyle='--')

# Shaded band of half-width q around the identity line. q is a scalar, so
# shifting the sorted predictions gives the same values as sorting the
# shifted predictions.
pred_sorted = np.sort(y_test_pred)
plt.fill_between(pred_sorted, pred_sorted - q, pred_sorted + q, alpha=0.2)

plt.xlabel('Prediction')
plt.ylabel('True')
plt.title('Conformal Prediction Intervals (split)')
plt.tight_layout()
plt.show()
