# 4.4 Train and Evaluate Boosted Models - Code Brief

Condensed reference for training, evaluation, and SHAP interpretation.

## Setup

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, cross_validate
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, precision_recall_curve, confusion_matrix, brier_score_loss
)
from sklearn.calibration import calibration_curve

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import lightgbm as lgb
import shap

## Early Stopping - XGBoost

In [None]:
# XGBoost with early stopping configured on the estimator itself.
# `use_label_encoder` is intentionally omitted: it is a deprecated no-op in
# the XGBoost releases that accept `early_stopping_rounds` in the constructor
# and only produces a warning there.
xgb_model = XGBClassifier(
    n_estimators=500,          # upper bound; early stopping may end sooner
    max_depth=6,
    learning_rate=0.1,
    random_state=42,
    eval_metric='logloss',
    early_stopping_rounds=20,  # patience, in boosting rounds
)

# Both splits are passed so train and validation loss are both recorded.
# Per the XGBoost docs, the LAST entry of `eval_set` is the one monitored for
# early stopping, so the validation split must stay last.
xgb_model.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_val, y_val)],
    verbose=False
)

print(f"Best iteration: {xgb_model.best_iteration}")

## Early Stopping - LightGBM

In [None]:
# LightGBM: early stopping is supplied through fit-time callbacks.
lgb_params = {
    "n_estimators": 500,   # upper bound on boosting rounds
    "num_leaves": 31,
    "learning_rate": 0.1,
    "random_state": 42,
    "verbose": -1,
}
lgb_model = LGBMClassifier(**lgb_params)

# Patience of 20 rounds on the validation split; the second callback is given
# period=0 (presumably to suppress per-iteration logging - confirm in the
# LightGBM callback docs).
lgb_callbacks = [
    lgb.early_stopping(stopping_rounds=20, verbose=False),
    lgb.log_evaluation(period=0),
]

lgb_model.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='logloss',
    callbacks=lgb_callbacks,
)

print(f"Best iteration: {lgb_model.best_iteration_}")

## Early Stopping - CatBoost

In [None]:
# CatBoost: early stopping is set on the estimator; the validation split is
# passed at fit time and `use_best_model=True` is requested explicitly.
cat_params = dict(
    iterations=500,                # upper bound on boosting rounds
    depth=6,
    learning_rate=0.1,
    cat_features=categorical_cols,
    early_stopping_rounds=20,      # patience, in boosting rounds
    random_state=42,
    verbose=0,
)
cat_model = CatBoostClassifier(**cat_params)

cat_model.fit(
    X_train,
    y_train,
    eval_set=(X_val, y_val),
    use_best_model=True,
)

print(f"Best iteration: {cat_model.best_iteration_}")

## Cross-Validation (Fixed Iterations, No Early Stopping)

In [None]:
# 5-fold stratified cross-validation, shuffled with a fixed seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# NOTE: no early-stopping parameters or eval set are passed in this cell, so
# every fold trains the full 200 iterations.
cv_estimator = CatBoostClassifier(
    iterations=200,
    depth=6,
    cat_features=categorical_cols,
    verbose=0,
)
cv_metrics = ['accuracy', 'roc_auc', 'f1', 'precision', 'recall']

cv_scores = cross_validate(
    cv_estimator,
    X,
    y,
    cv=cv,
    scoring=cv_metrics,
    return_train_score=True,  # also record train_* scores for each fold
)

mean_test_auc = cv_scores['test_roc_auc'].mean()
print(f"Mean ROC-AUC: {mean_test_auc:.4f}")

## Comprehensive Evaluation

In [None]:
# `model` is whichever fitted classifier is being evaluated; it is not defined
# in the visible cells of this brief.
y_pred = model.predict(X_test)
# Column 1 of predict_proba is used as the positive-class score.
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Metrics (label-based metrics use y_pred; ROC-AUC uses the scores)
print(f"Accuracy:  {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred):.4f}")
print(f"Recall:    {recall_score(y_test, y_pred):.4f}")
print(f"F1 Score:  {f1_score(y_test, y_pred):.4f}")
print(f"ROC-AUC:   {roc_auc_score(y_test, y_pred_proba):.4f}")

# ROC Curve
# Kept under its own name so the precision-recall call below does not
# overwrite the ROC thresholds.
fpr, tpr, roc_thresholds = roc_curve(y_test, y_pred_proba)

# Precision-Recall Curve
# `thresholds` still ends up holding the precision-recall thresholds.
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)

# Calibration
prob_true, prob_pred = calibration_curve(y_test, y_pred_proba, n_bins=10)
brier = brier_score_loss(y_test, y_pred_proba)

## SHAP Interpretation

In [None]:
# Build a tree-specific explainer around the fitted model.
explainer = shap.TreeExplainer(model)

# SHAP values for every row of the test set.
# NOTE(review): the code below treats this as one array with rows on axis 0;
# some model / shap-version combinations may return per-class output instead -
# confirm for the model in use.
shap_values = explainer.shap_values(X_test)

# Global importance: average magnitude of each feature's SHAP value over rows.
mean_abs_shap = np.mean(np.abs(shap_values), axis=0)

# Local explanation for the first test row. Double brackets keep the selection
# a one-row DataFrame; the trailing [0] takes the first entry of the result.
first_row = X_test.iloc[[0]]
individual_shap = explainer.shap_values(first_row)[0]

# The explainer's expected value, stored as the baseline for the explanation.
base_value = explainer.expected_value

## Key Concepts

| Concept | Description |
|:--------|:------------|
| Early Stopping | Stop adding trees once the validation metric has not improved for a set number of rounds |
| Stratified CV | Preserve class distribution in each fold |
| Brier Score | Mean squared error of predicted probabilities; reflects calibration (lower is better) |
| SHAP Values | Additive per-feature contributions to an individual prediction, relative to the base (expected) value |