# Credit Risk Prediction Walkthrough

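This notebook walks through an end-to-end classic ML workflow on a small credit-risk dataset using scikit-learn, Optuna, and SHAP: loading the data, exploratory analysis, training a pipeline, evaluation, explainability, and saving the model.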

## 1️⃣ Load data

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the credit-risk table from CSV. The path is relative, so it is resolved
# against the current working directory of the notebook kernel.
csv_path = "../data/credit_risk.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

# Last expression of the cell: renders a preview of the first five rows.
df.head(n=5)


## 2️⃣ Exploratory Data Analysis

In [None]:

# Pairwise scatter plots of the columns, coloured by the "default" column.
sns.pairplot(data=df, hue="default")
plt.show()

# Annotated heatmap of the correlations between the numeric columns only.
corr_matrix = df.corr(numeric_only=True)
plt.figure(figsize=(6, 4))
heatmap_ax = sns.heatmap(corr_matrix, annot=True, cmap="coolwarm")
heatmap_ax.set_title("Feature correlations")
plt.show()


## 3️⃣ Train a simple pipeline

In [None]:

from src.features import build_preprocessor
from src.models import model_candidates
from sklearn.pipeline import Pipeline

# Separate the "default" column (used as the label) from the feature columns.
y = df["default"]
X = df.drop(columns="default")

# Chain the preprocessor and the estimator so that both are fitted together
# and later applied together at prediction time.
pre = build_preprocessor(X)
model = model_candidates()  # default RF candidate
pipe = Pipeline(steps=[("pre", pre), ("model", model)])
pipe.fit(X, y)

# Last expression of the cell: displays the fitted pipeline.
pipe


## 4️⃣ Evaluation

In [None]:

import os

from sklearn.metrics import classification_report, roc_curve, auc
from sklearn.model_selection import cross_val_predict

# `pipe` was fitted on every row of X, so scoring pipe.predict(X) would only
# measure how well the training rows were memorised. cross_val_predict clones
# the pipeline and refits the clone on each training fold, so every row is
# predicted by a model that never saw it. The fitted `pipe` is left untouched.
cv_folds = 5
y_pred = cross_val_predict(pipe, X, y, cv=cv_folds)
print(classification_report(y, y_pred))

if hasattr(pipe.named_steps["model"], "predict_proba"):
    # Column 1 holds the out-of-fold probability of the second class
    # (the positive class when the labels are 0/1).
    y_proba = cross_val_predict(
        pipe, X, y, cv=cv_folds, method="predict_proba"
    )[:, 1]
    fpr, tpr, _ = roc_curve(y, y_proba)

    plt.figure(figsize=(5, 4))
    plt.plot(fpr, tpr, label=f"AUC={auc(fpr, tpr):.2f}")
    plt.plot([0, 1], [0, 1], "--", alpha=0.5)  # chance-level diagonal
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.title("ROC Curve")
    plt.legend()

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs("../reports", exist_ok=True)
    plt.savefig("../reports/roc_curve.png", bbox_inches="tight")
    plt.show()
else:
    print("Model has no predict_proba; skipping ROC.")


## 5️⃣ Explainability with SHAP

In [None]:

import os

import numpy as np
import shap


def _positive_class_shap(values):
    """Return the 2-D (rows x features) SHAP matrix for class index 1.

    Handles the output layouts this cell may receive from ``shap_values``:
    a list with one array per class, a single 3-D array laid out as
    (rows, features, classes), or an already 2-D array for single-output
    models, which is returned unchanged.
    """
    if isinstance(values, list):
        return values[1]
    values = np.asarray(values)
    if values.ndim == 3:
        return values[:, :, 1]
    return values


# Try tree SHAP when possible, fallback to KernelExplainer
try:
    preprocessor = pipe.named_steps["pre"]
    model = pipe.named_steps["model"]
    feature_names = preprocessor.get_feature_names_out()
    X_trans = preprocessor.transform(X)
    # The preprocessor may return a scipy sparse matrix; densify it so the
    # explainers and the summary plot all receive a plain array.
    if hasattr(X_trans, "toarray"):
        X_trans = X_trans.toarray()

    try:
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_trans)
        X_explained = X_trans
    except Exception as tree_error:
        # Best-effort fallback for models the tree explainer cannot handle.
        # KernelExplainer is slow, so use a 20-row background sample and
        # explain only the first 5 rows.
        print("TreeExplainer unavailable, using KernelExplainer:", tree_error)
        explainer = shap.KernelExplainer(model.predict_proba, X_trans[:20])
        X_explained = X_trans[:5]
        shap_values = explainer.shap_values(X_explained)

    shap.summary_plot(
        _positive_class_shap(shap_values),
        X_explained,
        feature_names=feature_names,
        show=False,
    )
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs("../reports", exist_ok=True)
    plt.savefig("../reports/shap_summary.png", bbox_inches="tight")
    plt.show()
except Exception as e:
    # Explainability is optional in this walkthrough: report and move on.
    print("SHAP skipped:", e)


## 6️⃣ Save model

In [None]:

import os

from joblib import dump

# Persist the fitted pipeline (preprocessing + model) as a single file.
model_path = "../models/credit_risk.joblib"
os.makedirs(os.path.dirname(model_path), exist_ok=True)  # creates ../models if missing
dump(pipe, model_path)
print(f"Saved to {model_path}")
