# 🤖 Loan Default Prediction - Model Training and Explainability

In [None]:

import pandas as pd
import xgboost as xgb
import shap
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score

# Load dataset
# The path is relative, so it resolves against the notebook's working directory.
raw_csv_path = '../data/raw/loan_data.csv'
df = pd.read_csv(raw_csv_path)


In [None]:

# Preprocessing
# One-hot encode the two categorical columns. drop_first=True keeps k-1
# indicator columns per feature instead of k.
df = pd.get_dummies(df, columns=['purpose', 'term'], drop_first=True)

# Every remaining column except the 'default' target is used as a feature.
X = df.drop('default', axis=1)
y = df['default']

# Hold out 30% of the rows for evaluation. stratify=y keeps the share of each
# 'default' class the same in the train and test splits, so the held-out
# metrics are not distorted by an uneven draw of the classes.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


In [None]:

# Model Training
# Hyperparameters are collected in one place; everything not listed here is
# left at the XGBClassifier defaults.
xgb_params = {
    'use_label_encoder': False,
    'eval_metric': 'logloss',
}
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)


In [None]:

# Predictions and Evaluation
# The classification report is built from the hard class labels.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

# ROC AUC is computed from the scores in the second predict_proba column
# rather than from the hard labels.
default_scores = model.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, default_scores)
print(f"ROC AUC Score: {auc:.4f}")


In [None]:

# SHAP Explainability
# Build a tree explainer around the fitted model and compute SHAP values for
# the held-out rows.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Summary plot of the SHAP values; X_test supplies the feature values shown
# alongside them.
shap.summary_plot(shap_values, features=X_test)


## ✅ Modeling Complete. Model and SHAP explanations generated! 🚀