In [None]:
# loan_default_analysis.ipynb

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [None]:
# Load dataset
# Read the CSV (path is relative to the notebook's working directory) into `df`.
dataset_path = "bank_loan_default_dataset.csv"
df = pd.read_csv(dataset_path)

In [None]:
# Quick look at data
# Show the first rows, the column dtypes / non-null counts, and the value
# counts of the 'Loan_Default' column.
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df['Loan_Default'].value_counts())

In [None]:
# Data preprocessing
# Label-encode the categorical columns, fitting a separate encoder per column.
# Refitting a single LabelEncoder on a second column overwrites its classes_,
# which loses the category <-> code mapping of the first column.
label_encoders = {}
for col in ['Employment_Status', 'Marital_Status']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    # Keep the fitted encoder so the integer codes can later be mapped back
    # to the original labels with label_encoders[col].inverse_transform(...).
    label_encoders[col] = le
# After the loop `le` is the encoder fitted on 'Marital_Status', exactly as it
# was when one encoder was reused for both columns.

In [None]:
# Features and target
# y is the 'Loan_Default' column; X is every remaining column except
# 'Customer_ID' (presumably a row identifier with no predictive value — confirm).
target_col = 'Loan_Default'
y = df[target_col]
X = df.drop(columns=['Customer_ID', target_col])

In [None]:
# Feature scaling
# Standardise every column of X with StandardScaler; the result is stored in
# X_scaled and the fitted scaler is kept in `scaler`.
# NOTE(review): the scaler is fitted on all rows of X at this point, i.e. before
# any train/test split has been made.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Train-test split
# Split the *unscaled* features first, then fit the scaler on the training rows
# only and apply that fitted transform to the test rows. Fitting the scaler on
# the full dataset before splitting lets test-set statistics (mean / std) leak
# into preprocessing and makes the evaluation optimistic.
# test_size and random_state are unchanged (20% hold-out, seed 42).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn mean/std from training rows
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

In [None]:
# Model building
# Random forest with 100 trees and a fixed seed, fitted on the training split.
rf_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**rf_params)
model.fit(X_train, y_train)

In [None]:
# Predictions
# Hard class labels for the test split.
y_pred = model.predict(X_test)
# Per-class probabilities; column index 1 is kept as the score used for ROC AUC
# (assumes a binary target whose second class is the "default" class — confirm).
class_probabilities = model.predict_proba(X_test)
y_proba = class_probabilities[:, 1]

In [None]:
# Evaluation
# Each metric is printed under its own heading, one heading per line.
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
# ROC AUC is computed from the predicted probabilities, not the hard labels.
roc_auc = roc_auc_score(y_test, y_proba)
print(f"ROC AUC Score: {roc_auc:.3f}")

In [None]:
# Feature importance
# Pair each fitted importance with its column name from X and order the
# resulting Series from most to least important.
feat_importances = (
    pd.Series(model.feature_importances_, index=X.columns)
    .sort_values(ascending=False)
)

In [None]:
# Plot feature importance
# Horizontal bar chart with one bar per feature, in the order of
# feat_importances. An explicit Figure/Axes pair is used instead of the implicit
# pyplot state so the plot is drawn on a known Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=feat_importances, y=feat_importances.index, ax=ax)
ax.set_title("Feature Importance")
# Label both axes explicitly so the figure can be read on its own.
ax.set_xlabel("Importance")
ax.set_ylabel("Feature")
plt.show()