# 🤖 Notebook 2: Model Training and Evaluation
This notebook trains baseline machine learning models and evaluates them for performance metrics. It uses the cleaned dataset from Notebook 1.

### 📦 Import Libraries

In [11]:
# 📦 Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

### 📁 Load Cleaned Dataset

In [12]:
# 📁 Load Cleaned Dataset
df = pd.read_csv('cleaned_credit_data.csv')
df.head()

Unnamed: 0,Age,Sex,Job,Credit amount,Duration,Housing_own,Housing_rent,Saving accounts_moderate,Saving accounts_quite rich,Saving accounts_rich,Checking account_moderate,Checking account_rich,Purpose_car,Purpose_domestic appliances,Purpose_education,Purpose_furniture/equipment,Purpose_radio/TV,Purpose_repairs,Purpose_vacation/others,Risk_good
0,22,0,2,5951,48,True,False,False,False,False,True,False,False,False,False,False,True,False,False,False
1,45,1,2,7882,42,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True
2,53,1,2,4870,24,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
3,35,1,3,6948,36,False,True,False,False,False,True,False,True,False,False,False,False,False,False,True
4,28,1,3,5234,30,True,False,False,False,False,True,False,True,False,False,False,False,False,False,False


### 🎯 Split Features and Target

In [13]:
# 🎯 Split Features and Target
# Adjust 'Risk' column if it has different values
X = df.drop('Risk_good', axis=1)
y = df['Risk_good'].astype(int)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 🔄 Feature Scaling

In [14]:
# 🔄 Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### 🤖 Train Logistic Regression

In [15]:
# 🤖 Train Logistic Regression
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)

### 🌲 Train Random Forest

In [16]:
# 🌲 Train Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

### ⚡ Train XGBoost

In [17]:
# ⚡ Train XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


### 📊 Evaluation Function

In [18]:
# 📊 Evaluation Function
def evaluate_model(name, y_true, y_pred):
    print(f"\n📋 Model: {name}")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("ROC AUC:", roc_auc_score(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("Classification Report:\n", classification_report(y_true, y_pred))

### 🧪 Evaluate All Models

In [19]:
# 🧪 Evaluate All Models
evaluate_model("Logistic Regression", y_test, y_pred_lr)
evaluate_model("Random Forest", y_test, y_pred_rf)
evaluate_model("XGBoost", y_test, y_pred_xgb)


📋 Model: Logistic Regression
Accuracy: 0.5619047619047619
ROC AUC: 0.5590909090909091
Confusion Matrix:
 [[25 25]
 [21 34]]
Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.50      0.52        50
           1       0.58      0.62      0.60        55

    accuracy                           0.56       105
   macro avg       0.56      0.56      0.56       105
weighted avg       0.56      0.56      0.56       105


📋 Model: Random Forest
Accuracy: 0.5333333333333333
ROC AUC: 0.53
Confusion Matrix:
 [[23 27]
 [22 33]]
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.46      0.48        50
           1       0.55      0.60      0.57        55

    accuracy                           0.53       105
   macro avg       0.53      0.53      0.53       105
weighted avg       0.53      0.53      0.53       105


📋 Model: XGBoost
Accuracy: 0.6
ROC AUC: 0.5990909090909091
Confusio

### 💾 Save Models (optional)

In [21]:
# 💾 Save Models (optional)
import joblib
joblib.dump(lr, 'models/logistic_model.pkl')
joblib.dump(rf, 'models/random_forest_model.pkl')
joblib.dump(xgb, 'models/xgboost_model.pkl')
print('Models saved in models/.')

Models saved in models/.
