# Comparing Classifiers for Loan Approval

In [33]:
# Importing required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

### 1. Decision Tree Classifier

In [3]:
# --- 1. Data Preprocessing ---
# Read the loan-approval CSV (relative path, resolved against the working directory)
file_path = 'loan_approval_dataset.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# Normalise the column headers by removing surrounding whitespace
df = df.rename(columns=str.strip)

In [5]:
# Handle potential negative asset values: floor them at zero.
# A single vectorised clip over all four columns replaces the per-element lambda.
asset_cols = ['residential_assets_value', 'commercial_assets_value', 'luxury_assets_value', 'bank_asset_value']
df[asset_cols] = df[asset_cols].clip(lower=0)

In [6]:
# Remove the loan_id column from the frame
df = df.drop(columns=['loan_id'])

In [7]:
# Encode categorical variables using Label Encoding.
# The raw category values carry surrounding whitespace (e.g. ' Approved'), so
# they are stripped before fitting: the stored encoders then hold clean class
# names, and differently padded spellings cannot become separate classes.
categorical_cols = df.select_dtypes(include='object').columns
label_encoders = {}
print("\nPreprocessing: Encoding Categorical Variables...")
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].str.strip())
    label_encoders[col] = le  # keep the fitted encoder so codes can be mapped back later
    if col == 'loan_status':
        # A label's code is its position in le.classes_; enumerate yields builtin
        # ints, so the printed mapping looks the same under any NumPy version.
        mapping = {label: code for code, label in enumerate(le.classes_)}
        print(f"Mapping for '{col}': {mapping}")


Preprocessing: Encoding Categorical Variables...
Mapping for 'loan_status': {' Approved': 0, ' Rejected': 1}


In [8]:
# Target vector first, then the feature matrix (everything except the target).
# Per the mapping printed during encoding: 0=Approved, 1=Rejected.
y = df['loan_status']
X = df.drop(columns=['loan_status'])

In [9]:
# Hold out 20% of the rows as a test set. The split is stratified on y and
# uses a fixed seed so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
print(f"Preprocessing: Data Split - Train shape: {X_train.shape}, Test shape: {X_test.shape}")

Preprocessing: Data Split - Train shape: (3415, 11), Test shape: (854, 11)


In [11]:
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Preprocessing: Feature Scaling Completed.")

Preprocessing: Feature Scaling Completed.


In [12]:
# --- 2. Model Implementation (Decision Tree) ---
print("\nTraining Decision Tree Classifier...")
# fit() returns the estimator, so construction and training are chained
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
print("Training complete.")


Training Decision Tree Classifier...
Training complete.


In [13]:
# --- 3. Model Evaluation (Decision Tree) ---
# Predict labels for the scaled test split; the metrics cells below use them
print("\n--- Decision Tree Evaluation Results ---")
y_pred_dt = dt_classifier.predict(X=X_test_scaled)


--- Decision Tree Evaluation Results ---


In [14]:
# Accuracy of the decision tree on the test split, shown as a fraction and a percentage
accuracy_dt = accuracy_score(y_true=y_test, y_pred=y_pred_dt)
print(f"Accuracy: {accuracy_dt:.4f} ({accuracy_dt * 100:.2f}%)")

Accuracy: 0.9719 (97.19%)


In [15]:
# Confusion Matrix (rows: true labels, columns: predicted labels)
cm_dt = confusion_matrix(y_true=y_test, y_pred=y_pred_dt)
print("\nConfusion Matrix:")
print(cm_dt)


Confusion Matrix:
[[523   8]
 [ 16 307]]


In [19]:
# Classification Report for the decision tree predictions.
# classification_report is already imported in the notebook's top import cell,
# so it is not re-imported here.
print("\nClassification Report:")
print(classification_report(y_test, y_pred_dt))


Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       531
           1       0.97      0.95      0.96       323

    accuracy                           0.97       854
   macro avg       0.97      0.97      0.97       854
weighted avg       0.97      0.97      0.97       854



### 2. K-Nearest Neighbors Classifier

In [23]:
# --- 1. Data Preprocessing (completed) ---
# --- 2. Model Implementation (KNN) ---
# KNeighborsClassifier is imported in the notebook's top import cell.
# Fit on the scaled training split.
print("\nTraining K-Nearest Neighbors Classifier (k=5)...")
knn_classifier = KNeighborsClassifier(n_neighbors=5) # Using default k=5
knn_classifier.fit(X_train_scaled, y_train)
print("Training complete.")


Training K-Nearest Neighbors Classifier (k=5)...
Training complete.


In [24]:
# --- 3. Model Evaluation (KNN) ---
# Predict labels for the scaled test split; the metrics cells below use them
print("\n--- KNN Evaluation Results ---")
y_pred_knn = knn_classifier.predict(X=X_test_scaled)


--- KNN Evaluation Results ---


In [25]:
# Accuracy of the KNN model on the test split, shown as a fraction and a percentage
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print(f"Accuracy: {accuracy_knn:.4f} ({accuracy_knn * 100:.2f}%)")

Accuracy: 0.8970 (89.70%)


In [27]:
# Confusion Matrix (rows: true labels, columns: predicted labels)
cm_knn = confusion_matrix(y_true=y_test, y_pred=y_pred_knn)
print("\nConfusion Matrix:")
print(cm_knn)


Confusion Matrix:
[[492  39]
 [ 49 274]]


In [29]:
# Classification Report for the KNN predictions.
# classification_report is already imported in the notebook's top import cell,
# so it is not re-imported here.
print("\nClassification Report:")
print(classification_report(y_test, y_pred_knn))


Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.93      0.92       531
           1       0.88      0.85      0.86       323

    accuracy                           0.90       854
   macro avg       0.89      0.89      0.89       854
weighted avg       0.90      0.90      0.90       854



### 3. Support Vector Classifier

In [34]:
# --- 1. Data Preprocessing (completed) ---
# --- 2. Model Implementation (SVC) ---
print("\nTraining Support Vector Classifier...")
# fit() returns the estimator, so construction and training are chained
svc_classifier = SVC(random_state=42).fit(X_train_scaled, y_train)
print("Training complete.")


Training Support Vector Classifier...
Training complete.


In [35]:
# --- 3. Model Evaluation (SVC) ---
# Predict labels for the scaled test split; the metrics cells below use them
print("\n--- SVC Evaluation Results ---")
y_pred_svc = svc_classifier.predict(X=X_test_scaled)


--- SVC Evaluation Results ---


In [36]:
# Accuracy of the SVC model on the test split, shown as a fraction and a percentage
accuracy_svc = accuracy_score(y_true=y_test, y_pred=y_pred_svc)
print(f"Accuracy: {accuracy_svc:.4f} ({accuracy_svc * 100:.2f}%)")

Accuracy: 0.9426 (94.26%)


In [37]:
# Confusion Matrix (rows: true labels, columns: predicted labels)
cm_svc = confusion_matrix(y_true=y_test, y_pred=y_pred_svc)
print("\nConfusion Matrix:")
print(cm_svc)


Confusion Matrix:
[[509  22]
 [ 27 296]]


In [40]:
# Classification Report for the SVC predictions.
# confusion_matrix and classification_report are already imported in the
# notebook's top import cell, so they are not re-imported here.
print("\nClassification Report:")
print(classification_report(y_test, y_pred_svc))


Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.95       531
           1       0.93      0.92      0.92       323

    accuracy                           0.94       854
   macro avg       0.94      0.94      0.94       854
weighted avg       0.94      0.94      0.94       854

