In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix,classification_report
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [9]:
df_cl=pd.read_csv('training_preprocessed.csv')
dt_cl=pd.read_csv('testing_preprocessed.csv')

In [10]:
df_cl_train,df_cl_val=train_test_split(df_cl,test_size=0.25,random_state=42)

In [11]:
y_cl_train=df_cl_train["Loan_Status"]
x_cl_train=df_cl_train.drop(columns="Loan_Status")
y_cl_val=df_cl_val["Loan_Status"]
x_cl_val=df_cl_val.drop(columns="Loan_Status")

**Naive Bayes**

In [12]:
nb = GaussianNB()
nb.fit(x_cl_train, y_cl_train)
y_cl_val_pred = nb.predict(x_cl_val)
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.7727, Precision: 0.7481, Recall: 0.9800, F1-score: 0.8485



**Logistic Regression**

In [13]:
nb = LogisticRegression()
nb.fit(x_cl_train, y_cl_train)
y_cl_val_pred = nb.predict(x_cl_val)
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.7597, Precision: 0.7405, Recall: 0.9700, F1-score: 0.8398



**SVM Linear**

In [28]:
rbf_svm = SVC(kernel='linear', C=1.0)
rbf_svm.fit(x_cl_train, y_cl_train)

# Predict on validation set
y_cl_val_pred = nb.predict(x_cl_val)

# Evaluation
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.7532, Precision: 0.7818, Recall: 0.8600, F1-score: 0.8190



**SVM Polynomial**

In [15]:
poly_svm = SVC(kernel='poly', degree=3, C=5.0, gamma='scale')
poly_svm.fit(x_cl_train, y_cl_train)
y_cl_val_pred = poly_svm.predict(x_cl_val)
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.6494, Precision: 0.6494, Recall: 1.0000, F1-score: 0.7874



**SVM RBF**

In [19]:
model = SVC(kernel='rbf', C=5, gamma=0.1)
model.fit(x_cl_train, y_cl_train)
y_cl_val_pred = model.predict(x_cl_val)
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.6558, Precision: 0.6556, Recall: 0.9900, F1-score: 0.7888



**Decision Trees**

In [20]:
nb = DecisionTreeClassifier()
nb.fit(x_cl_train, y_cl_train)
y_cl_val_pred = nb.predict(x_cl_val)
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.7338, Precision: 0.7864, Recall: 0.8100, F1-score: 0.7980



**XGB Classifier**

In [21]:
nb = XGBClassifier()
nb.fit(x_cl_train, y_cl_train)
y_cl_val_pred = nb.predict(x_cl_val)
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.7532, Precision: 0.7818, Recall: 0.8600, F1-score: 0.8190



**K Nearest Neighbours**

In [26]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=11)
knn.fit(x_cl_train, y_cl_train)
y_cl_val_pred = knn.predict(x_cl_val)
cm = confusion_matrix(y_cl_val, y_cl_val_pred)
acc = accuracy_score(y_cl_val, y_cl_val_pred)
precision = precision_score(y_cl_val, y_cl_val_pred)
recall = recall_score(y_cl_val, y_cl_val_pred)
f1 = f1_score(y_cl_val, y_cl_val_pred)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

Accuracy: 0.6234, Precision: 0.6500, Recall: 0.9100, F1-score: 0.7583

