# Machine Learning Implementation (Support Vector Machines)

This notebook covers the data splitting, training, and evaluation metrics for Support Vector Machine (SVM) classifiers trained on the cleaned loan-approval data.

## Importing our libraries

In [1]:
# Importing libraries
# Data handling and plotting.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Train/test splitting, evaluation metrics, and the SVM classifier.
from sklearn.model_selection import train_test_split
# NOTE(review): plot_confusion_matrix was removed from sklearn.metrics in scikit-learn 1.2,
# so this import raises ImportError on newer versions — confirm the pinned scikit-learn
# version, or migrate to ConfusionMatrixDisplay if the plotting helper is still needed.
from sklearn.metrics import classification_report, confusion_matrix, plot_confusion_matrix,roc_auc_score, roc_curve
from sklearn.svm import SVC

# Timing, random-integer distributions, and object serialization.
from time import time
from scipy.stats import randint
import pickle

## Reading saved clean data

In [2]:
# Load both cleaned variants of the loan-approval data from local pickle files.
# NOTE: unpickling can execute arbitrary code — only load .pkl files from a trusted source.
base_df = pd.read_pickle('cleaned_data_loan_approval.pkl')
base_no_outliers_df = pd.read_pickle('cleaned_data_loan_approval_without_outliers.pkl')

# Preview the first rows and print the shape of each frame:
# the full dataset first, then the one loaded from the "without_outliers" file.
for frame in (base_df, base_no_outliers_df):
    display(frame.head())
    print(frame.shape)

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Gender,Married,Education,Self_Employed,Loan_Status,Semiurban,Urban,1,2,3+
0,5849,0.0,146.412162,360.0,1.0,1,0,0,0,1,0,1,0,0,0
1,4583,1508.0,128.0,360.0,1.0,1,1,0,0,0,0,0,1,0,0
2,3000,0.0,66.0,360.0,1.0,1,1,0,1,1,0,1,0,0,0
3,2583,2358.0,120.0,360.0,1.0,1,1,1,0,1,0,1,0,0,0
4,6000,0.0,141.0,360.0,1.0,1,0,0,0,1,0,1,0,0,0


(614, 15)


Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Gender,Married,Education,Self_Employed,Loan_Status,Semiurban,Urban,1,2,3+
0,5849,0.0,146.412162,360.0,1.0,1,0,0,0,1,0,1,0,0,0
1,4583,1508.0,128.0,360.0,1.0,1,1,0,0,0,0,0,1,0,0
2,3000,0.0,66.0,360.0,1.0,1,1,0,1,1,0,1,0,0,0
3,2583,2358.0,120.0,360.0,1.0,1,1,1,0,1,0,1,0,0,0
4,6000,0.0,141.0,360.0,1.0,1,0,0,0,1,0,1,0,0,0


(445, 15)


## Splitting Data

In [3]:
# Predictor columns shared by both dataset variants (order kept as originally selected).
feature_columns = [
    'LoanAmount', 'ApplicantIncome', 'CoapplicantIncome', 'Loan_Amount_Term',
    'Credit_History', 'Gender', 'Married', 'Education', 'Self_Employed',
    'Semiurban', 'Urban', '1', '2', '3+',
]
# Column the models are trained to predict.
target_column = 'Loan_Status'

# Base Model
X, y = base_df[feature_columns], base_df[target_column]

# With outliers removed
X_1, y_1 = base_no_outliers_df[feature_columns], base_no_outliers_df[target_column]

# Sanity check: feature matrix and target must have the same number of rows.
print(f'Shape: X{X.shape}, y{y.shape}')
print(f'Shape: X_1{X_1.shape}, y_1{y_1.shape}')

Shape: X(614, 14), y(614,)
Shape: X_1(445, 14), y_1(445,)


📝 Both are the same dataset; the variables suffixed with `_1` (`X_1`, `y_1`) come from the version with outliers removed.

## Training SVMs

In [4]:
# Hold out 30% of the base dataset for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=100,
)

# Fit a linear-kernel SVM on the training split, then predict the held-out rows.
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# Per-class precision / recall / F1 on the test split.
print(classification_report(y_test, svm_pred))

# Confusion matrix labelled so rows read as actual classes and columns as predictions.
svm_confusion = confusion_matrix(y_test, svm_pred)
actual_labels = ["Actual False", "Actual True"]
predicted_labels = ["Predicted False", "Predicted True"]
df_matrix = pd.DataFrame(svm_confusion, index=actual_labels, columns=predicted_labels)

# Last expression of the cell: rendered as a blue-shaded table.
df_matrix.style.background_gradient(cmap='Blues')

              precision    recall  f1-score   support

           0       0.78      0.33      0.47        63
           1       0.73      0.95      0.83       122

    accuracy                           0.74       185
   macro avg       0.76      0.64      0.65       185
weighted avg       0.75      0.74      0.71       185



Unnamed: 0,Predicted False,Predicted True
Actual False,21,42
Actual True,6,116


In [9]:
# Same 70/30 split and seed as the base model, applied to the outlier-free dataset.
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(
    X_1, y_1,
    test_size=0.3,
    random_state=100,
)

# Fit a linear-kernel SVM on the outlier-free training split, then predict its test rows.
svm_model_1 = SVC(kernel='linear')
svm_model_1.fit(X_train_1, y_train_1)
svm_pred_1 = svm_model_1.predict(X_test_1)

# Per-class precision / recall / F1 on the outlier-free test split.
print(classification_report(y_test_1, svm_pred_1))

# Confusion matrix labelled so rows read as actual classes and columns as predictions.
# Note: this reassigns df_matrix, replacing the table built for the base model.
svm_confusion_1 = confusion_matrix(y_test_1, svm_pred_1)
actual_labels = ["Actual False", "Actual True"]
predicted_labels = ["Predicted False", "Predicted True"]
df_matrix = pd.DataFrame(svm_confusion_1, index=actual_labels, columns=predicted_labels)

# Last expression of the cell: rendered as a blue-shaded table.
df_matrix.style.background_gradient(cmap='Blues')

              precision    recall  f1-score   support

           0       0.89      0.23      0.36        35
           1       0.78      0.99      0.88        99

    accuracy                           0.79       134
   macro avg       0.84      0.61      0.62       134
weighted avg       0.81      0.79      0.74       134



Unnamed: 0,Predicted False,Predicted True
Actual False,8,27
Actual True,1,98
