In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [3]:
# Loading data
df_business = pd.read_csv('../Machine_Learning_CSVs/encoded_loans_dfV2.0.csv', index_col='LoanNumber')
df_business

Unnamed: 0_level_0,BorrowerZip,LoanStatus,Term,InitialApprovalAmount,CurrentApprovalAmount,JobsReported,ForgivenessAmount,rating,review_count,lat,...,MonthApproved_1,MonthApproved_2,MonthApproved_3,MonthApproved_4,MonthApproved_5,MonthApproved_6,MonthApproved_7,MonthApproved_8,YearApproved_2020,YearApproved_2021
LoanNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8728957203,80202,Paid in Full,24,7275742.0,7275742.0,500.0,7356987.79,1.5,7,39.754760,...,0,0,0,1,0,0,0,0,1,0
9826177105,80204,Paid in Full,24,5865500.0,5865500.0,391.0,5943869.60,2.5,3,39.737240,...,0,0,0,1,0,0,0,0,1,0
2089597208,80202,Paid in Full,24,5114560.0,5114560.0,227.0,5173803.65,3.0,2,39.751680,...,0,0,0,1,0,0,0,0,1,0
8574487102,80230,Paid in Full,24,3527000.0,3527000.0,249.0,3573732.75,1.0,4,39.717893,...,0,0,0,1,0,0,0,0,1,0
7218167001,80216,Exemption 4,60,3230500.0,3230500.0,500.0,971592.57,1.0,1,39.771974,...,0,0,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5431627705,80222,Paid in Full,24,1395.0,1395.0,2.0,1411.43,5.0,8,39.684343,...,0,0,0,0,1,0,0,0,1,0
2773518503,80237,Paid in Full,60,1000.0,1000.0,1.0,1009.84,5.0,5,39.598190,...,0,1,0,0,0,0,0,0,0,1
4250667310,80237,Paid in Full,24,1000.0,1000.0,1.0,1008.25,5.0,5,39.598190,...,0,0,0,1,0,0,0,0,1,0
5634797307,80209,Paid in Full,24,513.0,513.0,1.0,517.42,5.0,3,39.705286,...,0,0,0,1,0,0,0,0,1,0


In [4]:
# Define features and target
y = df_business["LoanStatus"]
X = df_business.drop(columns="LoanStatus")

In [5]:
# Train test & split
X_train, X_test, y_train, y_test = train_test_split(X,
   y,  random_state=1, stratify=y)

In [6]:
# Scale data
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [7]:
# Model, fit and predict with SVC
model = SVC(kernel='linear')

model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)
results = pd.DataFrame({
   "Prediction": y_pred,
   "Actual": y_test
}).reset_index(drop=True)
results.head()


Unnamed: 0,Prediction,Actual
0,Paid in Full,Paid in Full
1,Paid in Full,Paid in Full
2,Paid in Full,Paid in Full
3,Paid in Full,Paid in Full
4,Paid in Full,Paid in Full


In [8]:
# Calculated the accuracy score
accuracy_score(y_test, y_pred)

0.9735099337748344

In [9]:
confusion_matrix(y_test, y_pred)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

 Exemption 4       0.00      0.00      0.00         8
Paid in Full       0.97      1.00      0.99       294

    accuracy                           0.97       302
   macro avg       0.49      0.50      0.49       302
weighted avg       0.95      0.97      0.96       302



# Remove non important columns

In [10]:
filtered_biz_df = df_business.filter(['LoanStatus','InitialApprovalAmount','ForgivenessAmount','CurrentApprovalAmount',
                                      'review_count','lat','lon','JobsReported','BorrowerZip','rating'], axis=1)
filtered_biz_df     

Unnamed: 0_level_0,LoanStatus,InitialApprovalAmount,ForgivenessAmount,CurrentApprovalAmount,review_count,lat,lon,JobsReported,BorrowerZip,rating
LoanNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
8728957203,Paid in Full,7275742.0,7356987.79,7275742.0,7,39.754760,-104.997910,500.0,80202,1.5
9826177105,Paid in Full,5865500.0,5943869.60,5865500.0,3,39.737240,-105.008310,391.0,80204,2.5
2089597208,Paid in Full,5114560.0,5173803.65,5114560.0,2,39.751680,-104.998950,227.0,80202,3.0
8574487102,Paid in Full,3527000.0,3573732.75,3527000.0,4,39.717893,-104.895233,249.0,80230,1.0
7218167001,Exemption 4,3230500.0,971592.57,3230500.0,1,39.771974,-104.995255,500.0,80216,1.0
...,...,...,...,...,...,...,...,...,...,...
5431627705,Paid in Full,1395.0,1411.43,1395.0,8,39.684343,-104.937757,2.0,80222,5.0
2773518503,Paid in Full,1000.0,1009.84,1000.0,5,39.598190,-104.903560,1.0,80237,5.0
4250667310,Paid in Full,1000.0,1008.25,1000.0,5,39.598190,-104.903560,1.0,80237,5.0
5634797307,Paid in Full,513.0,517.42,513.0,3,39.705286,-104.941672,1.0,80209,5.0


In [17]:
# Define features and target
y = filtered_biz_df ["LoanStatus"]
X = filtered_biz_df.drop(columns="LoanStatus")

In [18]:
# Train test & split
X_train, X_test, y_train, y_test = train_test_split(X,
   y,  random_state=1, stratify=y)

In [19]:
# Scale data
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [20]:
# Model, fit and predict with SVC
model = SVC(kernel='linear')

model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)
results = pd.DataFrame({
   "Prediction": y_pred,
   "Actual": y_test
}).reset_index(drop=True)
results.head()


Unnamed: 0,Prediction,Actual
0,Paid in Full,Paid in Full
1,Paid in Full,Paid in Full
2,Paid in Full,Paid in Full
3,Paid in Full,Paid in Full
4,Paid in Full,Paid in Full


In [21]:
# Calculated the accuracy score
accuracy_score(y_test, y_pred)

0.9735099337748344

In [22]:
confusion_matrix(y_test, y_pred)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

 Exemption 4       0.00      0.00      0.00         8
Paid in Full       0.97      1.00      0.99       294

    accuracy                           0.97       302
   macro avg       0.49      0.50      0.49       302
weighted avg       0.95      0.97      0.96       302

