# SVM - Support Vector Classifier Model

In [1]:
# ------------------------------------------------------------------
# Predict the loan approval status based on various data elements
# ------------------------------------------------------------------

In [2]:
# Import Libraries
import pandas as pd

# Load the raw loan records; all preprocessing is done on a separate deep copy
# so the frame as read from disk stays untouched.
LoanData = pd.read_csv(filepath_or_buffer="Loan_Data.csv")
LoanPrep = LoanData.copy(deep=True)

In [3]:
# Count the missing values in each column
LoanPrep.isna().sum()

gender     13
married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

In [4]:
# Replace the missing values
#
# Imputation strategy:
#   * categorical columns -> mode (most frequent value)
#   * numerical columns   -> mean

# 'ch' stands for Credit History - its values are either 1.0 or 0.0, so it is
# imputed like a categorical column.
cols_cat = ['gender', 'married', 'ch']
# mode() returns one row per tied mode; .iloc[0] keeps the first mode of each column.
LoanPrep[cols_cat] = LoanPrep[cols_cat].fillna(LoanPrep[cols_cat].mode().iloc[0])

# Average only the columns being filled. Calling mean() on the whole frame also
# tries to average the non-numeric columns (e.g. 'married', 'status'), which
# raises TypeError on pandas >= 2.0 where numeric_only defaults to False.
cols_num = ['loanamt']
LoanPrep[cols_num] = LoanPrep[cols_num].fillna(LoanPrep[cols_num].mean())

# Confirm that no missing values remain
LoanPrep.isnull().sum(axis=0)

gender     0
married    0
ch         0
income     0
loanamt    0
status     0
dtype: int64

In [5]:
# Gender is dropped as irrelevant: it is assumed not to be a factor in a bank's loan decision
LoanPrep = LoanPrep.drop(columns=['gender'])

In [6]:
# One-hot encode the categorical columns (create dummy variables).
#
# With no explicit column list, get_dummies encodes the object/category-typed
# columns; drop_first=True drops the first level of every encoded column.
LoanPrep = pd.get_dummies(data=LoanPrep, drop_first=True)

In [7]:
# Standardize the continuous features (income and loan amount) with StandardScaler
from sklearn.preprocessing import StandardScaler

scalar_ = StandardScaler()

# The same scaler object is re-fitted for each column in turn, so after the loop
# it only holds the statistics of the last column ('loanamt').
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set statistics leak into the features - consider fitting on the
# training split only.
for col in ('income', 'loanamt'):
    LoanPrep[col] = scalar_.fit_transform(LoanPrep[[col]])

In [8]:
# Inspect the preprocessed frame (bare last expression, so the notebook renders it)
LoanPrep

Unnamed: 0,ch,income,loanamt,married_Yes,status_Y
0,1.0,0.072991,0.000000,0,1
1,1.0,-0.134412,-0.219273,1,0
2,1.0,-0.393747,-0.957641,1,1
3,1.0,-0.462062,-0.314547,1,1
4,1.0,0.097728,-0.064454,0,1
...,...,...,...,...,...
609,1.0,-0.410130,-0.898095,0,1
610,1.0,-0.212557,-1.267279,1,1
611,1.0,0.437174,1.269371,1,1
612,1.0,0.357064,0.483367,1,1


In [9]:
# Separate the features (X, independent) from the target (Y, dependent)
# ---------------------------------------------------------------------
# X: every column except the encoded loan status.
X = LoanPrep.drop(columns=['status_Y'])
# Y: the encoded loan status, kept as a single-column DataFrame.
Y = LoanPrep.loc[:, ['status_Y']]

In [10]:
# Hold out 30% of the rows for testing; stratifying on Y keeps the class
# proportions the same in both splits, and the fixed seed makes the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
    stratify=Y,
)

In [11]:
# Import and build Support Vector Classifier
from sklearn.svm import SVC

# Default hyper-parameters are used.
svc = SVC()

# Y_train is a single-column DataFrame, i.e. shape (n, 1). scikit-learn expects
# a 1-D target and emits a DataConversionWarning when handed a column vector,
# so flatten it to shape (n,) before fitting.
svc.fit(X_train, Y_train.values.ravel())

  return f(**kwargs)


SVC()

In [12]:
# Run the fitted classifier on the held-out test features and show the predicted labels
Y_predict = svc.predict(X=X_test)
Y_predict

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1], dtype=uint8)

In [13]:
# Build the confusion matrix of true vs. predicted test labels
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=Y_test, y_pred=Y_predict)
cm

array([[ 31,  27],
       [  2, 125]])

In [14]:
# Mean accuracy of the classifier on the test split
score = svc.score(X=X_test, y=Y_test)
score

0.8432432432432433