In [25]:
# ------------------------------------------------------------------
# Build the Support Vector Classifier Model
# Predict the loan approval status based on
# Marital Status, Credit History, Income and Loan Amount
# (Gender is loaded but dropped before modelling)
# ------------------------------------------------------------------

# Import libraries and read the CSV file
import os

import pandas as pd

# The CSV location can be overridden through the LOAN_DATA_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original local path is used.
data_path = os.environ.get(
    'LOAN_DATA_CSV',
    r'C:\Users\o.resulov\Desktop\QSS\01Exercise1.csv',
)
data = pd.read_csv(data_path)
data

Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,,Y
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y
...,...,...,...,...,...,...
609,Female,No,1.0,2900,71.0,Y
610,Male,Yes,1.0,4106,40.0,Y
611,Male,Yes,1.0,8072,253.0,Y
612,Male,Yes,1.0,7583,187.0,Y


In [26]:
# Count the missing (NaN) entries in each column
data.isna().sum()

gender     13
married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

In [27]:
# Handle missing values: no imputation is done, every row that contains
# at least one NaN is discarded.
data = data.dropna(axis=0, how='any')
data

Unnamed: 0,gender,married,ch,income,loanamt,status
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y
5,Male,Yes,1.0,5417,267.0,Y
...,...,...,...,...,...,...
609,Female,No,1.0,2900,71.0,Y
610,Male,Yes,1.0,4106,40.0,Y
611,Male,Yes,1.0,8072,253.0,Y
612,Male,Yes,1.0,7583,187.0,Y


In [28]:
# Remove the column judged irrelevant on business grounds (gender)
data = data.drop(columns=['gender'])
data

Unnamed: 0,married,ch,income,loanamt,status
1,Yes,1.0,4583,128.0,N
2,Yes,1.0,3000,66.0,Y
3,Yes,1.0,2583,120.0,Y
4,No,1.0,6000,141.0,Y
5,Yes,1.0,5417,267.0,Y
...,...,...,...,...,...
609,No,1.0,2900,71.0,Y
610,Yes,1.0,4106,40.0,Y
611,Yes,1.0,8072,253.0,Y
612,Yes,1.0,7583,187.0,Y


In [34]:
# Create dummy (indicator) variables for the remaining categorical columns.
# drop_first=True drops the first level of each category, leaving a single
# indicator per binary column (married_Yes, status_Y).
# dtype is pinned to uint8 so the indicators are 0/1 integers regardless of
# the installed pandas version (newer releases default to bool).
data = pd.get_dummies(data, drop_first=True, dtype='uint8')
data

Unnamed: 0,ch,income,loanamt,married_Yes,status_Y
1,1.0,4583,128.0,1,0
2,1.0,3000,66.0,1,1
3,1.0,2583,120.0,1,1
4,1.0,6000,141.0,0,1
5,1.0,5417,267.0,1,1
...,...,...,...,...,...
609,1.0,2900,71.0,0,1
610,1.0,4106,40.0,1,1
611,1.0,8072,253.0,1,1
612,1.0,7583,187.0,1,1


In [47]:
# Standardize Income and Loan Amount (zero mean, unit variance) using StandardScaler
from sklearn.preprocessing import StandardScaler

# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed later in the notebook, so test-set statistics
# leak into the training features. Consider fitting on the training rows only.
scaler = StandardScaler()

# Fit once on both columns so `scaler` keeps the mean/std of income AND
# loanamt. StandardScaler scales each column independently, so the resulting
# values are the same as fitting the columns one at a time.
scaled_values = scaler.fit_transform(data[['income', 'loanamt']])
data['income'] = scaled_values[:, 0]
data['loanamt'] = scaled_values[:, 1]
data


Unnamed: 0,ch,income,loanamt,married_Yes,status_Y
1,1.0,-0.128073,-0.194250,1,0
2,1.0,-0.392077,-0.971015,1,1
3,1.0,-0.461621,-0.294478,1,1
4,1.0,0.108246,-0.031380,0,1
5,1.0,0.011017,1.547205,1,1
...,...,...,...,...,...
609,1.0,-0.408754,-0.908372,0,1
610,1.0,-0.207624,-1.296754,1,1
611,1.0,0.453802,1.371807,1,1
612,1.0,0.372249,0.544929,1,1


In [51]:
# Separate the predictors (X) from the target (y).
# status_Y is the dummy-encoded loan status and is the column being predicted.
X = data.drop(columns=['status_Y'])
y = data['status_Y']
X

Unnamed: 0,ch,income,loanamt,married_Yes
1,1.0,-0.128073,-0.194250,1
2,1.0,-0.392077,-0.971015,1
3,1.0,-0.461621,-0.294478,1
4,1.0,0.108246,-0.031380,0
5,1.0,0.011017,1.547205,1
...,...,...,...,...
609,1.0,-0.408754,-0.908372,0
610,1.0,-0.207624,-1.296754,1
611,1.0,0.453802,1.371807,1
612,1.0,0.372249,0.544929,1


In [55]:
# Hold out 30% of the rows as a test set. The split is stratified on y and
# uses a fixed seed so it is repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1234,
    stratify=y,
)

In [56]:
# Train a Support Vector Classifier with the library's default
# hyper-parameters on the training split.
from sklearn import svm

# fit() returns the estimator itself, so construction and training are chained.
model = svm.SVC().fit(X_train, y_train)
model


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [63]:
# Use the fitted classifier to predict the loan status of the test rows
model_estimation = model.predict(X=X_test)
model_estimation

array([0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1], dtype=uint8)

In [68]:
# Build the confusion matrix and compute the accuracy on the test set
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# accuracy_score returns the fraction of test rows classified correctly.
accuracy = accuracy_score(y_test, model_estimation)

# Backward-compatible alias: this name was used originally, but the value it
# holds is the ACCURACY, not an F1 score.
f1score = accuracy

# Rows are the true classes, columns are the predicted classes.
CM = confusion_matrix(y_test, model_estimation)
accuracy


0.8553459119496856

In [69]:
# Display the confusion matrix computed from y_test and model_estimation
# (rows = true class, columns = predicted class; class 1 is status_Y == 1).
CM

array([[ 27,  22],
       [  1, 109]], dtype=int64)