In [13]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, KFold, cross_val_score, cross_validate

# Read the workbook, then drop the 'ID' and 'ZIP Code' columns from the frame.
df_Ubank = pd.read_excel('Universalbank_1500-1.xlsx')
df_Ubank = df_Ubank.drop(columns=['ID', 'ZIP Code'])

# Target is the 'Personal Loan' column; every other remaining column is a predictor.
Ubank_y = df_Ubank['Personal Loan']
Ubank_X = df_Ubank.drop(columns=['Personal Loan'])

# Partition Data: one third of the rows go to validation; the fixed seed
# makes the split identical on every run.
train_X, valid_X, train_y, valid_y = train_test_split(
    Ubank_X,
    Ubank_y,
    test_size=1/3,
    random_state=2,
)

In [14]:
# Fit a linear-kernel support vector classifier on the training partition.
svm = SVC(kernel='linear')
svm.fit(train_X, train_y)

# Optimal Hyperplane: a single 'Coefficient' row with one column per feature
# weight, followed by the intercept as the last column.
pd.DataFrame(
    [np.concatenate((svm.coef_.ravel(), svm.intercept_.ravel()))],
    index=['Coefficient'],
    columns=[*Ubank_X.columns, 'Intercept'],
)

Unnamed: 0,Age,Experience,Income,Family,CCAvg,Education,Mortgage,Securities Account,CD Account,Online,CreditCard,Intercept
Coefficient,-0.044252,0.053797,0.02902,0.241554,0.045743,1.042725,0.000438,-0.702149,1.795864,-0.067534,-0.620861,-6.223595


In [15]:
# Use 10 times repeated 5-fold Cross Validation for SVM.
# Each repetition shuffles the rows with its own fixed seed, so the ten
# partitions differ from one another yet are identical on every re-run
# (Restart Kernel -> Run All reproduces the printed accuracies).
svm_total_train_acc = []
svm_total_test_acc = []

for i in range(10):
    # random_state = i gives repetition i a distinct but deterministic shuffle;
    # the previous random_state = None produced different folds on every run.
    svm_cv = KFold(n_splits = 5, shuffle = True, random_state = i)

    # Score accuracy on both the training part and the held-out part of each fold.
    svm_scores = cross_validate(svm, Ubank_X, Ubank_y, cv = svm_cv, scoring = 'accuracy', return_train_score = True)

    # Average the five fold scores into one figure per repetition.
    svm_train_acc = np.mean(svm_scores['train_score'])
    svm_test_acc = np.mean(svm_scores['test_score'])

    svm_total_train_acc.append(svm_train_acc)
    svm_total_test_acc.append(svm_test_acc)

    print('{}:'.format(i+1))
    print('Accuracy for Training Data:', svm_train_acc)
    print('Accuracy for Test Data:', svm_test_acc, '\n')


# Grand mean over the ten repetitions.
print('Average Accuracy for Training Data:', np.mean(svm_total_train_acc))
print('Average Accuracy for Test Data:', np.mean(svm_total_test_acc))

1:
Accuracy for Training Data: 0.9523333333333334
Accuracy for Test Data: 0.9479999999999998 

2:
Accuracy for Training Data: 0.9531666666666666
Accuracy for Test Data: 0.9513333333333334 

3:
Accuracy for Training Data: 0.9536666666666667
Accuracy for Test Data: 0.952 

4:
Accuracy for Training Data: 0.9528333333333332
Accuracy for Test Data: 0.9486666666666667 

5:
Accuracy for Training Data: 0.9528333333333334
Accuracy for Test Data: 0.9493333333333333 

6:
Accuracy for Training Data: 0.9526666666666668
Accuracy for Test Data: 0.9473333333333332 

7:
Accuracy for Training Data: 0.9525
Accuracy for Test Data: 0.9486666666666667 

8:
Accuracy for Training Data: 0.9521666666666666
Accuracy for Test Data: 0.9513333333333334 

9:
Accuracy for Training Data: 0.9530000000000001
Accuracy for Test Data: 0.9446666666666665 

10:
Accuracy for Training Data: 0.9515
Accuracy for Test Data: 0.9480000000000001 

Average Accuracy for Training Data: 0.9526666666666666
Average Accuracy for Test Data: