In [2]:
import pandas as pd
import numpy as np
import random
import csv
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix  

In [3]:
url = "D:\\TUGAS AKHIR\\Data TA\\Australian fraud dataset\\australian.csv"
colnames = ['x1', 'x2', 'x3','x4','x5','x6','x7','x8','x9','x10','x11','x12','x13','x14', 'y']
data = pd.read_csv(url, header = None, names = colnames)
X = data.drop('y', axis = 1)
X = (X-X.mean())/X.std()
y = data['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)  

In [4]:
nfolds = 5
Cs = [0.001, 0.01, 0.2, 1, 10]
gammas = [0.001, 0.01, 0.2, 1, 10]
param_grid = {'C': Cs, 'gamma' : gammas}
grid_search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=nfolds)
grid_search.fit(X_train, y_train)
best_gamma = grid_search.best_params_['gamma']
best_C = grid_search.best_params_['C']
best_gamma, best_C

(0.01, 0.2)

In [5]:
svclassifier = SVC(kernel='rbf', gamma = best_gamma, C = best_C )  
svclassifier.fit(X_train, y_train)
y_train_pred = svclassifier.predict(X_train)
y_test_pred = svclassifier.predict(X_test)
confusion_matrix(y_train,y_train_pred), confusion_matrix(y_test, y_test_pred)

(array([[241,  55],
        [ 18, 238]], dtype=int64), array([[72, 15],
        [ 5, 46]], dtype=int64))

In [6]:
(confusion_matrix(y_train,y_train_pred)[0][1] + confusion_matrix(y_train,y_train_pred)[1][0])/np.sum(confusion_matrix(y_train,y_train_pred)), (confusion_matrix(y_test,y_test_pred)[0][1] + confusion_matrix(y_test,y_test_pred)[1][0])/np.sum(confusion_matrix(y_test,y_test_pred))

(0.1322463768115942, 0.14492753623188406)

In [7]:
svclassifier = SVC(kernel='rbf', gamma = 0.37874898, C = 1.84307884 )  
svclassifier.fit(X_train, y_train)
y_train_pred = svclassifier.predict(X_train)
y_test_pred = svclassifier.predict(X_test)
confusion_matrix(y_train,y_train_pred), confusion_matrix(y_test, y_test_pred)

(array([[292,   4],
        [  9, 247]], dtype=int64), array([[75, 12],
        [12, 39]], dtype=int64))

In [17]:
svclassifier.decision_function(X)

array([-1.16155822, -1.2259314 , -0.74400916,  1.00026566,  1.00014753,
        1.00043368, -0.98279247,  1.00039536, -1.00049639, -0.99984742,
        0.86405789,  0.99984136, -0.99998266,  1.00001386, -0.9997529 ,
        0.25646244,  0.99995283,  1.15093051, -1.28397648,  1.38357168,
       -1.16167223, -0.99990582, -1.41056606, -0.99998384, -1.31870109,
        0.99976589, -1.23106243,  1.00001918,  1.00004432,  1.00015469,
        0.45617807, -0.11781448, -0.99997886,  0.99963907, -1.0000086 ,
       -1.00018032,  0.99962884,  0.99984956,  1.00037431,  0.07079663,
        0.99983897, -1.19781424,  1.0003678 , -0.39758009, -0.95143984,
        0.99997435, -0.99993653,  0.17327991, -1.00007454,  0.91974179,
       -1.3593536 , -0.99972098, -1.15841982, -1.04431672, -0.06355347,
        0.85542796,  1.11888589, -1.00004787,  0.68552072,  0.99997336,
       -1.16419723, -1.14981177,  0.99966478,  0.06310866,  0.99996966,
        0.7132102 , -0.99991327, -0.99992175, -0.99999651,  0.37

In [18]:
svclassifier.intercept_

array([0.06583346])