# SVM - Support Vector Machine

In [1]:
import numpy as np
import pandas as pd

from sklearn import svm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

In [2]:
# header=None: the first line of the file is read as data and the columns
# receive integer labels.
df = pd.read_csv(
    filepath_or_buffer='datasets/pima-indians-diabetes.csv',
    header=None,
)

In [3]:
# Total number of cells (rows x columns) in the frame, not the row count.
df.shape[0] * df.shape[1]

6912

In [4]:
# Every column except the last becomes the feature matrix; the last column
# is the target vector.
X, y = df.values[:, :-1], df.values[:, -1]

In [5]:
# scale_type (None, 'norm'-normalization, 'std'-standardization)
def svm_eval(n_splits=3, scale_type=None):
    """Cross-validate a default ``svm.SVC`` and return the per-fold accuracies.

    The function reads the notebook-level ``X`` (features) and ``y`` (labels);
    both must be defined before it is called.

    Parameters
    ----------
    n_splits : int, default 3
        Number of folds passed to ``StratifiedKFold`` (shuffled, with
        ``random_state=42``).
    scale_type : {None, 'norm', 'std'}, default None
        Feature scaling applied inside every fold:
        ``None`` leaves the features untouched, ``'norm'`` uses
        ``MinMaxScaler`` and ``'std'`` uses ``StandardScaler``.

    Returns
    -------
    list
        One ``accuracy_score`` value per fold, in fold order.

    Raises
    ------
    ValueError
        If ``scale_type`` is not one of the supported options.
    """
    # Fail fast on a misspelled option: previously any unknown value silently
    # fell through to "no scaling", so the reported scores did not match the
    # configuration the caller believed they were evaluating.
    if scale_type not in (None, 'norm', 'std'):
        raise ValueError(
            "scale_type must be None, 'norm' or 'std', got %r" % (scale_type,)
        )

    scalers = {
        'std': preprocessing.StandardScaler,
        'norm': preprocessing.MinMaxScaler,
    }

    accuracy = []
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    for train_index, test_index in kfold.split(X, y):
        X_train = X[train_index]
        X_test = X[test_index]

        if scale_type is not None:
            # The scaler is fitted on the training fold only and then applied
            # to both folds, so no test-fold statistics reach the model.
            scale = scalers[scale_type]().fit(X_train)
            X_train = scale.transform(X_train)
            X_test = scale.transform(X_test)

        model = svm.SVC()
        model.fit(X_train, y[train_index])
        y_pred = model.predict(X_test)
        accuracy.append(accuracy_score(y[test_index], y_pred))
    return accuracy

# Usando k-fold com k=[3, 5, 10]

In [10]:
# Unscaled features: repeat the evaluation for each fold count and print the
# per-fold accuracies followed by their mean.
for n_folds in (3, 5, 10):
    accuracy = svm_eval(n_folds)
    print(accuracy)
    print('mean', np.mean(accuracy))

[0.64980544747081714, 0.65234375, 0.65098039215686276]
mean 0.651043196543
[0.64935064935064934, 0.64935064935064934, 0.64935064935064934, 0.65359477124183007, 0.65359477124183007]
mean 0.651048298107
[0.64935064935064934, 0.64935064935064934, 0.64935064935064934, 0.64935064935064934, 0.64935064935064934, 0.64935064935064934, 0.64935064935064934, 0.64935064935064934, 0.65789473684210531, 0.65789473684210531]
mean 0.651059466849


# Usando Normalization

In [11]:
# Min-max scaled features: repeat the evaluation for each fold count and print
# the per-fold accuracies followed by their mean.
for n_folds in (3, 5, 10):
    accuracy = svm_eval(n_folds, scale_type='norm')
    print(accuracy)
    print('mean', np.mean(accuracy))

[0.73151750972762641, 0.7734375, 0.73333333333333328]
mean 0.746096114354
[0.75974025974025972, 0.77922077922077926, 0.77272727272727271, 0.77777777777777779, 0.72549019607843135]
mean 0.762991257109
[0.76623376623376627, 0.77922077922077926, 0.76623376623376627, 0.80519480519480524, 0.7142857142857143, 0.79220779220779225, 0.77922077922077926, 0.76623376623376627, 0.71052631578947367, 0.75]
mean 0.762935748462


# Usando Standardization

In [12]:
# Standardized features: repeat the evaluation for each fold count and print
# the per-fold accuracies followed by their mean.
for n_folds in (3, 5, 10):
    accuracy = svm_eval(n_folds, scale_type='std')
    print(accuracy)
    print('mean', np.mean(accuracy))

[0.77821011673151752, 0.75390625, 0.75686274509803919]
mean 0.762993037277
[0.7857142857142857, 0.72727272727272729, 0.74025974025974028, 0.76470588235294112, 0.75816993464052285]
mean 0.755224514048
[0.76623376623376627, 0.77922077922077926, 0.75324675324675328, 0.75324675324675328, 0.7142857142857143, 0.79220779220779225, 0.75324675324675328, 0.74025974025974028, 0.75, 0.81578947368421051]
mean 0.761773752563
