In [71]:
import pandas as pd
import numpy as np 

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report

%run util.ipynb

# SVM

In [72]:
# References:
# - scikit-learn SVM user guide: https://scikit-learn.org/stable/modules/svm.html
# - sklearn.svm.SVC API reference: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

## Load data

In [73]:
# Load the inputs (X) and targets (Y) used by every cell below.
# NOTE(review): get_data is not defined in this notebook; presumably it comes
# from the `%run util.ipynb` in the first cell — confirm.
(X, Y) = get_data()

In [74]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, Y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(Xtrain)
Xtrain, Xtest = scaler.transform(Xtrain), scaler.transform(Xtest)

### No hyperparameter tuning (default SVC parameters)

In [75]:
# Baseline: an SVC built with its default constructor arguments, trained on
# the scaled training split.
clf = svm.SVC().fit(Xtrain, Ytrain)

# Echo the fitted estimator so the cell still displays it.
clf

In [76]:
# Baseline predictions on the held-out split.
Ypred = clf.predict(Xtest)

# Test accuracy of the baseline model (shown as the cell output).
accuracy_score(y_true=Ytest, y_pred=Ypred)

0.7916666666666666

In [77]:
# Per-class precision / recall / F1 for the baseline model on the test split.
print(classification_report(y_true=Ytest, y_pred=Ypred))

              precision    recall  f1-score   support

           0       0.76      0.88      0.81        50
           1       0.84      0.70      0.76        46

    accuracy                           0.79        96
   macro avg       0.80      0.79      0.79        96
weighted avg       0.80      0.79      0.79        96



### Hyperparameter tuning (grid search)

In [78]:
# Inspect the parameter values the baseline classifier is using; since it was
# built with a bare SVC(), these are the defaults the search below varies.
clf.get_params(deep=True)

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [79]:
# Candidate values are powers of two, written as exponent lists.
# The gamma exponents are kept in their original (non-monotonic) order.
# NOTE(review): the recorded search result selects C=512 and gamma=2**-9,
# i.e. the largest C and the smallest gamma offered here — the optimum may
# lie outside this grid; consider widening it.
param_grid_svm = dict(
    C=[2**exp for exp in (-5, -1, 9)],
    gamma=[2**exp for exp in (-9, -5, -1, -3)],
)

# Exhaustive search over the grid above: 10-fold cross-validation scored by
# accuracy, using all available cores.
grid_search_svm = GridSearchCV(
    svm.SVC(),
    param_grid_svm,
    scoring='accuracy',
    n_jobs=-1,
    cv=10,
)

In [80]:
# Run the cross-validated search on the scaled training split.
# NOTE(review): Xtrain was standardised with statistics from the whole
# training split before this search, so the validation part of every CV fold
# contributed to the scaling; wrapping the scaler and SVC in a Pipeline would
# avoid that.
grid_search_svm.fit(X=Xtrain, y=Ytrain)

In [81]:
# Winning parameter combination and the score the search recorded for it.
(grid_search_svm.best_params_, grid_search_svm.best_score_)

({'C': 512, 'gamma': 0.001953125}, np.float64(0.7671936758893281))

In [82]:
# Take the estimator the grid search selected.
bestModel = grid_search_svm.best_estimator_

# Predict the held-out split with the tuned model.
YpredGrid = bestModel.predict(Xtest)

# Test accuracy of the tuned model (shown as the cell output).
accuracy_score(y_true=Ytest, y_pred=YpredGrid)

0.7604166666666666

In [83]:
# Per-class precision / recall / F1 for the tuned model on the test split.
print(classification_report(y_true=Ytest, y_pred=YpredGrid))

              precision    recall  f1-score   support

           0       0.71      0.90      0.80        50
           1       0.85      0.61      0.71        46

    accuracy                           0.76        96
   macro avg       0.78      0.75      0.75        96
weighted avg       0.78      0.76      0.75        96



### Check for overfitting

In [None]:
# Accuracy of the tuned model on the data it was trained on; compare with its
# test accuracy — a large gap between the two points to overfitting.
YtrainPred = (
    grid_search_svm
    .best_estimator_
    .predict(Xtrain)
)
accuracy_score(y_true=Ytrain, y_pred=YtrainPred)

0.9237668161434978