# Imports

In [6]:
from sklearn import svm
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import KFold, cross_validate, LeaveOneOut, train_test_split, GridSearchCV
import pandas as pd

# Information on Support vector machines
[sklearn website](https://scikit-learn.org/stable/modules/svm.html#svm-kernels)

# Create some fake data

In [2]:
X = np.random.rand(20)
print(X)

[0.00308122 0.25865966 0.83600584 0.34755135 0.46534994 0.6492493
 0.16142487 0.52242716 0.66373048 0.0580653  0.65593346 0.84373899
 0.1540254  0.77479057 0.40662182 0.13985343 0.54261936 0.5815225
 0.57731462 0.74569474]


In [3]:
## reshape data for fitting model
X = X.reshape(-1,1)
print(X)

[[0.00308122]
 [0.25865966]
 [0.83600584]
 [0.34755135]
 [0.46534994]
 [0.6492493 ]
 [0.16142487]
 [0.52242716]
 [0.66373048]
 [0.0580653 ]
 [0.65593346]
 [0.84373899]
 [0.1540254 ]
 [0.77479057]
 [0.40662182]
 [0.13985343]
 [0.54261936]
 [0.5815225 ]
 [0.57731462]
 [0.74569474]]


In [4]:
y = np.random.randint(0,2, size=20)
print(y)

[0 1 1 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1 1 0]


# Call our SVM module to create an instance of a classifier

In [5]:
clf = svm.SVC()

## Explore the inputs of the SVC

In [6]:
svm.SVC??

[0;31mInit signature:[0m
[0msvm[0m[0;34m.[0m[0mSVC[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mC[0m[0;34m=[0m[0;36m1.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkernel[0m[0;34m=[0m[0;34m'rbf'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdegree[0m[0;34m=[0m[0;36m3[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgamma[0m[0;34m=[0m[0;34m'scale'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcoef0[0m[0;34m=[0m[0;36m0.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mshrinking[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprobability[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtol[0m[0;34m=[0m[0;36m0.001[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcache_size[0m[0;34m=[0m[0;36m200[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclass_weight[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverbose[0m[0;34m=[0m[0;32mFalse[

# Fit model

In [11]:
# Fit the model to x and y
clf.fit(X,y)

SVC()

In [12]:
# Verify we only have two classes
clf.classes_

array([0, 1])

## See how well the training data fits

In [13]:
# Get the training classification score
clf.score(X,y)

0.6

# What if we change the kernel?

In [14]:
# Kernels accepted by the function
list_kerns = ['linear', 'poly', 'rbf', 'sigmoid']

# Loop over them and train a model

for kernel in list_kerns:
    print(f'Kernel being used is {kernel}')
    clf = svm.SVC(kernel=kernel)
    clf.fit(X,y)
    print(clf.score(X,y))


Kernel being used is linear
0.55
Kernel being used is poly
0.55
Kernel being used is rbf
0.6
Kernel being used is sigmoid
0.55


# Grid Search

In [7]:
data_df = pd.read_csv('./data/60x61.csv', header=None)
data_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,1,0.062832,0.097064,0.041652,0.008974,0.093586,0.011168,0.010246,0.097714,0.093843,...,0.003632,0.028356,0.066206,0.062532,0.10037,0.069072,0.025882,0.054651,0.11276,0.10607
1,1,0.025996,0.056383,0.052404,0.079622,0.07071,0.046787,0.004721,0.034333,0.059902,...,0.068505,0.030921,0.004277,0.059342,0.059611,0.025914,0.018308,0.10288,0.078364,0.049836
2,1,0.015806,0.099749,0.082649,0.076439,0.045748,0.061168,0.037715,0.10508,0.11137,...,0.07326,0.008919,0.01206,0.03868,0.013767,0.01477,0.028393,0.08889,0.021125,0.01447
3,1,0.096576,0.021273,0.071937,0.023003,0.051179,0.026095,0.014747,0.027601,0.011309,...,0.075601,0.028319,0.004177,0.026253,0.025739,0.018103,0.075973,0.057072,0.043127,0.024063
4,1,0.0245,0.0381,0.019063,0.004089,0.02301,0.10454,0.075521,0.032552,0.018125,...,0.061589,0.030194,0.004605,0.041527,0.050398,0.008049,0.1259,0.10101,0.026404,0.004397


In [8]:
y = data_df.iloc[:,0]
X = abs(data_df.iloc[:,1:])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, train_size=0.8)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)

(48, 60)
(12, 60)
(48,)


In [9]:
params = {
    'C': [1,10,100,1000],
    'gamma': ['scale','auto',0.01, 0.001, 0.0001]
}

In [10]:
# get cv
loo = LeaveOneOut()

# initalize model
svm_clf = svm.SVC(kernel='rbf')
grid = GridSearchCV(svm_clf, cv=loo, param_grid=params)

In [11]:
# fit the model
grid.fit(X,y)

GridSearchCV(cv=LeaveOneOut(), estimator=SVC(),
             param_grid={'C': [1, 10, 100, 1000],
                         'gamma': ['scale', 'auto', 0.01, 0.001, 0.0001]})

In [15]:
pd.DataFrame(grid.cv_results_).sort_values('mean_test_score', ascending=False)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,...,split53_test_score,split54_test_score,split55_test_score,split56_test_score,split57_test_score,split58_test_score,split59_test_score,mean_test_score,std_test_score,rank_test_score
17,0.00076,2.9e-05,0.000484,1.4e-05,1000,0.01,"{'C': 1000, 'gamma': 0.01}",1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.983333,0.128019,1
16,0.000804,3.8e-05,0.000485,1.6e-05,1000,auto,"{'C': 1000, 'gamma': 'auto'}",1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.983333,0.128019,1
0,0.000903,0.000232,0.000568,9.8e-05,1,scale,"{'C': 1, 'gamma': 'scale'}",1.0,1.0,1.0,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.9,0.3,3
5,0.000766,1.9e-05,0.000487,1.6e-05,10,scale,"{'C': 10, 'gamma': 'scale'}",1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,0.3,3
15,0.000763,2.8e-05,0.000481,1.3e-05,1000,scale,"{'C': 1000, 'gamma': 'scale'}",1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,0.3,3
10,0.000761,2.9e-05,0.000484,1.6e-05,100,scale,"{'C': 100, 'gamma': 'scale'}",1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,0.3,3
11,0.000721,2.7e-05,0.000483,2e-05,100,auto,"{'C': 100, 'gamma': 'auto'}",1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.489898,7
6,0.000729,3e-05,0.000492,2.3e-05,10,auto,"{'C': 10, 'gamma': 'auto'}",1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.516667,0.499722,8
7,0.000727,2.9e-05,0.000489,2.3e-05,10,0.01,"{'C': 10, 'gamma': 0.01}",1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.516667,0.499722,8
8,0.000727,3.4e-05,0.000486,1.8e-05,10,0.001,"{'C': 10, 'gamma': 0.001}",1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.516667,0.499722,8


In [16]:
print(grid.best_estimator_)
print(grid.best_params_)
print(grid.best_index_)
print(grid.best_score_)

SVC(C=1000, gamma='auto')
{'C': 1000, 'gamma': 'auto'}
16
0.9833333333333333
