# In motion dataset SVM

In [1]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
import numpy as np
import scipy
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import sklearn.metrics
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from sklearn.preprocessing import StandardScaler, LabelEncoder
from PIL import Image
import time
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import pandas as pd

In [3]:
def create_array(df):
    scaled = True
    arr = np.array(df["Signal"])
    arr_numpy = []
    for i in range(len(arr)):
        arr[i] = arr[i].replace("[","")
        arr[i] = arr[i].replace("\n","")
        arr[i] = arr[i].replace("]","")
        float_list = []
        for item in arr[i].split():  
            float_list.append(float(item))
        arr_numpy.append(float_list)
        
    arr_numpy = np.array(arr_numpy)
    if scaled:
        scaler = StandardScaler()
        arr_numpy = scaler.fit_transform(arr_numpy.T).T
    return arr_numpy , np.array(df["Label"])

# ECG

### SVM ECG FILTERED

In [4]:
ecg = pd.read_csv("../DATA_CSV/ECG_filtered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ecg_df, test_ecg_df = train_test_split(ecg, test_size=0.2, stratify=ecg['Label'], random_state=123)

ecg_mat_train, label_ecg_train = create_array(train_ecg_df) 
ecg_mat_test, label_ecg_test  = create_array(test_ecg_df) 

X_train = ecg_mat_train
y_train = label_ecg_train
X_test = ecg_mat_test
y_test = label_ecg_test

# from sklearn.preprocessing import MinMaxScaler
# scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
# X_train = scaling.transform(X_train)
# X_test = scaling.transform(X_test)

In [4]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 1, 'kernel': 'linear'}
Score with best parameters:
0.9825669053901245
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.953543
1,"{'C': 0.01, 'kernel': 'linear'}",0.972578
2,"{'C': 0.1, 'kernel': 'linear'}",0.98153
3,"{'C': 1, 'kernel': 'linear'}",0.982567
4,"{'C': 10, 'kernel': 'linear'}",0.980871
5,"{'C': 100, 'kernel': 'linear'}",0.977101


In [5]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 0.05, 'gamma': 0.5}
Score with best parameters:
0.9922728986053524

All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.984923
1,"{'C': 0.05, 'gamma': 0.5}",0.992273
2,"{'C': 0.05, 'gamma': 5.0}",0.990954
3,"{'C': 0.5, 'gamma': 0.05}",0.991896
4,"{'C': 0.5, 'gamma': 0.5}",0.991048
5,"{'C': 0.5, 'gamma': 5.0}",0.990954
6,"{'C': 5, 'gamma': 0.05}",0.992273
7,"{'C': 5, 'gamma': 0.5}",0.990954
8,"{'C': 5, 'gamma': 5.0}",0.990954


In [6]:
# parameters for poly with higher degree kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of higher degree kernel
degree = 4

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=degree), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=', degree, ' KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR POLY DEGREE= 4  KERNEL
Best parameters set found:
{'C': 0.05, 'gamma': 0.05}
Score with best parameters:
0.9933094609875612

All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.993309
1,"{'C': 0.05, 'gamma': 0.5}",0.992744
2,"{'C': 0.05, 'gamma': 5.0}",0.992744
3,"{'C': 0.5, 'gamma': 0.05}",0.992744
4,"{'C': 0.5, 'gamma': 0.5}",0.992744
5,"{'C': 0.5, 'gamma': 5.0}",0.992744
6,"{'C': 5, 'gamma': 0.05}",0.992744
7,"{'C': 5, 'gamma': 0.5}",0.992744
8,"{'C': 5, 'gamma': 5.0}",0.992744


In [13]:
#parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
#clf = GridSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf = RandomizedSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'gamma': 0.05, 'C': 50}
Score with best parameters:
0.9926498303807011

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'gamma': 0.05, 'C': 0.5}",0.977667
1,"{'gamma': 5, 'C': 50}",0.900207
2,"{'gamma': 0.5, 'C': 50}",0.992179
3,"{'gamma': 0.5, 'C': 5}",0.992367
4,"{'gamma': 5, 'C': 5}",0.900207
5,"{'gamma': 0.05, 'C': 500}",0.991802
6,"{'gamma': 0.05, 'C': 50}",0.99265
7,"{'gamma': 0.005, 'C': 500}",0.991048
8,"{'gamma': 5, 'C': 500}",0.900207
9,"{'gamma': 0.005, 'C': 50}",0.984357


In [5]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='poly',degree=4, C=0.05, gamma=0.05).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  0.798825249671936
Highest score =  0.9962321024868124


### SVM ECG FILTERED PCA

In [6]:
ecg = pd.read_csv("../DATA_CSV/ECG_filtered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ecg_df, test_ecg_df = train_test_split(ecg, test_size=0.2, stratify=ecg['Label'], random_state=123)

ecg_mat_train, label_ecg_train = create_array(train_ecg_df) 
ecg_mat_test, label_ecg_test  = create_array(test_ecg_df) 

num_components = 0.95
pca2 = PCA(n_components = num_components)
pca2.fit(ecg_mat_train)
ecg_mat_train = pca2.transform(ecg_mat_train)
ecg_mat_test = pca2.transform(ecg_mat_test)

X_train = ecg_mat_train
y_train = label_ecg_train
X_test = ecg_mat_test
y_test = label_ecg_test

# from sklearn.preprocessing import MinMaxScaler
# scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
# X_train = scaling.transform(X_train)
# X_test = scaling.transform(X_test)

In [14]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 100, 'kernel': 'linear'}
Score with best parameters:
0.9676781002638523
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.943743
1,"{'C': 0.01, 'kernel': 'linear'}",0.960045
2,"{'C': 0.1, 'kernel': 'linear'}",0.96617
3,"{'C': 1, 'kernel': 'linear'}",0.96683
4,"{'C': 10, 'kernel': 'linear'}",0.967584
5,"{'C': 100, 'kernel': 'linear'}",0.967678


In [15]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 0.05, 'gamma': 0.5}
Score with best parameters:
0.988597813795703

All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.97107
1,"{'C': 0.05, 'gamma': 0.5}",0.988598
2,"{'C': 0.05, 'gamma': 5.0}",0.987279
3,"{'C': 0.5, 'gamma': 0.05}",0.984828
4,"{'C': 0.5, 'gamma': 0.5}",0.988598
5,"{'C': 0.5, 'gamma': 5.0}",0.986242
6,"{'C': 5, 'gamma': 0.05}",0.988504
7,"{'C': 5, 'gamma': 0.5}",0.987279
8,"{'C': 5, 'gamma': 5.0}",0.986054


In [19]:
# parameters for poly with higher degree kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of higher degree kernel
degree = 4

svc = SVC()
#clf = GridSearchCV(SVC(kernel='poly', degree=degree), parameters, cv=4, return_train_score=True)
clf = RandomizedSearchCV(SVC(kernel='poly', degree=degree, max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=', degree, ' KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR POLY DEGREE= 4  KERNEL
Best parameters set found:
{'gamma': 0.05, 'C': 5}
Score with best parameters:
0.9890689785148888

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'gamma': 0.05, 'C': 0.05}",0.987655
1,"{'gamma': 0.5, 'C': 0.05}",0.969186
2,"{'gamma': 5.0, 'C': 0.05}",0.967961
3,"{'gamma': 0.05, 'C': 0.5}",0.989069
4,"{'gamma': 0.5, 'C': 0.5}",0.969186
5,"{'gamma': 5.0, 'C': 0.5}",0.967961
6,"{'gamma': 0.05, 'C': 5}",0.989069
7,"{'gamma': 0.5, 'C': 5}",0.969186
8,"{'gamma': 5.0, 'C': 5}",0.967961


In [18]:
# parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
#clf = GridSearchCV(SVC(kernel='rbf'), parameters, cv=4, return_train_score=True)
clf = RandomizedSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'gamma': 0.005, 'C': 500}
Score with best parameters:
0.9926498303807011

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'gamma': 0.05, 'C': 50}",0.992556
1,"{'gamma': 0.5, 'C': 50}",0.988504
2,"{'gamma': 0.005, 'C': 5}",0.982567
3,"{'gamma': 0.05, 'C': 500}",0.989823
4,"{'gamma': 0.005, 'C': 0.5}",0.963343
5,"{'gamma': 0.05, 'C': 5}",0.989823
6,"{'gamma': 0.005, 'C': 500}",0.99265
7,"{'gamma': 0.05, 'C': 0.5}",0.982755
8,"{'gamma': 0.005, 'C': 50}",0.988881
9,"{'gamma': 0.5, 'C': 0.5}",0.987373


In [7]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='rbf', C=500, gamma=0.05).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  0.4009261679649353
Highest score =  0.9924642049736248


### SVM ECG nonFILTERED

In [8]:
ecg = pd.read_csv("../DATA_CSV/ECG_nofiltered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ecg_df, test_ecg_df = train_test_split(ecg, test_size=0.2, stratify=ecg['Label'], random_state=123)

ecg_mat_train, label_ecg_train = create_array(train_ecg_df) 
ecg_mat_test, label_ecg_test  = create_array(test_ecg_df) 

X_train = ecg_mat_train
y_train = label_ecg_train
X_test = ecg_mat_test
y_test = label_ecg_test

# from sklearn.preprocessing import MinMaxScaler
# scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
# X_train = scaling.transform(X_train)
# X_test = scaling.transform(X_test)

In [23]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 0.1, 'kernel': 'linear'}
Score with best parameters:
0.9833207689408218
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.951847
1,"{'C': 0.01, 'kernel': 'linear'}",0.975028
2,"{'C': 0.1, 'kernel': 'linear'}",0.983321
3,"{'C': 1, 'kernel': 'linear'}",0.983038
4,"{'C': 10, 'kernel': 'linear'}",0.981342
5,"{'C': 100, 'kernel': 'linear'}",0.978703


In [24]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 0.05, 'gamma': 0.5}
Score with best parameters:
0.9934979268752356

All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.985394
1,"{'C': 0.05, 'gamma': 0.5}",0.993498
2,"{'C': 0.05, 'gamma': 5.0}",0.99265
3,"{'C': 0.5, 'gamma': 0.05}",0.991613
4,"{'C': 0.5, 'gamma': 0.5}",0.99265
5,"{'C': 0.5, 'gamma': 5.0}",0.99265
6,"{'C': 5, 'gamma': 0.05}",0.993498
7,"{'C': 5, 'gamma': 0.5}",0.99265
8,"{'C': 5, 'gamma': 5.0}",0.99265


In [26]:
# parameters for poly with higher degree kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of higher degree kernel
degree = 4

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=degree), parameters, cv=4, return_train_score=True)
#clf = RandomizedSearchCV(SVC(kernel='poly', degree=degree, max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=', degree, ' KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR POLY DEGREE= 4  KERNEL
Best parameters set found:
{'C': 0.05, 'gamma': 0.05}
Score with best parameters:
0.9940633245382585

All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.994063
1,"{'C': 0.05, 'gamma': 0.5}",0.993781
2,"{'C': 0.05, 'gamma': 5.0}",0.993781
3,"{'C': 0.5, 'gamma': 0.05}",0.993781
4,"{'C': 0.5, 'gamma': 0.5}",0.993781
5,"{'C': 0.5, 'gamma': 5.0}",0.993781
6,"{'C': 5, 'gamma': 0.05}",0.993781
7,"{'C': 5, 'gamma': 0.5}",0.993781
8,"{'C': 5, 'gamma': 5.0}",0.993781


In [27]:
# parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'C': 500, 'gamma': 0.05}
Score with best parameters:
0.9939690915944214

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.5, 'gamma': 0.005}",0.97484
1,"{'C': 0.5, 'gamma': 0.05}",0.988221
2,"{'C': 0.5, 'gamma': 0.5}",0.96438
3,"{'C': 0.5, 'gamma': 5}",0.779589
4,"{'C': 5, 'gamma': 0.005}",0.987184
5,"{'C': 5, 'gamma': 0.05}",0.993875
6,"{'C': 5, 'gamma': 0.5}",0.978326
7,"{'C': 5, 'gamma': 5}",0.857897
8,"{'C': 50, 'gamma': 0.005}",0.992744
9,"{'C': 50, 'gamma': 0.05}",0.993781


In [9]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='poly',degree=4, C=0.05, gamma=0.05).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  0.7400203394889832
Highest score =  0.9966088922381311


### SVM ECG noFILTERED PCA

In [10]:
ecg = pd.read_csv("../DATA_CSV/ECG_nofiltered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ecg_df, test_ecg_df = train_test_split(ecg, test_size=0.2, stratify=ecg['Label'], random_state=123)

ecg_mat_train, label_ecg_train = create_array(train_ecg_df) 
ecg_mat_test, label_ecg_test  = create_array(test_ecg_df) 

num_components = 0.95
pca2 = PCA(n_components = num_components)
pca2.fit(ecg_mat_train)
ecg_mat_train = pca2.transform(ecg_mat_train)
ecg_mat_test = pca2.transform(ecg_mat_test)

X_train = ecg_mat_train
y_train = label_ecg_train
X_test = ecg_mat_test
y_test = label_ecg_test

# from sklearn.preprocessing import MinMaxScaler
# scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
# X_train = scaling.transform(X_train)
# X_test = scaling.transform(X_test)

In [31]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 0.1, 'kernel': 'linear'}
Score with best parameters:
0.9663588390501319
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.941387
1,"{'C': 0.01, 'kernel': 'linear'}",0.961082
2,"{'C': 0.1, 'kernel': 'linear'}",0.966359
3,"{'C': 1, 'kernel': 'linear'}",0.965982
4,"{'C': 10, 'kernel': 'linear'}",0.965699
5,"{'C': 100, 'kernel': 'linear'}",0.965605


In [32]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2, max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 0.05, 'gamma': 0.5}
Score with best parameters:
0.9856765925367508

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.96994
1,"{'C': 0.05, 'gamma': 0.5}",0.985677
2,"{'C': 0.05, 'gamma': 5.0}",0.855635
3,"{'C': 0.5, 'gamma': 0.05}",0.97974
4,"{'C': 0.5, 'gamma': 0.5}",0.976724
5,"{'C': 0.5, 'gamma': 5.0}",0.815021
6,"{'C': 5, 'gamma': 0.05}",0.985582
7,"{'C': 5, 'gamma': 0.5}",0.787599
8,"{'C': 5, 'gamma': 5.0}",0.810498


In [33]:
# parameters for poly with higher degree kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of higher degree kernel
degree = 4

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=degree, max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=', degree, ' KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR POLY DEGREE= 4  KERNEL
Best parameters set found:
{'C': 0.5, 'gamma': 0.05}
Score with best parameters:
0.9879381831888429

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.983415
1,"{'C': 0.05, 'gamma': 0.5}",0.924991
2,"{'C': 0.05, 'gamma': 5.0}",0.913777
3,"{'C': 0.5, 'gamma': 0.05}",0.987938
4,"{'C': 0.5, 'gamma': 0.5}",0.924802
5,"{'C': 0.5, 'gamma': 5.0}",0.913777
6,"{'C': 5, 'gamma': 0.05}",0.984169
7,"{'C': 5, 'gamma': 0.5}",0.924802
8,"{'C': 5, 'gamma': 5.0}",0.913777


In [34]:
# parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'C': 50, 'gamma': 0.05}
Score with best parameters:
0.9922728986053524

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.5, 'gamma': 0.005}",0.968243
1,"{'C': 0.5, 'gamma': 0.05}",0.983509
2,"{'C': 0.5, 'gamma': 0.5}",0.98775
3,"{'C': 0.5, 'gamma': 5}",0.893328
4,"{'C': 5, 'gamma': 0.005}",0.983321
5,"{'C': 5, 'gamma': 0.05}",0.991048
6,"{'C': 5, 'gamma': 0.5}",0.991425
7,"{'C': 5, 'gamma': 5}",0.94167
8,"{'C': 50, 'gamma': 0.005}",0.990671
9,"{'C': 50, 'gamma': 0.05}",0.992273


In [11]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='rbf', C=50, gamma=0.05).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  0.2585904669761658
Highest score =  0.9890730972117558


# PPG

### SVM PPG FILTERED

In [12]:
ppg = pd.read_csv("../DATA_CSV/PPG_filtered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ppg_df, test_ppg_df = train_test_split(ppg, test_size=0.2, stratify=ppg['Label'], random_state=123)

ppg_mat_train, label_ppg_train = create_array(train_ppg_df) 
ppg_mat_test, label_ppg_test   = create_array(test_ppg_df) 

X_train = ppg_mat_train
y_train = label_ppg_train
X_test = ppg_mat_test
y_test = label_ppg_test

from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

In [39]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 100, 'kernel': 'linear'}
Score with best parameters:
0.8714241090068363
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.491314
1,"{'C': 0.01, 'kernel': 'linear'}",0.854883
2,"{'C': 0.1, 'kernel': 'linear'}",0.865286
3,"{'C': 1, 'kernel': 'linear'}",0.867471
4,"{'C': 10, 'kernel': 'linear'}",0.870488
5,"{'C': 100, 'kernel': 'linear'}",0.871424


In [40]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2, max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 5, 'gamma': 0.05}
Score with best parameters:
0.8772495971826675

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.616247
1,"{'C': 0.05, 'gamma': 0.5}",0.877042
2,"{'C': 0.05, 'gamma': 5.0}",0.654008
3,"{'C': 0.5, 'gamma': 0.05}",0.867887
4,"{'C': 0.5, 'gamma': 0.5}",0.876522
5,"{'C': 0.5, 'gamma': 5.0}",0.292611
6,"{'C': 5, 'gamma': 0.05}",0.87725
7,"{'C': 5, 'gamma': 0.5}",0.544157
8,"{'C': 5, 'gamma': 5.0}",0.326941


In [41]:
# parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'C': 5, 'gamma': 0.5}
Score with best parameters:
0.8980548440904776

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.5, 'gamma': 0.005}",0.566933
1,"{'C': 0.5, 'gamma': 0.05}",0.870592
2,"{'C': 0.5, 'gamma': 0.5}",0.887652
3,"{'C': 0.5, 'gamma': 5}",0.866222
4,"{'C': 5, 'gamma': 0.005}",0.865807
5,"{'C': 5, 'gamma': 0.05}",0.881515
6,"{'C': 5, 'gamma': 0.5}",0.898055
7,"{'C': 5, 'gamma': 5}",0.877977
8,"{'C': 50, 'gamma': 0.005}",0.871528
9,"{'C': 50, 'gamma': 0.05}",0.888693


In [13]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='rbf', C=5, gamma=0.5).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  4.002095561027527
Highest score =  0.901414309484193


### SVM PPG FILTERED PCA

In [14]:
ppg = pd.read_csv("../DATA_CSV/PPG_filtered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ppg_df, test_ppg_df = train_test_split(ppg, test_size=0.2, stratify=ppg['Label'], random_state=123)

ppg_mat_train, label_ppg_train = create_array(train_ppg_df) 
ppg_mat_test, label_ppg_test   = create_array(test_ppg_df) 

num_components = 0.95
pca2 = PCA(n_components = num_components)
pca2.fit(ecg_mat_train)
ecg_mat_train = pca2.transform(ecg_mat_train)
ecg_mat_test = pca2.transform(ecg_mat_test)

X_train = ppg_mat_train
y_train = label_ppg_train
X_test = ppg_mat_test
y_test = label_ppg_test

from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

In [43]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 100, 'kernel': 'linear'}
Score with best parameters:
0.8714241090068363
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.491314
1,"{'C': 0.01, 'kernel': 'linear'}",0.854883
2,"{'C': 0.1, 'kernel': 'linear'}",0.865286
3,"{'C': 1, 'kernel': 'linear'}",0.867471
4,"{'C': 10, 'kernel': 'linear'}",0.870488
5,"{'C': 100, 'kernel': 'linear'}",0.871424


In [44]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2, max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 5, 'gamma': 0.05}
Score with best parameters:
0.8772495971826675

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.616247
1,"{'C': 0.05, 'gamma': 0.5}",0.877042
2,"{'C': 0.05, 'gamma': 5.0}",0.654008
3,"{'C': 0.5, 'gamma': 0.05}",0.867887
4,"{'C': 0.5, 'gamma': 0.5}",0.876522
5,"{'C': 0.5, 'gamma': 5.0}",0.292611
6,"{'C': 5, 'gamma': 0.05}",0.87725
7,"{'C': 5, 'gamma': 0.5}",0.544157
8,"{'C': 5, 'gamma': 5.0}",0.326941


In [45]:
# parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'C': 5, 'gamma': 0.5}
Score with best parameters:
0.8980548440904776

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.5, 'gamma': 0.005}",0.566933
1,"{'C': 0.5, 'gamma': 0.05}",0.870592
2,"{'C': 0.5, 'gamma': 0.5}",0.887652
3,"{'C': 0.5, 'gamma': 5}",0.866222
4,"{'C': 5, 'gamma': 0.005}",0.865807
5,"{'C': 5, 'gamma': 0.05}",0.881515
6,"{'C': 5, 'gamma': 0.5}",0.898055
7,"{'C': 5, 'gamma': 5}",0.877977
8,"{'C': 50, 'gamma': 0.005}",0.871528
9,"{'C': 50, 'gamma': 0.05}",0.888693


In [15]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='rbf', C=5, gamma=0.5).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  3.991243796348572
Highest score =  0.901414309484193


### SVM PPG noFILTERED

In [16]:
ppg = pd.read_csv("../DATA_CSV/PPG_nofiltered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ppg_df, test_ppg_df = train_test_split(ppg, test_size=0.2, stratify=ppg['Label'], random_state=123)

ppg_mat_train, label_ppg_train = create_array(train_ppg_df) 
ppg_mat_test, label_ppg_test   = create_array(test_ppg_df) 

X_train = ppg_mat_train
y_train = label_ppg_train
X_test = ppg_mat_test
y_test = label_ppg_test

from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

In [47]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 10, 'kernel': 'linear'}
Score with best parameters:
0.8477720356636489
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.784147
1,"{'C': 0.01, 'kernel': 'linear'}",0.833471
2,"{'C': 0.1, 'kernel': 'linear'}",0.843316
3,"{'C': 1, 'kernel': 'linear'}",0.845078
4,"{'C': 10, 'kernel': 'linear'}",0.847772
5,"{'C': 100, 'kernel': 'linear'}",0.84456


In [48]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2,max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 0.5, 'gamma': 0.5}
Score with best parameters:
0.8676686243461516

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.71005
1,"{'C': 0.05, 'gamma': 0.5}",0.866425
2,"{'C': 0.05, 'gamma': 5.0}",0.609248
3,"{'C': 0.5, 'gamma': 0.05}",0.852435
4,"{'C': 0.5, 'gamma': 0.5}",0.867669
5,"{'C': 0.5, 'gamma': 5.0}",0.404878
6,"{'C': 5, 'gamma': 0.05}",0.866425
7,"{'C': 5, 'gamma': 0.5}",0.566514
8,"{'C': 5, 'gamma': 5.0}",0.467676


In [49]:
# parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'C': 5, 'gamma': 0.5}
Score with best parameters:
0.8950258807495881

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.5, 'gamma': 0.005}",0.802694
1,"{'C': 0.5, 'gamma': 0.05}",0.859068
2,"{'C': 0.5, 'gamma': 0.5}",0.885699
3,"{'C': 0.5, 'gamma': 5}",0.866322
4,"{'C': 5, 'gamma': 0.005}",0.843626
5,"{'C': 5, 'gamma': 0.05}",0.874715
6,"{'C': 5, 'gamma': 0.5}",0.895026
7,"{'C': 5, 'gamma': 5}",0.87399
8,"{'C': 50, 'gamma': 0.005}",0.86228
9,"{'C': 50, 'gamma': 0.05}",0.886839


In [17]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='rbf', C=5, gamma=0.5).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  4.309520490169525
Highest score =  0.9088271860754248


### SVM PPG FILTERED PCA

In [18]:
ppg = pd.read_csv("../DATA_CSV/PPG_nofiltered_noscaled_cammina.csv",names=["index","Signal","Label"],skiprows=1).drop(columns="index")

train_ppg_df, test_ppg_df = train_test_split(ppg, test_size=0.2, stratify=ppg['Label'], random_state=123)

ppg_mat_train, label_ppg_train = create_array(train_ppg_df) 
ppg_mat_test, label_ppg_test   = create_array(test_ppg_df) 

num_components = 0.95
pca2 = PCA(n_components = num_components)
pca2.fit(ecg_mat_train)
ecg_mat_train = pca2.transform(ecg_mat_train)
ecg_mat_test = pca2.transform(ecg_mat_test)

X_train = ppg_mat_train
y_train = label_ppg_train
X_test = ppg_mat_test
y_test = label_ppg_test

from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

In [51]:
# parameters for linear SVM
parameters = {'kernel' : ['linear'] , 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
  
svc = SVC()
clf = GridSearchCV(svc, parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR LINEAR KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("All scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]

RESULTS FOR LINEAR KERNEL
Best parameters set found:
{'C': 10, 'kernel': 'linear'}
Score with best parameters:
0.8477720356636489
All scores on the grid:


Unnamed: 0,params,mean_test_score
0,"{'C': 0.001, 'kernel': 'linear'}",0.784147
1,"{'C': 0.01, 'kernel': 'linear'}",0.833471
2,"{'C': 0.1, 'kernel': 'linear'}",0.843316
3,"{'C': 1, 'kernel': 'linear'}",0.845078
4,"{'C': 10, 'kernel': 'linear'}",0.847772
5,"{'C': 100, 'kernel': 'linear'}",0.84456


In [52]:
# parameters for poly with degree 2 kernel
parameters = {'C': [0.05, 0.5, 5],'gamma':[0.05,0.5,5.]}

#run SVM with poly of degree 2 kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='poly', degree=2, max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR POLY DEGREE=2 KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR POLY DEGREE=2 KERNEL
Best parameters set found:
{'C': 0.5, 'gamma': 0.5}
Score with best parameters:
0.8676686243461516

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.05, 'gamma': 0.05}",0.71005
1,"{'C': 0.05, 'gamma': 0.5}",0.866425
2,"{'C': 0.05, 'gamma': 5.0}",0.609248
3,"{'C': 0.5, 'gamma': 0.05}",0.852435
4,"{'C': 0.5, 'gamma': 0.5}",0.867669
5,"{'C': 0.5, 'gamma': 5.0}",0.404878
6,"{'C': 5, 'gamma': 0.05}",0.866425
7,"{'C': 5, 'gamma': 0.5}",0.566514
8,"{'C': 5, 'gamma': 5.0}",0.467676


In [53]:
# parameters for rbf SVM
parameters = {'C': [0.5, 5, 50, 500],'gamma':[0.005, 0.05, 0.5,5]}

#run SVM with rbf kernel

svc = SVC()
clf = GridSearchCV(SVC(kernel='rbf', max_iter = 1000), parameters, cv=4, return_train_score=True)
clf.fit(X_train, y_train)

print ('RESULTS FOR rbf KERNEL')

print("Best parameters set found:")
print(clf.best_params_)

print("Score with best parameters:")
print(clf.best_score_)

print("\nAll scores on the grid:")
df=pd.DataFrame(clf.cv_results_)
df[['params', 'mean_test_score']]



RESULTS FOR rbf KERNEL
Best parameters set found:
{'C': 5, 'gamma': 0.5}
Score with best parameters:
0.8950258807495881

All scores on the grid:




Unnamed: 0,params,mean_test_score
0,"{'C': 0.5, 'gamma': 0.005}",0.802694
1,"{'C': 0.5, 'gamma': 0.05}",0.859068
2,"{'C': 0.5, 'gamma': 0.5}",0.885699
3,"{'C': 0.5, 'gamma': 5}",0.866322
4,"{'C': 5, 'gamma': 0.005}",0.843626
5,"{'C': 5, 'gamma': 0.05}",0.874715
6,"{'C': 5, 'gamma': 0.5}",0.895026
7,"{'C': 5, 'gamma': 5}",0.87399
8,"{'C': 50, 'gamma': 0.005}",0.86228
9,"{'C': 50, 'gamma': 0.05}",0.886839


In [19]:
# execution time
t = []
for i in range(100):
    start_time = time.time()
    best_SVM = SVC(kernel='rbf', C=5, gamma=0.5).fit(X_train,y_train)
    t.append(time.time() - start_time)
print('Execution time = ', np.mean(t))
print('Highest score = ', best_SVM.score(X_test,y_test))

Execution time =  4.2895867943763735
Highest score =  0.9088271860754248
