In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline 
# import warnings
# warnings.filterwarnings("ignore")

In [3]:
df= pd.read_csv('Vehicle.csv')
lbl = LabelEncoder()
df['Class'] = lbl.fit_transform(df['Class'])
X = df.drop('Class', axis=1)
y = df['Class']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.3,
                                                    random_state=23,
                                                    stratify=y)
kfold = StratifiedKFold(n_splits=5, shuffle=True,
                        random_state=23)

linear without scaling

In [5]:
svm = SVC(kernel='linear', probability=True, random_state=23)

decision_function_shape = ['ovo', 'ovr']
params = {'C':np.linspace(0.001,5,10),
          'decision_function_shape':decision_function_shape}
gcv = GridSearchCV(svm, param_grid=params, 
                   cv=kfold, n_jobs=4, scoring='neg_log_loss')
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

{'C': 0.5564444444444444, 'decision_function_shape': 'ovo'}
-0.4411668203366073


rbf without scaling

In [6]:
svm = SVC(kernel='rbf', probability=True, random_state=23)

params = {'C':np.linspace(0.001,5,10),
          'decision_function_shape':['ovo', 'ovr']}
gcv = GridSearchCV(svm, param_grid=params, 
                   cv=kfold, n_jobs=4, scoring='neg_log_loss')
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

{'C': 5.0, 'decision_function_shape': 'ovo'}
-0.8213854540403659


linear with scaling

In [7]:
scaler = StandardScaler()

svm = SVC(kernel='linear',
          probability=True, random_state=23)
pipe = Pipeline([('SCL', scaler),('SVM', svm)])
params = {'SCL':[MinMaxScaler(),StandardScaler()],
          'SVM__C': np.linspace(0.001, 5, 10),
          'SVM__decision_function_shape':['ovo','ovr']}
gcv = GridSearchCV(pipe, param_grid=params,n_jobs=4,
                   cv=kfold, scoring='neg_log_loss')
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

{'SCL': StandardScaler(), 'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo'}
-0.4197001959820602


rbf with Scaling

In [8]:
svm = SVC(kernel='rbf', probability=True, random_state=23)
kernel = ['rbf']
decision_function_shape = ['ovo', 'ovr']
params = {'C':np.linspace(0.001,5,10),'decision_function_shape':decision_function_shape}
gcv = GridSearchCV(svm, param_grid=params, 
                   cv=kfold, n_jobs=4, scoring='neg_log_loss')
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

{'C': 5.0, 'decision_function_shape': 'ovo'}
-0.8213854540403659
