In [26]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn.svm import SVC

from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

import os

In [52]:
os.chdir(r"C:\Hogwarts\machine_learning\Cases\Image Segmentation")
img = pd.read_csv("Image_Segmentation.csv")
img.head()

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.0,0.0,0.333333,0.266667,0.5,0.077778,6.666666,8.333334,7.777778,3.888889,5.0,3.333333,-8.333333,8.444445,0.53858,-0.924817
1,BRICKFACE,105,139,9,0.0,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.0,0.0,0.5,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.0,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.0,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.0,0.0,0.5,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.0,3.333333,-7.333334,7.111111,0.561508,-0.985811


In [53]:
X = img.drop("Class", axis = 1)

y = img["Class"]

In [54]:
le = LabelEncoder()
le_y = le.fit_transform(y)

print(le.classes_)

['BRICKFACE' 'CEMENT' 'FOLIAGE' 'GRASS' 'PATH' 'SKY' 'WINDOW']


In [55]:
scaler = StandardScaler()

In [56]:
svm = SVC(kernel = "linear",
         probability = True,
         random_state = 2022)

In [57]:
pipe = Pipeline([("STD", scaler), ("SVM", svm)])
print(pipe.get_params())

{'memory': None, 'steps': [('STD', StandardScaler()), ('SVM', SVC(kernel='linear', probability=True, random_state=2022))], 'verbose': False, 'STD': StandardScaler(), 'SVM': SVC(kernel='linear', probability=True, random_state=2022), 'STD__copy': True, 'STD__with_mean': True, 'STD__with_std': True, 'SVM__C': 1.0, 'SVM__break_ties': False, 'SVM__cache_size': 200, 'SVM__class_weight': None, 'SVM__coef0': 0.0, 'SVM__decision_function_shape': 'ovr', 'SVM__degree': 3, 'SVM__gamma': 'scale', 'SVM__kernel': 'linear', 'SVM__max_iter': -1, 'SVM__probability': True, 'SVM__random_state': 2022, 'SVM__shrinking': True, 'SVM__tol': 0.001, 'SVM__verbose': False}


In [64]:
params = {"SVM__C": np.linspace(0.001, 10, 20),
         "SVM__decision_function_shape": ["ovo", "ovr"] }

In [67]:
kfold = StratifiedKFold(n_splits = 5,
                        shuffle = True,
                        random_state = 2022)

In [69]:
gcv = GridSearchCV(pipe,
                  param_grid = params,
                  cv = kfold,
                  verbose = 3,
                  scoring = "neg_log_loss")
gcv.fit(X, y)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.755 total time=   0.0s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.752 total time=   0.0s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.755 total time=   0.0s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.756 total time=   0.0s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.774 total time=   0.0s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.755 total time=   0.0s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.752 total time=   0.0s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.755 total time=   0.0s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.756 total time=   0.0s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.774 total time=   0.0

[CV 5/5] END SVM__C=4.211105263157895, SVM__decision_function_shape=ovr;, score=-0.417 total time=   0.0s
[CV 1/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovo;, score=-0.409 total time=   0.0s
[CV 2/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovo;, score=-0.484 total time=   0.0s
[CV 3/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovo;, score=-0.391 total time=   0.0s
[CV 4/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovo;, score=-0.451 total time=   0.0s
[CV 5/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovo;, score=-0.419 total time=   0.0s
[CV 1/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovr;, score=-0.409 total time=   0.0s
[CV 2/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovr;, score=-0.484 total time=   0.0s
[CV 3/5] END SVM__C=4.737368421052632, SVM__decision_function_shape=ovr;, score=-0.391 total time=   0.0s
[CV 4/5] END SVM__C=4.737368421052632, SVM__de

[CV 1/5] END SVM__C=8.947473684210527, SVM__decision_function_shape=ovr;, score=-0.421 total time=   0.0s
[CV 2/5] END SVM__C=8.947473684210527, SVM__decision_function_shape=ovr;, score=-0.520 total time=   0.0s
[CV 3/5] END SVM__C=8.947473684210527, SVM__decision_function_shape=ovr;, score=-0.407 total time=   0.0s
[CV 4/5] END SVM__C=8.947473684210527, SVM__decision_function_shape=ovr;, score=-0.467 total time=   0.0s
[CV 5/5] END SVM__C=8.947473684210527, SVM__decision_function_shape=ovr;, score=-0.401 total time=   0.0s
[CV 1/5] END SVM__C=9.473736842105263, SVM__decision_function_shape=ovo;, score=-0.423 total time=   0.0s
[CV 2/5] END SVM__C=9.473736842105263, SVM__decision_function_shape=ovo;, score=-0.525 total time=   0.0s
[CV 3/5] END SVM__C=9.473736842105263, SVM__decision_function_shape=ovo;, score=-0.409 total time=   0.0s
[CV 4/5] END SVM__C=9.473736842105263, SVM__decision_function_shape=ovo;, score=-0.467 total time=   0.0s
[CV 5/5] END SVM__C=9.473736842105263, SVM__de

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=2022, shuffle=True),
             estimator=Pipeline(steps=[('STD', StandardScaler()),
                                       ('SVM',
                                        SVC(kernel='linear', probability=True,
                                            random_state=2022))]),
             param_grid={'SVM__C': array([1.00000000e-03, 5.27263158e-01, 1.05352632e+00, 1.57978947e+00,
       2.10605263e+00, 2.63231579e+00, 3.15857895e+00, 3.68484211e+00,
       4.21110526e+00, 4.73736842e+00, 5.26363158e+00, 5.78989474e+00,
       6.31615789e+00, 6.84242105e+00, 7.36868421e+00, 7.89494737e+00,
       8.42121053e+00, 8.94747368e+00, 9.47373684e+00, 1.00000000e+01]),
                         'SVM__decision_function_shape': ['ovo', 'ovr']},
             scoring='neg_log_loss', verbose=3)

In [70]:
print(gcv.best_params_)

{'SVM__C': 1.0535263157894736, 'SVM__decision_function_shape': 'ovo'}


In [71]:
print(gcv.best_score_)

-0.40457971833739437
