SVM
- Binary Classification
- Multi-class Classification
  - One-vs-One (`ovo`)
  - One-vs-All / One-vs-Rest (`ovr`)

In [9]:
import sklearn
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, r2_score, roc_auc_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.compose import make_column_selector, make_column_transformer

In [3]:
# Load the semicolon-delimited satellite imaging dataset.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where that exact folder exists -- consider a configurable data directory.
df = pd.read_csv(
    r'C:\Users\DAI.STUDENTSDC\Desktop\Machine Learning\Data Sets\Cases\Satellite Imaging\Satellite.csv',
    sep=';',
)

# The 'classes' column is the target; every other column is used as a feature.
y = df['classes']
X = df.drop(columns=['classes'])

In [4]:
# Hold out 30% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on y, so class proportions may
# differ between the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

Linear

In [None]:
# Baseline model: min-max scale the features, then fit a linear-kernel SVC.
# probability=True is required so that predict_proba is available later.
# NOTE(review): despite its name, scale_std holds a MinMaxScaler.
scale_std = MinMaxScaler()
svm = SVC(
    kernel='linear',
    probability=True,
    random_state=24,
    decision_function_shape='ovo',
)
pipe = Pipeline(steps=[('SCL', scale_std), ('SVC', svm)])
 

In [7]:
# Fit every step of the pipeline on the training split; being the last
# expression in the cell, the fitted pipeline is also displayed.
pipe.fit(X_train, y_train)

In [8]:
# Evaluate the fitted pipeline on the held-out split with multi-class log loss.
y_pred_proba = pipe.predict_proba(X_test)

# The columns of predict_proba follow pipe.classes_. Without `labels`,
# log_loss infers the label set from y_test alone, which fails when the
# (non-stratified) test split happens to miss a class. Passing the
# classifier's own labels keeps the columns aligned in every case and leaves
# the result unchanged when all classes are present.
print(log_loss(y_test, y_pred_proba, labels=pipe.classes_))

0.3446049113759542


Linear GridSearchCV

In [None]:
# Stratified 5-fold CV with a fixed seed so every candidate sees the same folds.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Pipeline under tuning: min-max scaling followed by a linear-kernel SVC.
# probability=True is needed because the search is scored with log loss.
scale_mm = MinMaxScaler()
svm = SVC(kernel='linear', probability=True, random_state=24)
pipe = Pipeline(steps=[('SCL', scale_mm), ('SVM', svm)])

# Search space: three evenly spaced C values in [0.001, 5] x two
# decision-function shapes = 6 candidates.
# NOTE(review): per the scikit-learn docs, decision_function_shape only changes
# the shape of decision_function(); the CV log below shows identical scores for
# 'ovo' and 'ovr', so this axis doubles the fits without changing the result.
params = {
    'SVM__C': np.linspace(start=0.001, stop=5, num=3),
    'SVM__decision_function_shape': ['ovo', 'ovr'],
}

gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
    verbose=3,
)
 

In [35]:
# Run the linear-kernel grid search on the training split. With verbose=3 each
# fold's score and timing is logged (5 folds x 6 candidates = 30 fits).
gcv.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.560 total time=   5.2s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.565 total time=   5.1s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.536 total time=   5.5s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.506 total time=   5.2s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.540 total time=   5.1s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.560 total time=   5.5s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.565 total time=   5.6s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.536 total time=   5.2s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.506 total time=   5.4s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.540 total time=   5.5s


In [36]:
# Report the winning hyper-parameters and their mean cross-validated score.
# The score is negative log loss, so values closer to 0 are better.
print(
    f'Best params: {gcv.best_params_}',
    f'Best score: {gcv.best_score_}',
    sep='\n',
)

Best params: {'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo'}
Best score: -0.3385006982938944


In [37]:
# Take the pipeline refit with the best linear-kernel parameters and predict
# class labels for the held-out test split.
best_model = gcv.best_estimator_
y_pred = best_model.predict(X_test)

In [38]:
# Fraction of test rows whose predicted class matches the true class.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.8653547384774728


Polynomial GridSearchCV

In [None]:
# Stratified 5-fold CV with a fixed seed so every candidate sees the same folds.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Pipeline under tuning: min-max scaling followed by a polynomial-kernel SVC.
# probability=True is needed because the search is scored with log loss.
scale_mm = MinMaxScaler()
svm = SVC(kernel='poly', probability=True, random_state=24)
pipe = Pipeline(steps=[('SCL', scale_mm), ('SVM', svm)])

# Search space: three C values x two polynomial degrees x two
# decision-function shapes = 12 candidates.
# NOTE(review): per the scikit-learn docs, decision_function_shape only changes
# the shape of decision_function(), not predictions or probabilities, so it is
# not expected to influence the log-loss score -- confirm before keeping it.
params = {
    'SVM__C': np.linspace(start=0.001, stop=5, num=3),
    'SVM__degree': [2, 3],
    'SVM__decision_function_shape': ['ovo', 'ovr'],
}

gcvp = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
    verbose=3,
)


In [40]:
# Run the polynomial-kernel grid search on the training split
# (5 folds x 12 candidates = 60 fits, logged per fold because verbose=3).
gcvp.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=2;, score=-0.460 total time=   3.4s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=2;, score=-0.455 total time=   3.5s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=2;, score=-0.435 total time=   3.5s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=2;, score=-0.418 total time=   3.6s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=2;, score=-0.455 total time=   3.4s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=3;, score=-0.415 total time=   1.9s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=3;, score=-0.411 total time=   2.0s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__degree=3;, score=-0.399 total time=   1.9s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__d

In [41]:
# Report the winning polynomial-kernel hyper-parameters and their mean
# cross-validated score (negative log loss: closer to 0 is better).
print(
    f'Best params: {gcvp.best_params_}',
    f'Best score: {gcvp.best_score_}',
    sep='\n',
)


Best params: {'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo', 'SVM__degree': 2}
Best score: -0.2994729936053615


In [42]:
# Score the refit best polynomial-kernel pipeline on the held-out test split.
best_model = gcvp.best_estimator_
y_pred = best_model.predict(X_test)

# Accuracy: fraction of test rows whose predicted class matches the true class.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.8912480580010357


Radial GridSearchCV

In [43]:
# Stratified 5-fold CV with a fixed seed so every candidate sees the same folds.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Pipeline under tuning: min-max scaling followed by an RBF-kernel SVC.
# probability=True is needed because the search is scored with log loss.
scale_mm = MinMaxScaler()
svm = SVC(kernel='rbf', probability=True, random_state=24)
pipe = Pipeline(steps=[('SCL', scale_mm), ('SVM', svm)])

# Search space: three C values x three gamma values x two decision-function
# shapes = 18 candidates.
# NOTE(review): per the scikit-learn docs, decision_function_shape only changes
# the shape of decision_function(), not predictions or probabilities, so it is
# not expected to influence the log-loss score -- confirm before keeping it.
params = {
    'SVM__C': np.linspace(start=0.001, stop=5, num=3),
    'SVM__gamma': np.linspace(start=0.001, stop=5, num=3),
    'SVM__decision_function_shape': ['ovo', 'ovr'],
}

gcvr = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
    verbose=3,
)


In [44]:
# Run the RBF-kernel grid search on the training split
# (5 folds x 18 candidates = 90 fits, logged per fold because verbose=3).
gcvr.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.849 total time=  11.0s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.867 total time=  10.1s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.834 total time=  10.3s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.829 total time=  10.0s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.834 total time=  10.4s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=2.5004999999999997;, score=-0.711 total time=  10.3s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=2.5004999999999997;, score=-0.708 total time=  10.3s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=2.5004999999999997;, score=-0.672 total time=  10.7s
[CV 

In [45]:
# Report the winning RBF-kernel hyper-parameters and their mean
# cross-validated score (negative log loss: closer to 0 is better).
print(
    f'Best params: {gcvr.best_params_}',
    f'Best score: {gcvr.best_score_}',
    sep='\n',
)


Best params: {'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo', 'SVM__gamma': 5.0}
Best score: -0.2280545577407247


In [46]:
# Score the refit best RBF-kernel pipeline on the held-out test split.
best_model = gcvr.best_estimator_
y_pred = best_model.predict(X_test)

# Accuracy: fraction of test rows whose predicted class matches the true class.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9228379078197825
