In [11]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score,accuracy_score
from sklearn.model_selection import train_test_split,StratifiedKFold,GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer 
from sklearn.compose import make_column_selector
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, roc_auc_score,log_loss

In [12]:
satellite = pd.read_csv('Satellite.csv', sep= ";")
satellite.head(2)

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36,classes
0,92,115,120,94,84,102,106,79,84,102,...,104,88,121,128,100,84,107,113,87,grey soil
1,84,102,106,79,84,102,102,83,80,102,...,100,84,107,113,87,84,99,104,79,grey soil


In [13]:
y= satellite['classes']
X= satellite.drop('classes', axis=1)
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=24,test_size=0.3,stratify=y)

In [14]:
X_train

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.27,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36
4322,50,69,90,76,50,69,90,76,50,73,...,100,83,51,79,96,79,51,75,96,79
2912,56,54,79,64,59,57,82,68,59,60,...,111,91,84,106,111,91,92,115,120,102
5155,93,111,113,92,93,111,109,87,97,111,...,119,94,92,117,114,90,92,108,105,86
2194,76,91,91,70,76,83,87,70,68,79,...,66,52,59,63,66,55,63,63,69,55
781,88,102,106,87,88,102,111,83,88,111,...,113,87,88,103,104,83,88,107,109,87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5175,67,68,81,67,71,75,85,71,63,54,...,68,60,71,73,75,60,71,77,79,64
6283,63,91,100,83,67,91,109,87,75,91,...,93,75,63,81,93,83,63,91,101,86
276,87,95,105,83,83,99,110,83,87,99,...,111,91,88,102,115,87,84,111,106,87
114,56,54,71,63,56,51,67,63,53,51,...,76,70,56,49,73,70,49,40,69,66


### Linear Kernel

In [17]:
svm = SVC(kernel='linear',probability=True,random_state=24, decision_function_shape='ovr')
scaler_mm = MinMaxScaler()
pipe = Pipeline([('SCL',scaler_mm),('SVM',svm)])
pipe.fit(X_train,y_train)
y_pred = pipe.predict(X_test)
print(accuracy_score(y_test,y_pred))


0.865872604867944


In [18]:
y_pred_prob = pipe.predict_proba(X_test)
print(log_loss(y_test, y_pred_prob))

0.3571064276620879


### GridSearchCV

In [20]:
svm = SVC(kernel='linear',probability=True,random_state=24)
pipe= Pipeline([('SCL', scaler_mm),('SVM', svm)])
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)

params = {'SVM__C' : np.linspace(0.001,5,3),
          'SVM__decision_function_shape' : ['ovo', 'ovr']}
gcv = GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='neg_log_loss', verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.487 total time=   8.8s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.492 total time=   8.6s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.526 total time=   8.5s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.497 total time=   8.3s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.472 total time=   8.4s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.487 total time=   8.5s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.492 total time=   8.4s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.526 total time=   8.6s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.497 total time=   8.8s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.472 total time=   8.8s


In [22]:
print(gcv.best_params_)
print(gcv.best_score_)

{'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo'}
-0.33206246904927117


In [23]:
### Radial Kernel

In [28]:
svm = SVC(kernel='rbf',probability=True,random_state=24)
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
pipe= Pipeline([('SCL', scaler_mm),('SVM', svm)])
params = {'SVM__C' : np.linspace(0.001,5,3),
          'SVM__gamma' : np.linspace(0.001,5,3), 'SVM__decision_function_shape' : ['ovo','ovr']}
gcv = GridSearchCV(pipe,param_grid=params,cv=kfold,scoring='neg_log_loss', verbose=3)
gcv.fit(X,y)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.850 total time=  17.5s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.821 total time=  17.5s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.791 total time=  16.9s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.762 total time=  18.3s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-0.877 total time=  17.3s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=2.5004999999999997;, score=-0.636 total time=  18.3s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=2.5004999999999997;, score=-0.550 total time=  19.7s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=2.5004999999999997;, score=-0.546 total time=  18.5s
[CV 

In [29]:
print(gcv.best_params_)
print(gcv.best_score_)

{'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo', 'SVM__gamma': 5.0}
-0.2140689944828622
