# Train SVM

In [1]:
import cv2
import numpy as np
import pickle as pkl
import matplotlib.pyplot as plt

from sklearn import svm, metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report


### 1) Load Data

In [2]:
# Load the pickled SIFT train/test arrays and their labels.
def _load_pickle(path):
    """Return the object unpickled from the file at `path`."""
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# Same files, loaded in the same order as before: features first, then labels.
X_train = _load_pickle('../CW_Dataset/SIFT/hist_array_train.pkl')
X_test = _load_pickle('../CW_Dataset/SIFT/hist_array_test.pkl')
y_train = _load_pickle('../CW_Dataset/SIFT/hist_label_train.pkl')
y_test = _load_pickle('../CW_Dataset/SIFT/hist_label_test.pkl')


### 2) Grid Search for SVM

In [4]:
# Hyper-parameter grid search for an SVC classifier.
# The grid holds 5 values of C x 4 values of gamma = 20 candidates.
# NOTE(review): the C values jump from 10 to 1000 (no 100) — confirm the gap is intentional.
param_grid = {
    'C': [1, 10, 1000, 10000, 100000],
    'gamma': [1, 0.1, 0.01, 0.001]
    }

# With an integer `cv` and a classifier on binary/multiclass labels,
# GridSearchCV splits with stratified folds.
# Candidates are ranked by macro-averaged F1: for single-label multiclass
# targets 'f1_micro' is numerically identical to accuracy, so it can rank a
# model that only ever predicts the largest class as "best". Macro F1 weights
# every class equally regardless of its size.
grid = GridSearchCV(svm.SVC(), param_grid, cv=5, scoring='f1_macro', verbose=1, n_jobs=-1)
grid.fit(X_train, y_train)

# print best parameters
print('Best parameters: {}'.format(grid.best_params_))

Fitting 5 folds for each of 15 candidates, totalling 75 fits
Best parameters: {'C': 1000, 'gamma': 0.1}


In [11]:
# Take the model trained with the best parameters.
# GridSearchCV (refit=True by default) has already refit an SVC with the best
# C/gamma on the whole training set, so training an identical SVC a second
# time is redundant. Reusing it also keeps `clf` the same model that is
# pickled from `grid.best_estimator_`.
clf = grid.best_estimator_
clf

SVC(C=1000, gamma=0.1)

In [12]:
# Persist the grid search's best estimator to disk as a pickle.
best_model = grid.best_estimator_
with open('../Models/sift-svm.pkl', 'wb') as model_file:
    pkl.dump(best_model, model_file)

### 3) Test SVM

In [13]:
# Read the persisted classifier back from disk.
# NOTE: unpickling can execute arbitrary code; only open trusted files.
with open('../Models/sift-svm.pkl', 'rb') as model_file:
    clf = pkl.load(model_file)

In [14]:
# Evaluate the loaded classifier on the test split.
y_pred = clf.predict(X_test)

# zero_division=0 reports 0.00 precision/F1 for any class that never appears
# in y_pred (the same values the default produces) without emitting an
# UndefinedMetricWarning for each affected metric.
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

           1       0.44      0.02      0.04       329
           2       0.00      0.00      0.00        74
           3       0.00      0.00      0.00       160
           4       0.41      0.93      0.57      1185
           5       0.00      0.00      0.00       478
           6       0.00      0.00      0.00       162
           7       0.37      0.19      0.25       680

    accuracy                           0.40      3068
   macro avg       0.17      0.16      0.12      3068
weighted avg       0.29      0.40      0.28      3068



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# Tabulate true labels (rows) against predicted labels (columns).
confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
print(f'Confusion matrix: \n{confusion_matrix}')


Confusion matrix: 
[[   7    0    0  291    0    0   31]
 [   0    0    0   68    0    0    6]
 [   1    0    0  134    0    0   25]
 [   4    0    0 1102    0    0   79]
 [   0    0    0  411    0    0   67]
 [   0    0    0  144    0    0   18]
 [   4    0    0  545    0    0  131]]
