# <center> 10. SVM, KNN </center>

- SVM and KNN classifiers that predict from a photo whether a bridge is up, down, or moving.

In [2]:
import numpy as np
import pandas as pd
from os import walk
from PIL import Image, ImageFilter, ImageOps    
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

from sklearn.neighbors import KNeighborsClassifier

from tqdm import tqdm

In [3]:
##### Jupyter notebook theme setup:
# Cosmetic only: nothing in this cell touches the data or the models.
# One-time install of the theming package (kept commented out):
# !pip install jupyterthemes
# Apply the 'gruvboxd' theme through the jupyterthemes `jt` command-line tool.
# NOTE(review): the remaining flags look like font-size, cell-width and
# toolbar options -- confirm against the jupyterthemes documentation.
!jt -t gruvboxd -fs 95 -tfs 11 -nfs 115 -cellw 80% -T
##### Reset theme:
# !jt -r
##### Plot style:
# Apply jtplot's styling to the figures drawn in this notebook.
from jupyterthemes import jtplot
jtplot.style()

# Reload page after cell evaluation

## Data Import

NB! The dataset was too big to be uploaded to GitHub.

In [10]:
img_dir = 'cropped/'


def _list_images(class_name):
    """Return the sorted paths of the files directly inside ``img_dir + class_name``.

    Only the top level of the directory is listed (sub-directories are not
    descended into). The names are sorted because ``os.walk`` returns them in
    arbitrary, platform-dependent order, which would make any later
    index-based sampling irreproducible.

    Raises:
        FileNotFoundError: if the class directory does not exist or cannot be
            read (``os.walk`` yields nothing in that case).
    """
    class_dir = img_dir + class_name
    # Default of None avoids a bare StopIteration when the directory is missing.
    top_level = next(walk(class_dir), None)
    if top_level is None:
        raise FileNotFoundError(f"image directory not found: {class_dir!r}")
    names = top_level[2]  # (dirpath, dirnames, filenames) -> filenames
    return [class_dir + '/' + filename for filename in sorted(names)]


# One list of image paths per bridge state; the directory name is the label.
up = _list_images('up')
down = _list_images('down')
mov = _list_images('mov')

### Sampling

Let's take a smaller sample.

In [7]:
# All image paths in class order (up, down, mov) ...
filenames = [*up, *down, *mov]
# ... and a parallel list holding one label per path, in the same order.
y_full = [
    label
    for label, paths in (('up', up), ('down', down), ('mov', mov))
    for _path in paths
]

part_coeff = 0.2           # fraction of the full dataset kept in the sample
n_max = len(filenames)     # size of the full dataset
int(n_max * part_coeff)    # displayed as the cell output: the sample size

11032

In [8]:
# Draw the sample indices without replacement from a *seeded* generator, so the
# same subset (and therefore the same scores further down) is obtained on every
# Restart & Run All. The indices are sorted to keep the original file order.
rng = np.random.default_rng(42)
sample_idx = sorted(
    rng.choice(len(filenames), size=int(n_max * part_coeff), replace=False)
)

In [9]:
# Index paths and labels with the same index list so that they stay aligned.
filenames_sample = np.asarray(filenames)[sample_idx]
y = np.asarray(y_full)[sample_idx]

In [42]:
# Convert the sampled images into a numpy array: one flat feature vector per
# image, stacked into one row of X each.
X = []
for img_path in tqdm(filenames_sample):
    # The context manager releases the file handle once the pixels are copied,
    # instead of leaving thousands of images open until garbage collection.
    with Image.open(img_path) as img:
        # ravel() flattens row by row -- the same result np.hstack gave for
        # 2-D (single-channel) images -- and, unlike hstack, still yields a
        # 1-D vector when an image has a channel axis.
        X.append(np.array(img).ravel())
# NOTE(review): this assumes every image has the same size and mode; otherwise
# the rows have different lengths and cannot form a 2-D feature matrix.
X = np.array(X)

100%|███████████████████████████████████████████████████████████████████████████| 11032/11032 [00:28<00:00, 381.36it/s]


## Dataset preparation

In [43]:
# Hold out 20% of the sample for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## SVM

In [46]:
svm_params = {
        'kernel': ('linear', 'poly', 'rbf', 'sigmoid'),
        'C': np.arange(0.5, 2.5, 1)
    }
svm_model = GridSearchCV(svm.SVC(), svm_params, n_jobs=-3)
%time svm_model.fit(X_train, y_train)
svm_model.best_params_

Wall time: 18min 47s


GridSearchCV(estimator=SVC(), n_jobs=-3,
             param_grid={'C': array([0.5, 1.5]),
                         'kernel': ('linear', 'poly', 'rbf', 'sigmoid')})

{'C': 1.5, 'kernel': 'poly'}

In [47]:
# Predict the held-out test images with the grid-searched SVM and print the
# per-class precision / recall / F1 table.
y_svm_pred = svm_model.predict(X_test)
svm_report = classification_report(y_test, y_svm_pred)
print(svm_report)

              precision    recall  f1-score   support

        down       0.92      0.99      0.96       964
         mov       0.96      0.76      0.85       372
          up       0.96      0.96      0.96       871

    accuracy                           0.94      2207
   macro avg       0.95      0.90      0.92      2207
weighted avg       0.94      0.94      0.94      2207



## KNN

In [48]:
knn_params = {
        'n_neighbors': np.arange(3, 15, 3),
        'weights': ('uniform', 'distance'),
        'metric': ('minkowski', 'manhattan')
    }
knn_model = GridSearchCV(KNeighborsClassifier(), knn_params, n_jobs=-3)
%time knn_model.fit(X_train, y_train)
knn_model.best_params_

Wall time: 9min 50s


GridSearchCV(estimator=KNeighborsClassifier(), n_jobs=-3,
             param_grid={'metric': ('minkowski', 'manhattan'),
                         'n_neighbors': array([ 3,  6,  9, 12]),
                         'weights': ('uniform', 'distance')})

{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'uniform'}

In [49]:
# Predict the held-out test images with the grid-searched KNN model and print
# the per-class precision / recall / F1 table.
y_knn_pred = knn_model.predict(X_test)
knn_report = classification_report(y_test, y_knn_pred)
print(knn_report)

              precision    recall  f1-score   support

        down       0.99      0.95      0.97       964
         mov       0.93      0.91      0.92       372
          up       0.94      0.98      0.96       871

    accuracy                           0.96      2207
   macro avg       0.95      0.95      0.95      2207
weighted avg       0.96      0.96      0.96      2207



Probable reasons why both algorithms work successfully:
1. The photos are taken from similar points (so there are many training samples from the same point of view);
2. The photos are clear.