In [1]:
import os
import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from tools.hog_transformer import HogTransformer
from tools.rgb_2_gray_transformer import RGB2GrayTransformer
from tools.functions import load_images_from_folder

In [2]:
ROOT = os.path.join('..', '..', '..')
DS_NAME = 'data1'
DATA_DIR = os.path.join(
    ROOT,
    '..',
    DS_NAME,
    'images_original_inception_resnet_v2_150x150_categorized')
TRAINING_DIR = os.path.join(DATA_DIR, 'training')
VALIDATION_DIR = os.path.join(DATA_DIR, 'validation')
X_train, y_train = load_images_from_folder(TRAINING_DIR, 200, flat=False)
X_test, y_test = load_images_from_folder(VALIDATION_DIR, flat=False)

In [3]:
grayify = RGB2GrayTransformer()
hogify = HogTransformer(
    pixels_per_cell=(20, 20),
    cells_per_block=(2, 2))
scalify = StandardScaler()
X_train_gray = grayify.fit_transform(X_train)
X_train_hog = hogify.fit_transform(X_train_gray)
X_train_prepared = scalify.fit_transform(X_train_hog)
X_test_gray = grayify.transform(X_test)
X_test_hog = hogify.transform(X_test_gray)
X_test_prepared = scalify.transform(X_test_hog)

In [4]:
neighbors = np.max(y_train)
knn = KNeighborsClassifier(n_neighbors=neighbors)
knn.fit(X_train_prepared, y_train)

y_pred = knn.predict(X_test_prepared)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.2713178294573643


In [5]:
pipe = Pipeline([
    ('grayify', RGB2GrayTransformer()),
    ('hogify', HogTransformer()),
    ('scalify', StandardScaler()),
    ('reduce_dim', 'passthrough'),
    ('classify', KNeighborsClassifier(n_neighbors=neighbors))
])
param_grid = [
    {
        'reduce_dim': [PCA()],
        'reduce_dim__n_components': [2, 5, 10, 20, 30],
        'reduce_dim__whiten': [True, False],
        'hogify__pixels_per_cell': [(10, 10), (14, 14), (17, 17), (20, 20), (25, 25), (30, 30)],
        'hogify__cells_per_block': [(2, 2), (3, 3), (4, 4), (5, 5)]
    }
]
grid = GridSearchCV(pipe, cv=5, param_grid=param_grid, verbose=2)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 240 candidates, totalling 1200 fits
[CV] END hogify__cells_per_block=(2, 2), hogify__pixels_per_cell=(10, 10), reduce_dim=PCA(), reduce_dim__n_components=2, reduce_dim__whiten=True; total time=   7.7s
[CV] END hogify__cells_per_block=(2, 2), hogify__pixels_per_cell=(10, 10), reduce_dim=PCA(), reduce_dim__n_components=2, reduce_dim__whiten=True; total time=   7.8s
[CV] END hogify__cells_per_block=(2, 2), hogify__pixels_per_cell=(10, 10), reduce_dim=PCA(), reduce_dim__n_components=2, reduce_dim__whiten=True; total time=   7.8s
[CV] END hogify__cells_per_block=(2, 2), hogify__pixels_per_cell=(10, 10), reduce_dim=PCA(), reduce_dim__n_components=2, reduce_dim__whiten=True; total time=   7.8s
[CV] END hogify__cells_per_block=(2, 2), hogify__pixels_per_cell=(10, 10), reduce_dim=PCA(), reduce_dim__n_components=2, reduce_dim__whiten=True; total time=   8.0s
[CV] END hogify__cells_per_block=(2, 2), hogify__pixels_per_cell=(10, 10), reduce_dim=PCA(), reduce_dim__n_comp

In [6]:
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(grid.best_params_)

Accuracy: 0.2771317829457364
{'hogify__cells_per_block': (4, 4), 'hogify__pixels_per_cell': (10, 10), 'reduce_dim': PCA(n_components=10), 'reduce_dim__n_components': 10, 'reduce_dim__whiten': False}
