In [88]:
import os
import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline

from tools.functions import load_images_from_folder

In [89]:
ROOT = os.path.join('..', '..', '..')
DS_NAME = 'data1'
DATA_DIR = os.path.join(
    ROOT,
    '..',
    DS_NAME,
    'images_original_inception_resnet_v2_150x150_categorized')
TRAINING_DIR = os.path.join(DATA_DIR, 'training')
VALIDATION_DIR = os.path.join(DATA_DIR, 'validation')
X_train, y_train = load_images_from_folder(TRAINING_DIR, 200)
X_test, y_test = load_images_from_folder(VALIDATION_DIR)

In [90]:
neighbors = np.max(y_train)
knn = KNeighborsClassifier(n_neighbors=neighbors)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.40891472868217055


In [None]:
pipe = Pipeline([
    ('reduce_dim', 'passthrough'),
    ('classify', KNeighborsClassifier(n_neighbors=neighbors))
])
param_grid = [
    {
        'reduce_dim': [PCA()],
        'reduce_dim__n_components': [2, 5, 10, 20, 30],
        'reduce_dim__whiten': [True, False]
    }
]
grid = GridSearchCV(pipe, cv=5, param_grid=param_grid, verbose=2)
grid.fit(X_train, y_train)

In [94]:
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(grid.best_params_)

Accuracy: 0.4263565891472868
{'reduce_dim': PCA(n_components=20), 'reduce_dim__n_components': 20, 'reduce_dim__whiten': False}
