In [1]:
import os

from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDClassifier

from tools.functions import load_images_from_folder

In [2]:
# Dataset location, expressed relative to this notebook's directory.
ROOT = os.path.join('..', '..', '..')
DS_NAME = 'data1'
DATA_DIR = os.path.join(
    ROOT, '..', DS_NAME,
    'images_original_inception_resnet_v2_150x150_categorized')

# Each split lives in its own sub-folder of DATA_DIR.
TRAINING_DIR, VALIDATION_DIR = (
    os.path.join(DATA_DIR, split_name)
    for split_name in ('training', 'validation')
)

# NOTE(review): the extra argument 100 is passed for the training split only;
# its meaning is defined by load_images_from_folder (presumably a cap on the
# number of images loaded) -- TODO confirm against tools.functions.
X_train, y_train = load_images_from_folder(TRAINING_DIR, 100)
X_test, y_test = load_images_from_folder(VALIDATION_DIR)

In [3]:
class PassthroughPreprocessor:
    """No-op preprocessing step that hands its input back unchanged.

    Every keyword argument given to the constructor or to ``set_params`` is
    accepted and discarded, so callers can pass parameters such as
    ``n_components`` without the step raising or changing behaviour.
    """

    def __init__(self, **_):
        # Keyword arguments are accepted only for call compatibility; the
        # attribute is always reset to None regardless of what was passed.
        self.n_components = None

    def fit(self, X, y=None):
        """Learn nothing and return this instance."""
        return self

    def fit_transform(self, X, y=None):
        """Return ``X`` itself, unmodified."""
        return X

    def transform(self, X):
        """Return ``X`` itself, unmodified."""
        return X

    def set_params(self, **_):
        """Ignore all parameters and return this instance.

        Returning ``self`` instead of ``None`` follows the scikit-learn
        estimator convention, so chained calls such as
        ``step.set_params(...).fit(X)`` work.
        """
        return self

    def get_params(self, **_):
        """Return an empty dict: this step exposes no tunable parameters."""
        return {}

In [None]:
# Fixed seed so the stochastic parts of the search (SGD data shuffling, and
# PCA whenever it selects a randomized solver) give the same result on every
# Restart & Run All.
RANDOM_STATE = 42

# 'reduce_dim' starts as a passthrough placeholder; the grid below swaps in
# the actual dimensionality-reduction estimator.
pipe = Pipeline([
    ('reduce_dim', 'passthrough'),
    ('classify', SGDClassifier(random_state=RANDOM_STATE))
])

# 3 * 4 * 4 * 3 * 2 = 288 candidate settings, each fitted on 5 CV folds.
param_grid = [
    {
        'reduce_dim': [PCA(random_state=RANDOM_STATE)],
        'reduce_dim__n_components': [5, 10, 20],
        'classify__alpha': [0.0001, 0.001, 0.01, 0.1],
        # NOTE(review): 'log' was renamed to 'log_loss' in scikit-learn 1.1
        # and the old name was removed in 1.3; switch the literal if this
        # notebook is run on a newer release.
        'classify__loss': ['hinge', 'log', 'modified_huber', 'squared_hinge'],
        'classify__penalty': ['l1', 'l2', 'elasticnet'],
        'classify__max_iter': [500, 1000]
    }
]

grid = GridSearchCV(pipe, cv=5, n_jobs=-1, param_grid=param_grid, verbose=2)

grid.fit(X_train, y_train)

In [6]:
# Score the best pipeline found by the grid search on the validation split.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

# One print call, newline-separated: accuracy first, then the winning
# hyper-parameter combination.
print(
    f"Accuracy: {accuracy_score(y_test, y_pred)}",
    grid.best_params_,
    sep="\n",
)

Accuracy: 0.36627906976744184
{'classify__alpha': 0.0001, 'classify__loss': 'modified_huber', 'classify__max_iter': 1000, 'classify__penalty': 'l1', 'reduce_dim': <__main__.PassthroughPreprocessor object at 0x00000239AC0D9A60>, 'reduce_dim__n_components': 10}
