In [25]:
from preprocess import Preprocess
import numpy as np
import pandas as pd
from torchvision import transforms
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


In [16]:
# Target size handed to transforms.Resize for every image.
IMAGE_RESIZE = (64, 64)


def _to_gray(img):
    """Average the tensor over dim 0 (the 'gray' step of the pipeline)."""
    return img.mean(dim=0)


def _flatten(img):
    """Collapse the tensor into a single 1-D vector."""
    return img.view(-1)


# Resize -> tensor -> gray -> flat vector, applied in that order.
TRANSFORM = transforms.Compose([
    transforms.Resize(IMAGE_RESIZE),
    transforms.ToTensor(),
    transforms.Lambda(_to_gray),
    transforms.Lambda(_flatten),
])

# Both pair datasets go through the exact same preprocessing pipeline.
train_data = Preprocess.load_train_pairs(transform=TRANSFORM)
val_data = Preprocess.load_test_pairs(transform=TRANSFORM)

In [7]:
# Deserialize the PCA object stored in trained/pca.pkl.
# NOTE: unpickling runs arbitrary code -- only load files you trust.
with open('trained/pca.pkl', mode='rb') as pca_file:
    pca = pickle.load(pca_file)
pca  # last expression: display the loaded estimator

In [12]:
def apply_PCA(pair_dataset):
    """Project every image pair through the fitted PCA and stack the results.

    Each sample of ``pair_dataset`` is unpacked as ``(image1, image2, label)``.
    Both images are flattened, projected with the module-level ``pca`` and the
    two projections are concatenated side by side into one feature row.

    Parameters
    ----------
    pair_dataset : iterable of (image1, image2, label)
        Images must expose ``reshape`` and be convertible with ``np.asarray``
        (presumably the flat tensors produced by ``TRANSFORM`` -- confirm
        against ``Preprocess``). Labels must be convertible with ``float``.

    Returns
    -------
    X : np.ndarray of float64, shape (n_pairs, 2 * pca.n_components_)
        Columns ``[:k]`` hold the projection of the first image, columns
        ``[k:]`` the projection of the second one.
    y : np.ndarray of float64, shape (n_pairs,)
        The labels, in dataset order.
    """
    # Use the *fitted* attribute: the constructor argument `n_components` may
    # be None, a float (variance ratio) or 'mle', none of which can size an array.
    n_components = pca.n_components_

    first_images, second_images, labels = [], [], []
    for image1, image2, label in pair_dataset:
        first_images.append(np.asarray(image1.reshape(-1)))
        second_images.append(np.asarray(image2.reshape(-1)))
        labels.append(float(label))

    y = np.array(labels, dtype=np.float64)
    X = np.empty((len(labels), 2 * n_components))
    if not labels:
        # Nothing to project; pca.transform would reject an empty batch.
        return X, y

    # One batched transform per side instead of two calls per pair.
    X[:, :n_components] = pca.transform(np.stack(first_images))
    X[:, n_components:] = pca.transform(np.stack(second_images))
    return X, y

In [20]:
# PCA features (two projections per pair) and labels for each split.
X_train, y_train = apply_PCA(train_data)
X_val, y_val = apply_PCA(val_data)

# Standardise using statistics learned from the training split only, then
# apply that same scaling to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

# Cell output: the shapes of the four arrays.
tuple(arr.shape for arr in (X_train, y_train, X_val, y_val))

((2200, 512), (2200,), (1000, 512), (1000,))

### Regressão logística

In [27]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Search space: 10 log-spaced regularisation strengths (1e-5 .. 1e5) crossed
# with every solver listed below.
param_grid = {
    'C': np.logspace(-5, 5, 10),
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
}

# random_state pins the data shuffling done by the 'liblinear', 'sag' and
# 'saga' solvers, so the selected parameters are the same on every run.
grid_search = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

print("Best parameters: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

Best parameters:  {'C': 0.0016681005372000592, 'solver': 'newton-cg'}
Best cross-validation score:  0.515


In [28]:
# GridSearchCV refits the winning configuration on the full training set by
# default (refit=True), so take that estimator as-is. Rebuilding it from
# best_params_ alone would drop the max_iter=1000 of the searched estimator,
# training the final model under different settings than the ones selected.
logreg_model = grid_search.best_estimator_
logreg_model

In [29]:
# Accuracy of the logistic-regression model on each split, train first.
train_predictions = logreg_model.predict(X_train)
print('Train accuracy:', accuracy_score(y_train, train_predictions))

val_predictions = logreg_model.predict(X_val)
print('Val accuracy:', accuracy_score(y_val, val_predictions))

Train accuracy: 0.7127272727272728
Val accuracy: 0.541


### Random Forest

In [30]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Candidate values sampled by the randomized search.
param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Seed both sources of randomness -- the forest itself and the sampling of the
# 10 candidate configurations -- so a re-run selects the same parameters.
rf_random_search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    random_state=42,
)
rf_random_search.fit(X_train, y_train)

print("Best parameters: ", rf_random_search.best_params_)
print("Best cross-validation score: ", rf_random_search.best_score_)

Best parameters:  {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': None}
Best cross-validation score:  0.5272727272727273


In [31]:
# RandomizedSearchCV refits the best configuration on the whole training set
# by default (refit=True). Reusing that estimator means the accuracies below
# describe the model the search selected, rather than a freshly and
# differently seeded forest that merely shares its hyper-parameters.
rf_model = rf_random_search.best_estimator_

print('Train accuracy:', accuracy_score(y_train, rf_model.predict(X_train)))
print('Val accuracy:', accuracy_score(y_val, rf_model.predict(X_val)))

Train accuracy: 1.0
Val accuracy: 0.542


### Rede neural

In [41]:
import keras_tuner as kt
import keras

def build_model(hp):
    """Build and compile a dense binary classifier from tuner hyper-parameters.

    Hyper-parameters requested from ``hp``:
      * ``num_layers``    -- number of hidden layers, 1 to 3.
      * ``units_<i>``     -- width of hidden layer ``i``, 4 to 32 in steps of 4.
      * ``learning_rate`` -- Adam learning rate, one of 1e-2, 1e-3, 1e-4.

    Returns the compiled ``keras.Sequential`` model (ReLU hidden layers, one
    sigmoid output unit, binary cross-entropy loss, accuracy metric).
    """
    net = keras.Sequential()

    # Hidden stack: depth and per-layer width are both tunable.
    depth = hp.Int('num_layers', 1, 3)
    for idx in range(depth):
        width = hp.Int(f'units_{idx}', min_value=4, max_value=32, step=4)
        net.add(keras.layers.Dense(units=width, activation='relu'))

    # Single sigmoid unit for the binary output.
    net.add(keras.layers.Dense(1, activation='sigmoid'))

    # Learning rate is picked from a fixed set of three values.
    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer = keras.optimizers.Adam(learning_rate=lr)

    net.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Random search over the build_model space: 5 sampled configurations, each
# trained 5 times, ranked by validation accuracy. Results are written under
# my_dir/helloworld4.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=5,
    directory='my_dir',
    project_name='helloworld4',
)

# Print the search space before spending time on training.
tuner.search_space_summary()

# Every trial trains for 40 epochs and is scored on the validation arrays.
validation_pair = (X_val, y_val)
tuner.search(X_train, y_train, epochs=40, validation_data=validation_pair)

tuner.results_summary()

# Keep the top-ranked hyper-parameter set.
best_hps = tuner.get_best_hyperparameters(num_trials=10)[0]

Trial 5 Complete [00h 00m 30s]
val_accuracy: 0.5315999984741211

Best val_accuracy So Far: 0.5327999949455261
Total elapsed time: 00h 02m 30s
Results summary
Results in my_dir\helloworld4
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 1 summary
Hyperparameters:
num_layers: 1
units_0: 28
learning_rate: 0.001
units_1: 16
Score: 0.5327999949455261

Trial 4 summary
Hyperparameters:
num_layers: 3
units_0: 24
learning_rate: 0.001
units_1: 24
units_2: 4
Score: 0.5315999984741211

Trial 3 summary
Hyperparameters:
num_layers: 1
units_0: 32
learning_rate: 0.0001
units_1: 32
units_2: 16
Score: 0.5275999903678894

Trial 2 summary
Hyperparameters:
num_layers: 3
units_0: 28
learning_rate: 0.001
units_1: 12
units_2: 4
Score: 0.5252000093460083

Trial 0 summary
Hyperparameters:
num_layers: 2
units_0: 4
learning_rate: 0.001
units_1: 4
Score: 0.5232000112533569


In [42]:
# Rebuild an untrained network from the best hyper-parameters found by the tuner.
top_configs = tuner.get_best_hyperparameters()
best_hp = top_configs[0]
nn_model = tuner.hypermodel.build(best_hp)

# Train for 40 epochs, reporting validation metrics after each one.
nn_model.fit(
    X_train,
    y_train,
    epochs=40,
    validation_data=(X_val, y_val),
)
nn_model.summary()

Epoch 1/40
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4915 - loss: 0.8563 - val_accuracy: 0.5090 - val_loss: 0.7531
Epoch 2/40
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6605 - loss: 0.6190 - val_accuracy: 0.5200 - val_loss: 0.7546
Epoch 3/40
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7748 - loss: 0.5182 - val_accuracy: 0.5070 - val_loss: 0.7730
Epoch 4/40
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8363 - loss: 0.4468 - val_accuracy: 0.5200 - val_loss: 0.7993
Epoch 5/40
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8932 - loss: 0.3854 - val_accuracy: 0.5130 - val_loss: 0.8357
Epoch 6/40
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9149 - loss: 0.3219 - val_accuracy: 0.5200 - val_loss: 0.8748
Epoch 7/40
[1m69/69[0m [32m━━━━━━━━━━