In [1]:
import timm
import torch
import functools
import numpy as np
from torchvision import transforms
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Image -> tensor conversion; it is the `transform=` argument of the (commented-out)
# Preprocess.load_*_pairs(...) calls further down in this notebook.
TRANSFORM = transforms.ToTensor()

In [3]:



class TIMM:
    """Image feature extractor backed by a pretrained timm vision transformer.

    The model is created with ``num_classes=0`` (classifier layer removed) and
    switched to eval mode. ``self.transforms`` holds the preprocessing pipeline
    resolved from the model's own data configuration.
    """

    def __init__(self):
        # Model card: https://huggingface.co/timm/vit_medium_patch16_reg4_gap_256.sbb_in1k
        backbone = timm.create_model(
            'vit_medium_patch16_reg4_gap_256.sbb_in1k',
            pretrained=True,
            num_classes=0,  # drop the final nn.Linear classifier
        )
        backbone.eval()
        self.model = backbone

        # Model-specific preprocessing (normalization, resize), inference variant.
        self.transforms = timm.data.create_transform(
            **timm.data.resolve_model_data_config(self.model),
            is_training=False,
        )

    @functools.lru_cache(maxsize=None)
    def extract(self, img) -> torch.Tensor:
        """Return the model output for a single image.

        The image is preprocessed, given a leading batch axis of size 1, passed
        through the model with gradients disabled, and the first (only) item of
        the batch output is returned.

        Calls are memoised by ``functools.lru_cache`` with no size limit, keyed
        on ``(self, img)``; ``img`` therefore has to be hashable, and every
        cached input/output pair stays referenced by the cache.
        """
        with torch.no_grad():
            batch = self.transforms(img).unsqueeze(0)
            features = self.model(batch)
            return features[0]

# Shared extractor instance; pairs_as_ndarray below calls timm_model.extract on every image.
timm_model = TIMM()

In [4]:
# Turns every pair into one row of an ndarray. (This is the function that produced
# the saved dev_train and dev_val datasets; the original note calls them CSVs,
# while this notebook loads them from .npy files.)
def pairs_as_ndarray(pairs):
    """Build a matrix with one row per ``(image1, image2, label)`` pair.

    Each row is the ``timm_model.extract`` output for image1, followed by the
    output for image2, followed by the label as the final element. A progress
    counter is printed on a single, carriage-return-overwritten line.
    """
    rows = []
    for i, (image1, image2, label) in enumerate(pairs):
        print(f'Progress: {i}/{len(pairs)}', end='\r')
        first_features = timm_model.extract(image1)
        second_features = timm_model.extract(image2)
        rows.append(np.concatenate((first_features, second_features, [label])))
    return np.array(rows)

def duplicate_by_symmetry(x):
    """Return the rows of ``x`` interleaved with their mirrored versions.

    Every row is read as ``[first half | second half | label]``, where the
    label is the last element and the remaining elements are split at
    ``(len(row) - 1) // 2``. For each input row, in order, the output contains
    the row itself and then a copy with the two halves swapped and the same
    label, so the result has twice as many rows as the input.
    """
    doubled = []
    for row in x:
        half = (len(row) - 1) // 2
        first, second, tail = row[:half], row[half:-1], [row[-1]]
        doubled.extend(
            np.concatenate(parts)
            for parts in ((first, second, tail), (second, first, tail))
        )
    return np.array(doubled)

In [5]:
# How the arrays were originally produced (kept for provenance, not executed).
# NOTE(review): `Preprocess` is not imported anywhere in the visible notebook.
# dev_train = pairs_as_ndarray(Preprocess.load_train_pairs(transform=TRANSFORM))
# dev_val = pairs_as_ndarray(Preprocess.load_test_pairs(transform=TRANSFORM))
# Load the saved arrays from .npy instead. Only the training rows go through
# duplicate_by_symmetry; the validation rows are used as stored.
dev_train = duplicate_by_symmetry(np.load('dev_train.npy'))
dev_val = np.load('dev_val.npy')
dev_train.shape, dev_val.shape

((4400, 1025), (1000, 1025))

In [6]:
# Save the arrays
# np.save('dev_train.npy', dev_train)
# np.save('dev_val.npy', dev_val)

In [7]:
# Split each matrix into features (every column but the last) and labels (last column).
X_train, y_train = dev_train[:, :-1], dev_train[:, -1]
X_val, y_val = dev_val[:, :-1], dev_val[:, -1]

# Fit the scaler on the training features only, then apply that same scaling
# to both the training and the validation features.
scaler = StandardScaler().fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

X_train.shape, y_train.shape, X_val.shape, y_val.shape

((4400, 1024), (4400,), (1000, 1024), (1000,))

In [8]:
### Logistic regression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Search grid: five log-spaced values of C between 1e-5 and 1e5, crossed with every solver.
param_grid = {
    'C': np.logspace(-5, 5, 5),
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
}

# Exhaustive 5-fold cross-validated search over the grid on the training data.
grid_search = GridSearchCV(
    estimator=LogisticRegression(max_iter=10000),
    param_grid=param_grid,
    cv=5,
).fit(X_train, y_train)

print("Best parameters: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)


Best parameters:  {'C': 0.0031622776601683794, 'solver': 'newton-cg'}
Best cross-validation score:  0.5627272727272727


In [9]:

# Train a standalone model with the hyperparameters selected by the grid search.
# max_iter has to be passed again: it was set on the estimator handed to
# GridSearchCV and is not one of the searched parameters, so best_params_ does not
# carry it. Without it the final model would silently use the default iteration
# cap instead of the configuration that was actually cross-validated.
logreg_model = LogisticRegression(max_iter=10000, **grid_search.best_params_)
logreg_model.fit(X_train, y_train)
print('Train accuracy:', accuracy_score(y_train, logreg_model.predict(X_train)))
print('Val accuracy:', accuracy_score(y_val, logreg_model.predict(X_val)))


Train accuracy: 0.7109090909090909
Val accuracy: 0.578


In [10]:

### Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Candidate values sampled by the randomized search.
param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 10 sampled configurations, each scored with 5-fold cross-validation.
# Both sources of randomness are seeded (the forest itself and the parameter
# sampler) so that re-running the cell evaluates the same candidates and
# reproduces the same scores.
rf_random_search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    random_state=42,
)
rf_random_search.fit(X_train, y_train)

print("Best parameters: ", rf_random_search.best_params_)
print("Best cross-validation score: ", rf_random_search.best_score_)


Best parameters:  {'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_depth': 10}
Best cross-validation score:  0.6845454545454545


In [11]:

# Train the final forest with the hyperparameters chosen by the randomized search.
# random_state is fixed so the reported accuracies are reproducible across runs;
# it is not one of the searched parameters, so it cannot clash with best_params_.
rf_model = RandomForestClassifier(random_state=42, **rf_random_search.best_params_)
rf_model.fit(X_train, y_train)

print('Train accuracy:', accuracy_score(y_train, rf_model.predict(X_train)))
print('Val accuracy:', accuracy_score(y_val, rf_model.predict(X_val)))


Train accuracy: 0.975909090909091
Val accuracy: 0.714


In [12]:

### Neural network
import keras_tuner as kt
import keras

def build_model(hp):
    """Build and compile a binary classifier from keras-tuner hyperparameters.

    Hyperparameters requested from ``hp``, in this order:
      * ``num_layers``: number of hidden Dense layers, between 1 and 3.
      * ``units_<i>``: width of hidden layer ``i``, from 4 to 32 in steps of 4.
      * ``learning_rate``: Adam learning rate, one of 0.01, 0.001 or 0.0001.

    Returns:
        The compiled ``keras.Sequential`` model: ReLU hidden layers, a single
        sigmoid output unit, binary cross-entropy loss and an accuracy metric.
    """
    network = keras.Sequential()

    # Tunable stack of hidden layers.
    depth = hp.Int('num_layers', 1, 3)
    for layer_idx in range(depth):
        width = hp.Int('units_' + str(layer_idx), min_value=4, max_value=32, step=4)
        network.add(keras.layers.Dense(units=width, activation='relu'))

    # Single-unit sigmoid output.
    network.add(keras.layers.Dense(1, activation='sigmoid'))

    # Tunable optimizer learning rate.
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    network.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Random search over build_model's hyperparameters, maximising validation accuracy:
# 5 trials, each executed 5 times; results are stored under runs2/nn-timm-medium.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=5,
    directory='runs2',
    project_name='nn-timm-medium',
)

tuner.search_space_summary()

# NOTE(review): (X_val, y_val) drives hyperparameter selection here and is also the
# set the later cells report accuracy on, so that reported number is likely optimistic.
tuner.search(
    X_train,
    y_train,
    epochs=40,
    validation_data=(X_val, y_val),
)

tuner.results_summary()

# NOTE(review): best_hps is not read by any later cell visible in this notebook
# (the next cell queries the tuner again as best_hp); num_trials=10 also exceeds max_trials=5.
best_hps = tuner.get_best_hyperparameters(num_trials=10)[0]


Trial 5 Complete [00h 00m 36s]
val_accuracy: 0.7419999957084655

Best val_accuracy So Far: 0.7465999960899353
Total elapsed time: 00h 02m 53s
Results summary
Results in runs2\nn-timm-medium
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0 summary
Hyperparameters:
num_layers: 3
units_0: 32
learning_rate: 0.01
units_1: 4
units_2: 4
Score: 0.7465999960899353

Trial 4 summary
Hyperparameters:
num_layers: 3
units_0: 32
learning_rate: 0.001
units_1: 8
units_2: 20
Score: 0.7419999957084655

Trial 1 summary
Hyperparameters:
num_layers: 3
units_0: 28
learning_rate: 0.001
units_1: 12
units_2: 12
Score: 0.7346000075340271

Trial 3 summary
Hyperparameters:
num_layers: 2
units_0: 16
learning_rate: 0.001
units_1: 24
units_2: 8
Score: 0.7338000059127807

Trial 2 summary
Hyperparameters:
num_layers: 2
units_0: 12
learning_rate: 0.0001
units_1: 8
units_2: 8
Score: 0.7212000012397766


In [13]:

# Take the tuner's top-ranked hyperparameters and build a new model from them.
best_hp = tuner.get_best_hyperparameters()[0]
nn_model = tuner.hypermodel.build(best_hp)
# Train with the same epoch count and validation data that the tuner search used.
nn_model.fit(X_train, y_train, epochs=40, validation_data=(X_val, y_val))
nn_model.summary()


Epoch 1/40
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5480 - loss: 0.7054 - val_accuracy: 0.6050 - val_loss: 0.6416
Epoch 2/40
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6008 - loss: 0.6338 - val_accuracy: 0.6150 - val_loss: 0.6229
Epoch 3/40
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 990us/step - accuracy: 0.6437 - loss: 0.5956 - val_accuracy: 0.6590 - val_loss: 0.6165
Epoch 4/40
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 927us/step - accuracy: 0.6720 - loss: 0.5795 - val_accuracy: 0.6650 - val_loss: 0.6116
Epoch 5/40
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 887us/step - accuracy: 0.6894 - loss: 0.5497 - val_accuracy: 0.6860 - val_loss: 0.6185
Epoch 6/40
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7239 - loss: 0.5257 - val_accuracy: 0.6780 - val_loss: 0.5903
Epoch 7/40
[1m138/138

In [14]:

# evaluate() returns the loss followed by the compiled metrics; build_model compiles
# with metrics=['accuracy'], so index 1 is the accuracy.
train_accuracy = nn_model.evaluate(X_train, y_train)[1]
val_accuracy = nn_model.evaluate(X_val, y_val)[1]
train_accuracy, val_accuracy


[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363us/step - accuracy: 0.9199 - loss: 0.2386
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 532us/step - accuracy: 0.7265 - loss: 0.7695


(0.9188636541366577, 0.7039999961853027)

In [15]:


### SVM
from sklearn.svm import SVC

# Search grid for the SVM: C and gamma each take six log-spaced values from
# 1e-3 to 1e2, crossed with both kernels.
# NOTE(review): param_grid and grid_search overwrite the objects of the same name
# created by the logistic-regression search earlier in the notebook.
param_grid = {
    'C': np.logspace(-3, 2, base=10, num=6),
    'kernel': ['rbf', 'sigmoid'],
    'gamma': np.logspace(-3, 2, base=10, num=6),
}

# Exhaustive 5-fold cross-validated search on the training data.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    cv=5,
).fit(X_train, y_train)

print("Best parameters: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)


Best parameters:  {'C': 1.0, 'gamma': 0.001, 'kernel': 'rbf'}
Best cross-validation score:  0.7168181818181818


In [16]:

# Create an SVM model with the best parameters.
# grid_search here must be the SVM search from the previous cell: the same name was
# used earlier for the logistic-regression search, so this cell depends on run order.
svm_model = SVC(**grid_search.best_params_)
svm_model.fit(X_train, y_train)

# Evaluate the model: accuracy on the data it was trained on and on the validation set.
print('Train accuracy:', accuracy_score(y_train, svm_model.predict(X_train)))
print('Val accuracy:', accuracy_score(y_val, svm_model.predict(X_val)))

Train accuracy: 0.9404545454545454
Val accuracy: 0.757
