In [2]:
import sys
import os
import numpy as np
import pickle

from scipy.io import loadmat
from glob import glob
from sklearn.model_selection import train_test_split
from keras.callbacks import TensorBoard
from keras.optimizers import Adadelta
from os.path import join

sys.path.append('scripts')
from data_helpers import *
from model_helpers import *

%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [None]:
# Pin the CUDA_VISIBLE_DEVICES environment variable to '0' for this process
# (intended to limit GPU selection to device index 0).
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})

In [None]:
# Root directory for training artifacts: the callbacks' 'logs' path and the
# pickled training history are both created underneath it.
OUT_PATH_MODEL = "saved"

In [3]:
# ---- Data hyperparameters ----
ratio_0_1 = 1                 # forwarded to create_pairs(ratio_0_1=...)
excluded_classes = []         # NOTE(review): not referenced by any cell shown in this notebook
target_size = (512, 512)      # passed to create_img_pairs and used for the encoder input shape
test_ratio = 0.25             # `test_size` given to train_test_split

# ---- Model hyperparameters ----
mobilenet_width = 1           # passed as `alpha` to create_mobile_net_encoder
dense_layers = [128, 64, 32]  # passed to create_mobile_net_encoder

# ---- Train hyperparameters ----
num_epochs, batch_size = 500, 32

In [5]:
# Collect image paths (sorted so the order is deterministic) and the label list
# stored under the 'labels' key of the .mat file. The pairing below assumes that
# files[i] belongs to labels_data[i].
files = sorted(glob('data/imgs/*jpg'))
labels_data = loadmat('data/imagelabels.mat')['labels'][0].tolist()

# zip() silently truncates to the shorter input, so a count mismatch (e.g. a
# missing image) would go unnoticed and could misalign files and labels.
# Fail loudly instead.
if len(files) != len(labels_data):
    raise ValueError(
        'Found {} image files but {} labels; cannot pair files with labels.'.format(
            len(files), len(labels_data)))

# Group (file, label) tuples by label in a single pass instead of rescanning the
# whole dataset once per class. File order inside each group is preserved.
_files_by_label = {}
for _path, _label in zip(files, labels_data):
    _files_by_label.setdefault(_label, []).append((_path, _label))

# One list of (file, label) tuples per class, in the iteration order of the set.
unique_labels = set(labels_data)
grouped_files = [_files_by_label[label] for label in unique_labels]

same_pairs, diff_pairs, rest_data = create_pairs(grouped_files, ratio_0_1=ratio_0_1)

In [19]:
# Flatten the pairs into three parallel lists:
#   file_pairs  - (file_a, file_b)
#   class_pairs - (class_a, class_b)
#   labels      - 0 when both classes are equal, 1 otherwise
pairs = same_pairs + diff_pairs

file_pairs, class_pairs, labels = [], [], []
for pair in pairs:
    first, second = pair[0], pair[1]
    file_pairs.append((first[0], second[0]))
    class_pairs.append((first[1], second[1]))
    labels.append(0 if first[1] == second[1] else 1)

In [25]:
# Stratified, shuffled split of the pair lists into train and test parts.
# A fixed seed keeps the train/test membership identical across kernel restarts;
# without it every run evaluates on a different test set.
split_seed = 42
train_files, test_files, c_train, c_test, y_train, y_test = train_test_split(
        file_pairs, class_pairs, labels,
        test_size=test_ratio, shuffle=True, stratify=labels, random_state=split_seed)


In [None]:
# Build the image-pair inputs for both splits.
# The helper is called unqualified: this notebook only executes
# `from data_helpers import *`, which does not bind the module name
# `data_helpers`, so `data_helpers.create_img_pairs(...)` raises NameError on a
# fresh kernel. This also matches how create_pairs / DataGenerator are called.
x_train = create_img_pairs(train_files, 'train', target_size=target_size)
x_test = create_img_pairs(test_files, 'test', target_size=target_size)

In [None]:
# Generator over the training pairs and labels, created with the augment and
# shuffle flags switched on and the configured batch size.
training_generator = DataGenerator(
    x_train, y_train,
    shuffle=True, augment=True, batch_size=batch_size)

In [None]:
# Checkpoint callback monitoring 'val_loss', invoked with period=1 and
# save_weights_only=False.
# NOTE(review): its first argument is the same '<OUT_PATH_MODEL>/logs' path that
# TensorBoard uses as log_dir below - confirm CustomModelCheckpoint expects a
# directory here rather than a file path.
checkpointer = CustomModelCheckpoint(
    join(OUT_PATH_MODEL, 'logs'),
    monitor='val_loss', mode='auto',
    save_weights_only=False,
    period=1, verbose=0)

# TensorBoard callback with histograms, graph, gradients, images and embeddings
# all switched off (frequencies 0 / flags False / embedding inputs None).
tensorboard = TensorBoard(
    log_dir=join(OUT_PATH_MODEL, 'logs'),
    batch_size=batch_size,
    histogram_freq=0, embeddings_freq=0,
    write_graph=False, write_grads=False, write_images=False,
    embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None)

In [None]:
# Encoder input shape: the target image size followed by a trailing 3.
encoder_input_shape = tuple(target_size) + (3,)
encoder = create_mobile_net_encoder(
    encoder_input_shape, dense_layers, alpha=mobilenet_width)

# Wrap the encoder into the siamese model and compile it with the contrastive
# loss, an Adadelta optimizer (default settings) and the `accuracy` metric
# brought in by the helper imports.
siamese_model = create_siamese_model(encoder)
siamese_model.compile(
    optimizer=Adadelta(),
    loss=contrastive_loss,
    metrics=[accuracy])

In [None]:
# Validation inputs: the test pairs converted to an array and passed through
# split_imgs, together with the test labels.
validation_data = (split_imgs(np.array(x_test)), y_test)

# Run training from the generator with both callbacks attached.
# `history_dict` holds the object returned by fit_generator; its `.history`
# attribute is what gets persisted below.
history_dict = siamese_model.fit_generator(
    generator=training_generator,
    epochs=num_epochs,
    validation_data=validation_data,
    callbacks=[checkpointer, tensorboard],
    workers=10,
    use_multiprocessing=True,
    shuffle=False,
    verbose=1)

# Persist the recorded history next to the other outputs.
history_path = join(OUT_PATH_MODEL, 'history.pkl')
with open(history_path, 'wb') as history_file:
    pickle.dump(history_dict.history, history_file)