*Downloading git LFS*

In [None]:
# Fetch the pinned Git LFS v2.4.2 Linux/amd64 release archive.
# -nc (--no-clobber) skips the download when the archive is already present,
# so re-running this cell does not pile up duplicate ".1", ".2", ... copies.
!wget -nc https://github.com/git-lfs/git-lfs/releases/download/v2.4.2/git-lfs-linux-amd64-2.4.2.tar.gz

*Extracting git LFS*

In [None]:
# Unpack the downloaded release archive into the current directory,
# listing each extracted file (the "v" flag).
!tar xvf git-lfs-linux-amd64-2.4.2.tar.gz

*Installing git LFS*

In [None]:
# Run the installer script from inside the git-lfs-2.4.2/ directory.
# The `cd` applies only to this single shell invocation; the notebook's own
# working directory is not changed by it.
!cd git-lfs-2.4.2/ && ./install.sh

In [None]:
# One-time Git LFS setup: registers the LFS filters with git so that
# LFS-tracked files are fetched on clone/checkout.
!git lfs install

*Cloning the repository*

In [None]:
# Clone the autowebcompat repository together with its submodules, using the
# Git LFS clone wrapper so LFS-tracked files are downloaded as well.
# NOTE(review): newer Git LFS releases mark `git lfs clone` as deprecated in
# favour of plain `git clone` — confirm before bumping the Git LFS version.
!git lfs clone --recurse-submodules https://github.com/marco-c/autowebcompat.git

*Setting the working directory*

In [None]:
import os

# Resolve the clone's absolute path only once per kernel session and always
# chdir to that absolute path. A bare relative os.chdir('autowebcompat') is
# not idempotent: on a re-run the name is resolved against the *new* working
# directory, so the cell either fails or descends into the same-named package
# directory inside the repository.
if 'REPO_ROOT' not in globals():
    REPO_ROOT = os.path.abspath('autowebcompat')
os.chdir(REPO_ROOT)

*Installing requirements*

In [None]:
# Install selenium pinned to an exact version so the environment is repeatable.
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH, which may
# not belong to the kernel's Python environment — confirm, or switch to the
# `%pip` magic where the installed IPython supports it.
!pip install selenium==3.4.3


*Run the model*

In [None]:
import argparse
import random
import time

from keras.callbacks import Callback
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

from autowebcompat import network
from autowebcompat import utils

In [None]:
# Training hyper-parameters: samples per mini-batch and number of epochs.
BATCH_SIZE, EPOCHS = 32, 15

# Seed Python's `random` module so that later uses of it (e.g. shuffling the
# image names) give the same result on every run.
random.seed(42)

In [None]:
# Run configuration, kept as a plain dict with the same keys the training
# cells below look up.
args = {
    'network': 'vgg16',                   # passed to network.create
    'optimiser': 'sgd',                   # passed to network.compile
    'classification_type': 'Y + D vs N',  # passed to utils.to_categorical_label
    'early_stopping': False,              # when True, an EarlyStopping callback is added
}

In [None]:
# Read the labels and run the image preparation step from autowebcompat.utils.
labels = utils.read_labels()
print("read labels")
utils.prepare_images()
print("prepared images")

# Keep only the image names for which a label exists.
all_image_names = [i for i in utils.get_images() if i in labels]
# Fail early with a readable message instead of an IndexError further down.
assert all_image_names, "no labelled images found; cannot determine the input shape"

# Each name corresponds to two files: the Firefox and the Chrome screenshot.
# A nested comprehension flattens the pairs in linear time, whereas
# sum(list_of_lists, []) re-copies the accumulated list on every addition.
all_images = [name + suffix for name in all_image_names for suffix in ('_firefox.png', '_chrome.png')]

# Load one image only to learn the shape used as the network's input shape.
image = utils.load_image(all_images[0])
input_shape = image.shape

In [None]:
# Sizes of the 80% / 10% / 10% train / validation / test split.
# Multiply before the integer division: the previous form,
# 80 * (SAMPLE_SIZE // 100), floored first, so fewer than 100 samples produced
# empty train and validation sets and e.g. 199 samples trained on only 80.
# For sample counts that are a multiple of 100 the result is unchanged.
SAMPLE_SIZE = len(all_image_names)
TRAIN_SAMPLE = SAMPLE_SIZE * 80 // 100
VALIDATION_SAMPLE = SAMPLE_SIZE * 10 // 100
# The test set takes whatever remains, so the three sizes always add up to SAMPLE_SIZE.
TEST_SAMPLE = SAMPLE_SIZE - (TRAIN_SAMPLE + VALIDATION_SAMPLE)

In [None]:
def load_pair(fname):
    """Return the two screenshot file names for an image name.

    Args:
        fname: image name without browser suffix or extension.

    Returns:
        A two-element list: the Firefox file name, then the Chrome file name.
    """
    return [fname + suffix for suffix in ('_firefox.png', '_chrome.png')]

In [None]:
# Shuffle the names in place, then cut the list into three consecutive slices.
random.shuffle(all_image_names)

# First TRAIN_SAMPLE names.
images_train = all_image_names[:TRAIN_SAMPLE]
# The next VALIDATION_SAMPLE names.
images_validation = all_image_names[TRAIN_SAMPLE:VALIDATION_SAMPLE + TRAIN_SAMPLE]
# The trailing TEST_SAMPLE names.
images_test = all_image_names[SAMPLE_SIZE - TEST_SAMPLE:]

In [None]:
def couples_generator(images):
    """Yield one (file-name pair, label) tuple per image name.

    Args:
        images: iterable of image names; each must be a key of `labels`.

    Yields:
        A tuple of the list returned by `load_pair` and the value returned by
        `utils.to_categorical_label` for that image's label under the
        configured classification type.
    """
    for name in images:
        couple = load_pair(name)
        label = utils.to_categorical_label(labels[name], args['classification_type'])
        yield couple, label

In [None]:
def gen_func(images):
    """Create a fresh couples generator over `images`.

    This is the generator factory handed to `utils.make_infinite` and used to
    count the couples of each split.
    """
    couples = couples_generator(images)
    return couples

In [None]:
# Count the couples in each split by exhausting a fresh generator for it.
train_couples_len = len(list(gen_func(images_train)))
validation_couples_len = len(list(gen_func(images_validation)))
test_couples_len = len(list(gen_func(images_test)))

In [None]:
# One image data generator, built from all image files, shared by every split.
data_gen = utils.get_ImageDataGenerator(all_images, input_shape)


def _make_iterator(images):
    """Wrap an endlessly repeating couples generator for `images` in a CouplesIterator."""
    endless_couples = utils.make_infinite(gen_func, images)
    return utils.CouplesIterator(endless_couples, input_shape, data_gen, BATCH_SIZE)


train_iterator = _make_iterator(images_train)
validation_iterator = _make_iterator(images_validation)
test_iterator = _make_iterator(images_test)

In [None]:
class Timer(Callback):
    """Keras callback that records wall-clock training time.

    The notebook used `Timer()` without defining it anywhere, which raises a
    NameError on a fresh kernel; it is defined here next to its only use.

    Attributes set during training:
        epoch_times: list with the duration in seconds of each finished epoch.
        train_time: total duration in seconds, set when training ends.
    """

    def on_train_begin(self, logs=None):
        self.train_begin_time = time.time()
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_begin_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        self.epoch_times.append(time.time() - self.epoch_begin_time)

    def on_train_end(self, logs=None):
        self.train_time = time.time() - self.train_begin_time


def _steps_for(sample_count):
    """Return how many batches of BATCH_SIZE cover `sample_count` samples (ceiling division)."""
    return (sample_count + BATCH_SIZE - 1) // BATCH_SIZE


# Build and compile the network chosen in `args`.
model = network.create(input_shape, args['network'])
network.compile(model, args['optimiser'])

# Save the model whenever the monitored validation metric improves, and time the run.
# NOTE(review): 'val_accuracy' assumes network.compile registers a metric named
# `accuracy` — confirm against autowebcompat.network.
timer = Timer()
callbacks_list = [
    ModelCheckpoint('best_train_model.hdf5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'),
    timer,
]

if args['early_stopping']:
    callbacks_list.append(EarlyStopping(monitor='val_accuracy', patience=2))

# Keras documents the step counts as integers; a plain division produced
# floats. Rounding up keeps the final partial batch in every pass.
fit_result = model.fit_generator(
    train_iterator,
    callbacks=callbacks_list,
    validation_data=validation_iterator,
    steps_per_epoch=_steps_for(train_couples_len),
    validation_steps=_steps_for(validation_couples_len),
    epochs=EPOCHS,
)
score = model.evaluate_generator(test_iterator, steps=_steps_for(test_couples_len))
print(score)

# Per-epoch metrics recorded by Keras, extended with the measured epoch durations.
train_history = fit_result.history
train_history.update({'epoch time': timer.epoch_times})

# Copy the configuration so that adding the results does not mutate `args`.
information = dict(args)
information.update({
    'Accuracy': score,
    'Train Time': timer.train_time,
    'Number of Train Samples': train_couples_len,
    'Number of Validation Samples': validation_couples_len,
    'Number of Test Samples': test_couples_len,
})
utils.write_train_info(information, model, train_history)