### Imports

In [1]:
from os.path import join

from keras.applications import VGG16, VGG19, InceptionV3, Xception, ResNet50
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
from keras.models import Model, load_model
from keras.utils.np_utils import to_categorical
from keras.callbacks import ModelCheckpoint, Callback
from keras.utils.training_utils import multi_gpu_model

import tensorflow as tf

import os
import numpy as np

from batch_generator import BatchGenerator, BatchSequence

from sklearn.metrics import recall_score, precision_score, f1_score

import gzip, pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Load data

In [2]:
# Absolute paths of the image directories, one per dataset split
images_path_train = os.path.abspath('data/train/')
images_path_validation = os.path.abspath('data/validation/')
images_path_test = os.path.abspath('data/test/')


def _load_gzipped_pickle(path):
    """Return the object stored in the gzip-compressed pickle file at `path`."""
    # NOTE(review): pickle.load can execute arbitrary code; only use it on
    # label files that were produced locally / come from a trusted source.
    with gzip.open(path, 'rb') as fp:
        return pickle.load(fp)


# Label data for the training and validation splits
y_train = _load_gzipped_pickle('data/y_train.pickle')
y_validation = _load_gzipped_pickle('data/y_validation.pickle')

### Metrics / callbacks

In [4]:
class Metrics(Callback):
    """Keras callback reporting micro-averaged F1, precision and recall per epoch.

    At the end of every epoch the model's predictions for
    ``self.validation_data[0]`` are rounded and scored against
    ``self.validation_data[1]``. The scores are printed and appended to the
    ``mean_f1s``, ``precisions`` and ``recalls`` lists (one entry per epoch).
    """

    def on_train_begin(self, logs=None):
        """Reset the score histories when training starts.

        ``logs`` is accepted for compatibility with the callback interface
        and is not used.
        """
        self.mean_f1s = []
        self.recalls = []
        self.precisions = []

    def on_epoch_end(self, epoch, logs=None):
        """Score the model on the validation data, then record and print the result.

        ``epoch`` and ``logs`` are accepted for compatibility with the callback
        interface and are not used.
        """
        # NOTE(review): this indexes self.validation_data directly. Presumably
        # Keras only populates it when validation data is passed as arrays
        # (not as a generator/Sequence) — confirm before wiring this callback
        # into fit_generator.
        y_pred = (np.asarray(self.model.predict(self.validation_data[0]))).round()
        y_true = self.validation_data[1]

        # Micro averaging pools the decisions of all outputs before scoring.
        mean_f1 = f1_score(y_true, y_pred, average='micro')
        recall = recall_score(y_true, y_pred, average='micro')
        precision = precision_score(y_true, y_pred, average='micro')
        self.mean_f1s.append(mean_f1)
        self.recalls.append(recall)
        self.precisions.append(precision)

        print('mean_F1: {} — precision: {} — recall: {}'.format(mean_f1, precision, recall))

metrics = Metrics()

In [5]:
# Checkpoint callback: monitors val_loss (lower is better) and keeps only the best model.
# NOTE(review): the file-name prefix says "inceptionV3" while the network cell
# builds a VGG19 — confirm the intended prefix before enabling this callback.
filepath = "models/inceptionV3-fc-{epoch:02d}-{val_loss:.2f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

### Network

In [12]:
from PIL import ImageFile
# Let PIL load image files that are truncated instead of raising an error on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [13]:
batch_size = 2 * 128  # 2 GPUs x 128 samples each

In [14]:
# Batch providers for the training and validation images, both using `batch_size`
training_gen = BatchGenerator(
    input_dir=images_path_train,
    y=y_train,
    batch_size=batch_size,
)
val_gen = BatchSequence(
    input_dir=images_path_validation,
    y=y_validation,
    batch_size=batch_size,
)

In [15]:
with tf.device('/cpu:0'):
    # Convolutional base with ImageNet weights and without its original classifier
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=(290,290,3))

    # New classification head stacked on top of the base output:
    # global average pooling (flatten) -> wide dense layer -> dropout
    x = base_model.output
    for head_layer in (
        GlobalAveragePooling2D(),
        Dense(1024, activation='relu'),  # should be rather large with 228 output labels
        Dropout(0.5),
    ):
        x = head_layer(x)

    # One sigmoid unit per label (instead of softmax) to have independent probabilities
    y = Dense(228, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=y)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [16]:
# Train only the top layer
# Every layer of the pretrained base is marked non-trainable; the layers
# stacked on top of it are not touched by this loop.
for layer in base_model.layers:
    layer.trainable = False

In [17]:
# Multi-GPU data parallelism
# Wraps `model` for 2 GPUs; `model` itself is kept and is the object that is
# saved to disk after training.
multi_model = multi_gpu_model(model, gpus=2)

In [18]:
# Use binary loss instead of categorical loss to penalize each output independently
# Only the multi-GPU wrapper is compiled here; `model` is not compiled in this notebook.
multi_model.compile(optimizer='adam', loss='binary_crossentropy')

In [19]:
# Train the network
epochs = 1

# Number of batches needed to cover every training sample once.
# Integer ceiling division: a sample count that is an exact multiple of
# batch_size does not get an extra step.
steps = (y_train.shape[0] + batch_size - 1) // batch_size

# No validation data and no callbacks (checkpoint, metrics) are passed to this run.
history = multi_model.fit_generator(training_gen, steps_per_epoch=steps, epochs=epochs)

# Save the single-device `model`, not the multi-GPU wrapper.
model.save("models/VGG19-fc-{}_epochs.h5".format(epochs))

Epoch 1/1


### Test models


In [3]:
predict_gen = BatchSequence(input_dir=images_path_validation, y=y_validation, batch_size=128)

# The ground truth is the same for every evaluated model
y_true = y_validation

saved_models = [
    'models/VGG19-fc-1_epochs.h5',
    'models/Xception-fc-1_epochs.h5',
    'models/VGG16-fc-1_epochs.h5',
    'models/ResNet50-fc-1_epochs.h5',
]

for path in saved_models:
    model = load_model(path)
    model.compile(loss='binary_crossentropy', optimizer='adam') # training configuration

    predictions = model.predict_generator(predict_gen, verbose=1)

    # Binarize the sigmoid outputs at 0.5
    y_pred = (predictions > 0.5).astype(int)

    # Micro-averaged precision, recall and F1, computed in that order
    pr, rc, f1 = (
        score(y_true, y_pred, average='micro')
        for score in (precision_score, recall_score, f1_score)
    )

    print("[{}] Precision: {} Recall: {} F1: {}".format(path, pr, rc, f1))
    # NOTE(review): this unconditional break stops after the first entry, so
    # only the first path in the list is ever evaluated.
    break



[models/VGG19-fc-1_epochs.h5] Precision: 0.8470145509282488 Recall: 0.21260169769023451 F1: 0.33989046832843395
