In [1]:
import keras
import cv2
import os
import numpy as np
import random as rnd
import random as rand
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Flatten, Dense
from keras.utils.training_utils import multi_gpu_model
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# We define in advance constants to build the model

INPUT_SHAPE = (96, 96, 3)
IM_HEIGHT = 96
IM_WIDTH = 96
OUTPUT_SIZE = 2

LEARNING_RATE = 0.01
OPTIMIZER = keras.optimizers.Adam()
LOSS = 'binary_crossentropy'
METRIC = 'accuracy'

SL_TRAIN_SIZE = 50121
SL_VALIDATION_SIZE = 11631
EPOCHS = 10
VALIDATION_SPLIT = 0.2
BATCH_SIZE = 50
STEPS_PER_EPOCH = (2 * SL_TRAIN_SIZE) // BATCH_SIZE + 1
VALIDATION_STEPS_PER_EPOCH = (2 * SL_VALIDATION_SIZE) // BATCH_SIZE + 1

TRAIN_PATH = "./data_set/train/"
VALIDATION_PATH = "./data_set/validation/"
TEST_PATH = "./data_set/test/"

MODEL_PATH = "./binary_classifier/net_1_model.json"
WEIGHTS_PATH = "./binary_classifier/net_1_weights.h5"

In [3]:
# Create data generator for augmented sea lions images

sea_lions_train_datagen = ImageDataGenerator(
    rotation_range=360,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest')

In [4]:
# Sea lions patches extractor
def extract_sea_lion_patch(path, epoch, n_epochs):
    img = cv2.imread(path)
    if rand.uniform(0.0, 1.0) < (epoch/n_epochs):
        # Perform transformation
        for aug_img in sea_lions_train_datagen.flow(img_to_array(img), [1, 0]):
            img = array_to_img(aug_img)
            break
    patch = img[72-48:72+48, 72-48:72+48, :]
    return patch

In [9]:
def train_generator(epoch_tot, batch_size, num_steps):
    sl_lst_tmp = os.listdir(TRAIN_PATH + 'sea_lions')
    sl_lst = []
    for elem in sl_lst_tmp:
        sl_lst.append(list((elem, 'sea_lion')))
    bkg_lst_tmp = os.listdir(TRAIN_PATH + 'background')
    for curr_epoch in range(epoch_tot):
        bkg_lst = []
        for elem in rnd.sample(bkg_lst_tmp, SL_TRAIN_SIZE):
            bkg_lst.append(list((elem, 'background')))
        lst = rnd.shuffle(sl_lst + bkg_lst)
        for step in range(num_steps):
            patches = []
            classes = []
            i = 0
            for i in range(step*batch_size, min((step*batch_size)+i, 2*SL_TRAIN_SIZE)):
                if lst[(step*batch_size)+i][1] == 'background':
                    img = cv2.imread(TRAIN_PATH + 'background/' + lst[(step*batch_size)+i][0])
                    patches.append(img)
                    classes.append([0, 1])
                else:
                    patches.append(extract_sea_lion_patch(TRAIN_PATH + 'sea_lions/' + lst[(step*batch_size)+i][0], step, epoch_tot))
                    classes.append([1, 0])
            X_train = np.array(patches)
            # X_train = X_train.astype('float32')
            # X_train /= 255
            Y_train = np.array(classes)
            yield X_train, Y_train

In [17]:
# test
b = 0
for a in train_generator(10, 50, STEPS_PER_EPOCH):
    print(np.shape(a[0]))
    b += 1
    if b > 1:
        break

(0,)
(0,)


In [6]:
# Create data generators for the validation set

validation_data_gen = ImageDataGenerator(rescale=1./255)


validation_generator = validation_data_gen.flow_from_directory(
    VALIDATION_PATH,
    classes=['sea_lions', 'background'],
    target_size=(IM_HEIGHT, IM_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical")

Found 25062 images belonging to 2 classes.


In [7]:
# Build parallel model (multi gpu)

model = Sequential()
# First layer
model.add(Convolution2D(8, (5, 5), activation='relu', padding='valid', input_shape=INPUT_SHAPE))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second layer
model.add(Convolution2D(5, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Third layer
model.add(Convolution2D(5, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Fourth layer
model.add(Convolution2D(10, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(OUTPUT_SIZE, activation='softmax'))

parallel_model = multi_gpu_model(model, gpus=2)
parallel_model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=[METRIC])

In [10]:
# Train (multi gpu)

# Fit model on training data
history = parallel_model.fit_generator(
    train_generator(EPOCHS, BATCH_SIZE, STEPS_PER_EPOCH),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    verbose=1,
    validation_data=validation_generator, 
    validation_steps=VALIDATION_STEPS_PER_EPOCH)

Epoch 1/10


ValueError: Error when checking input: expected conv2d_1_input to have 4 dimensions, but got array with shape (0, 1)

In [11]:
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [11]:
# Save trained model (multi gpu)

# serialize weights to HDF5
parallel_model.save_weights(WEIGHTS_PATH)

In [None]:
# TODO add testing