In [None]:
# Imports

import random

import cv2
import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.utils import multi_gpu_model
%matplotlib inline

In [None]:
# Constants shared by the data preparation, model and training cells

# --- Network geometry ---
# Patches are 96x96 pixels with 3 colour channels
INPUT_SHAPE = (96, 96, 3)
# One output per class: sea lion / background
OUTPUT_SIZE = 2
# NOTE(review): not referenced in the visible cells; 160 equals the 4*4*10
# flattened size of the last pooling layer of the network - confirm intent.
FULL_SIZE = 160

# --- Training ---
BATCH_SIZE = 50
EPOCHS = 10
# Fraction of the training data held out for validation during fit()
VALIDATION_SPLIT = 0.2

# --- Output ---
# Where the trained model is written
MODEL_PATH = "./models/net_0_model.h5"

In [None]:
# Load the annotation tables (one row per patch centre)
# Every CSV has the coordinate and filename columns; the sea lion files
# additionally carry a "class" column.
background_dtypes = {"coord_x": int, "coord_y": int, "filename": str}
sea_lion_dtypes = dict(background_dtypes, **{"class": str})

sealions_df_train = pd.read_csv('./sealions_train.csv', dtype=sea_lion_dtypes)
sealions_df_test = pd.read_csv('./sealions_test.csv', dtype=sea_lion_dtypes)
empty_df_train = pd.read_csv('./empty_train.csv', dtype=background_dtypes)
empty_df_test = pd.read_csv('./empty_test.csv', dtype=background_dtypes)

# Distinct image files referenced by each table
file_names_sea_lions_train = sealions_df_train['filename'].unique()
file_names_sea_lions_test = sealions_df_test['filename'].unique()
file_names_background_train = empty_df_train['filename'].unique()
file_names_background_test = empty_df_test['filename'].unique()

# Global containers filled by the extraction functions with [patch, label] entries
data_set_sea_lions_train, data_set_sea_lions_test = [], []
data_set_background_train, data_set_background_test = [], []

# Fix the seed of the `random` module so shuffling is repeatable
random.seed(42)

In [None]:
# Load images and extract 96x96 patches for sea lions
def extract_sea_lions(file_names, data_set, sea_lions_df, patch_size=96):
    """Append one square patch per annotated sea lion to ``data_set``.

    Each name in ``file_names`` is read from ``./kaggle_sea_lions/Train/``.
    For every row of ``sea_lions_df`` whose ``filename`` matches, a
    ``patch_size`` x ``patch_size`` window centred on (``coord_x``,
    ``coord_y``) is cut out of the image. A window that would cross an image
    border is shifted back inside the image instead of being truncated.

    Parameters
    ----------
    file_names : iterable of str
        Image file names, relative to ``./kaggle_sea_lions/Train/``.
    data_set : list
        Mutated in place; receives one ``[patch, "sea lion"]`` entry per row.
    sea_lions_df : pandas.DataFrame
        Must provide the ``filename``, ``coord_x`` and ``coord_y`` columns.
    patch_size : int, optional
        Side length of the extracted patch in pixels (default 96).

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load one of the images.
    """
    half = patch_size // 2
    for file in file_names:
        path = "./kaggle_sea_lions/Train/" + file
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising
        if image is None:
            raise OSError("Could not read image: " + path)
        height, width = image.shape[:2]
        rows = sea_lions_df[sea_lions_df['filename'] == file]
        for x, y in zip(rows['coord_x'], rows['coord_y']):
            # Clamp the top-left corner so the whole window lies in the image
            left = min(max(x - half, 0), max(width - patch_size, 0))
            top = min(max(y - half, 0), max(height - patch_size, 0))
            # Copy: a plain slice is a view that would keep the whole
            # source image alive for as long as the patch is stored
            patch = image[top:top + patch_size, left:left + patch_size, :].copy()
            data_set.append(list((patch, "sea lion")))

In [None]:
# Load images and extract 96x96 patches for background
def extract_background(file_names, data_set, empty_df, patch_size=96):
    """Append one square background patch per row of ``empty_df`` to ``data_set``.

    Each name in ``file_names`` is read from ``./kaggle_sea_lions/Train/``.
    For every row of ``empty_df`` whose ``filename`` matches, a
    ``patch_size`` x ``patch_size`` window centred on (``coord_x``,
    ``coord_y``) is cut out of the image. A window that would cross an image
    border is shifted back inside the image, so coordinates near an edge no
    longer produce empty or undersized patches.

    Parameters
    ----------
    file_names : iterable of str
        Image file names, relative to ``./kaggle_sea_lions/Train/``.
    data_set : list
        Mutated in place; receives one ``[patch, "background"]`` entry per row.
    empty_df : pandas.DataFrame
        Must provide the ``filename``, ``coord_x`` and ``coord_y`` columns.
    patch_size : int, optional
        Side length of the extracted patch in pixels (default 96).

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load one of the images.
    """
    half = patch_size // 2
    for file in file_names:
        path = "./kaggle_sea_lions/Train/" + file
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising
        if image is None:
            raise OSError("Could not read image: " + path)
        height, width = image.shape[:2]
        rows = empty_df[empty_df['filename'] == file]
        for x, y in zip(rows['coord_x'], rows['coord_y']):
            # Without clamping, y - 48 < 0 becomes a negative (wrap-around)
            # slice start and y + 48 > height is silently truncated
            left = min(max(x - half, 0), max(width - patch_size, 0))
            top = min(max(y - half, 0), max(height - patch_size, 0))
            # Copy: a plain slice is a view that would keep the whole
            # source image alive for as long as the patch is stored
            patch = image[top:top + patch_size, left:left + patch_size, :].copy()
            data_set.append(list((patch, "background")))

In [None]:
# Extract patches
# The extraction functions append to the global lists, so empty them first:
# otherwise re-running this cell would add every patch a second time.
for patch_list in (data_set_sea_lions_train, data_set_sea_lions_test,
                   data_set_background_train, data_set_background_test):
    del patch_list[:]

extract_sea_lions(file_names_sea_lions_train, data_set_sea_lions_train, sealions_df_train)
extract_sea_lions(file_names_sea_lions_test, data_set_sea_lions_test, sealions_df_test)
extract_background(file_names_background_train, data_set_background_train, empty_df_train)
extract_background(file_names_background_test, data_set_background_test, empty_df_test)

In [None]:
# Build train and test sets

# One-hot targets; column 0 is "sea lion", column 1 is "background"
_ONE_HOT = {"sea lion": [1, 0], "background": [0, 1]}


def _to_model_arrays(samples):
    """Shuffle ``samples`` in place and turn them into (inputs, targets).

    ``samples`` is a list of ``[patch, label]`` entries. The patches are
    stacked into a float32 array scaled by 1/255 and the labels are mapped to
    one-hot rows. An unknown label raises ``KeyError`` instead of being
    skipped, which would leave inputs and targets with different lengths.
    """
    random.shuffle(samples)
    patches = [patch for patch, _ in samples]
    targets = [_ONE_HOT[label] for _, label in samples]
    # Convert data types and normalize values
    inputs = np.array(patches).astype('float32')
    inputs /= 255
    return inputs, np.array(targets)


# Build train set
train_set = data_set_sea_lions_train + data_set_background_train
X_train, Y_train = _to_model_arrays(train_set)

# Build test set
test_set = data_set_sea_lions_test + data_set_background_test
X_test, Y_test = _to_model_arrays(test_set)

In [None]:
# Build parallel model (multi gpu)
# Only Conv2D is imported from keras.layers, so it is used here; the
# Convolution2D name used previously was never imported.
# The shape comments assume the 96x96x3 INPUT_SHAPE: 'valid' padding shrinks
# each side by (kernel size - 1) and every 2x2 pooling halves it.

model = Sequential()
# First layer: 96x96x3 -> 92x92x8 -> 46x46x8
model.add(Conv2D(8, (5, 5), activation='relu', padding='valid', input_shape=INPUT_SHAPE))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Second layer: -> 44x44x5 -> 22x22x5
model.add(Conv2D(5, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Third layer: -> 20x20x5 -> 10x10x5
model.add(Conv2D(5, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Fourth layer: -> 8x8x10 -> 4x4x10
model.add(Conv2D(10, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# 4 * 4 * 10 = 160 features
model.add(Flatten())

model.add(Dropout(0.5))
# One softmax probability per class
model.add(Dense(OUTPUT_SIZE, activation='softmax'))

# `model` stays the template; `parallel_model` is the 2-GPU wrapper around it
parallel_model = multi_gpu_model(model, gpus=2)
parallel_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train and test (multi gpu)

# Fit model on training data
parallel_model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=VALIDATION_SPLIT, verbose=1)

# Evaluate model on test data
loss_and_metrics = parallel_model.evaluate(X_test, Y_test, batch_size=BATCH_SIZE, verbose=1)

# Save trained model (multi gpu)
# Save through the template model rather than the multi_gpu_model wrapper:
# Keras documents that the template (which shares the trained weights) is the
# one to save, so the file can be loaded without the multi-GPU structure.
model.save(MODEL_PATH)

# Display the test loss followed by the compiled metric (accuracy)
loss_and_metrics

In [None]:
# Evaluate ROC and AUC

# predict_proba() is only defined on Sequential models; the object returned by
# multi_gpu_model is a plain Model, so use predict(). With the softmax output
# layer this yields the per-class probabilities.
Y_pred = parallel_model.predict(X_test)

# NOTE(review): roc_auc_score is not imported by any visible cell; presumably
# it is sklearn.metrics.roc_auc_score - confirm and add that import to the
# imports cell, otherwise this line raises NameError on a fresh kernel.
roc_auc_score(Y_test, Y_pred)