In [68]:
%matplotlib inline
import numpy as np
np.random.seed(2016)
import matplotlib.pyplot as plt

import os
import glob
import datetime
import pandas as pd
import time
import warnings
warnings.filterwarnings("ignore")

from scipy.misc import imread
from scipy.misc import imresize

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

from keras.models import Sequential
from keras.models import load_model
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras import __version__ as keras_version

from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

In [32]:
# Root directory of the fisheries-monitoring data set. Keep the trailing slash:
# other cells build paths by plain string concatenation (DATA_PATH + '...').
DATA_PATH = '/a/data/fisheries_monitoring/data/'

In [33]:
def load_cropped_train():
    """Read every cropped training image from its per-class folder.

    Each class folder under DATA_PATH + 'classifiers/cropped_from_origin/' is
    globbed for '*.jpg' files (processed in sorted order); every image is
    loaded with a (224, 224) target size and converted to an array.

    Returns:
        A tuple (images, labels, image_ids) of three parallel lists:
        the image arrays, the integer class index of each image (its folder's
        position in the class list), and an id of the form '<folder>/<file name>'.
    """
    images = []
    image_ids = []
    labels = []
    t0 = time.time()

    print('Read train images')
    class_names = ['ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'NoF']
    for label, class_name in enumerate(class_names):
        print('Load folder {} (Index: {})'.format(class_name, label))
        pattern = os.path.join(DATA_PATH + 'classifiers/cropped_from_origin/', class_name, '*.jpg')
        for img_path in sorted(glob.glob(pattern)):
            pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
            images.append(pixels)
            image_ids.append(class_name + '/' + os.path.basename(img_path))
            labels.append(label)

    elapsed = round(time.time() - t0, 2)
    print('Read train data time: {} seconds'.format(elapsed))
    return images, labels, image_ids

def read_and_normalize_cropped_train_data():
    """Load the cropped training images and turn them into training arrays.

    Returns:
        A tuple (train_data, train_target, train_id):
        train_data   -- float32 array of all images stacked along a new first
                        axis, with pixel values divided by 255.
        train_target -- one-hot label matrix with 8 columns.
        train_id     -- list of image ids as returned by load_cropped_train().
    """
    train_data, train_target, train_id = load_cropped_train()

    print('Convert to numpy...')
    # Build the float32 array in one step instead of array -> astype() copy.
    train_data = np.array(train_data, dtype='float32')
    train_target = np.array(train_target)

    print('Convert to float...')
    # In-place scaling avoids allocating a second full-size copy of the images.
    # NOTE(review): preprocess_input from keras.applications.resnet50 is imported
    # in this file but not applied; pretrained ImageNet weights are usually paired
    # with it -- confirm that plain /255 scaling is intended.
    train_data /= 255
    train_target = np_utils.to_categorical(train_target, 8)

    # format() keeps the output identical on Python 2 and 3; a multi-argument
    # print(...) call prints a tuple repr under Python 2.
    print('Train shape: {}'.format(train_data.shape))
    print('{} train samples'.format(train_data.shape[0]))
    return train_data, train_target, train_id

In [34]:
# Load the whole cropped training set into memory: scaled pixel array,
# one-hot labels and the matching image ids.
train_data, train_target, train_id = read_and_normalize_cropped_train_data()

Read train images
Load folder ALB (Index: 0)
Load folder BET (Index: 1)
Load folder DOL (Index: 2)
Load folder LAG (Index: 3)
Load folder OTHER (Index: 4)
Load folder SHARK (Index: 5)
Load folder YFT (Index: 6)
Load folder NoF (Index: 7)
Read train data time: 30.29 seconds
Convert to numpy...
Convert to float...
('Train shape:', (4836, 224, 224, 3))
(4836, 'train samples')


In [35]:
# ResNet50 base initialised with ImageNet weights, built without its original
# classification head, for 224x224 RGB inputs.
base_model = ResNet50(weights='imagenet', include_top = False, input_shape=(224,224,3))

# New classification head: flatten the base output, one 1024-unit ReLU layer,
# then an 8-way softmax (8 matches the number of class folders / one-hot columns).
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(8, activation='softmax')(x)
model = Model(input=base_model.input, output=predictions)

# Freeze every layer of the pretrained base so that only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# SGD is selected by name (library default settings); no metrics are requested,
# so only the loss is reported during training.
model.compile(optimizer='sgd', loss='categorical_crossentropy')

In [36]:
# Training hyper-parameters used by the fit/predict cells.
batch_size = 16
nb_epoch = 30
# NOTE(review): random_state is defined here but the split below passes
# random_state=0 instead of this value -- confirm which seed is intended.
random_state = 51

# Hold out 20% of the samples; X_test / y_test serve as the validation set.
X_train, X_test, y_train, y_test = train_test_split(train_data, train_target, test_size=0.2, random_state=0)

In [37]:
# Early stopping watches val_loss with a patience of 3 epochs.
callbacks = [EarlyStopping(monitor='val_loss', patience=3, verbose=0),]
# The hold-out split is passed as validation data, so it drives early stopping.
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, shuffle=True,
          verbose=2, validation_data=(X_test, y_test), callbacks=callbacks)
# NOTE(review): no checkpoint callback is used, so this saves the model as it is
# when fit() returns -- presumably the last epoch, not the best val_loss; confirm.
model.save(DATA_PATH + 'models/ResNet50_OriginalCrop.h5')

Train on 3868 samples, validate on 968 samples
Epoch 1/30
58s - loss: 1.0148 - val_loss: 4.9977
Epoch 2/30
57s - loss: 0.6599 - val_loss: 6.6429
Epoch 3/30
57s - loss: 0.5281 - val_loss: 7.7167
Epoch 4/30
57s - loss: 0.4406 - val_loss: 2.2186
Epoch 5/30
57s - loss: 0.3767 - val_loss: 0.4230
Epoch 6/30
57s - loss: 0.3404 - val_loss: 0.3760
Epoch 7/30
57s - loss: 0.3106 - val_loss: 0.3305
Epoch 8/30
57s - loss: 0.2789 - val_loss: 0.3237
Epoch 9/30
57s - loss: 0.2397 - val_loss: 0.3593
Epoch 10/30
57s - loss: 0.2271 - val_loss: 0.2952
Epoch 11/30
57s - loss: 0.2087 - val_loss: 0.3165
Epoch 12/30
57s - loss: 0.1861 - val_loss: 0.2840
Epoch 13/30
57s - loss: 0.1713 - val_loss: 0.2883
Epoch 14/30
57s - loss: 0.1573 - val_loss: 0.2651
Epoch 15/30
57s - loss: 0.1474 - val_loss: 0.2616
Epoch 16/30
57s - loss: 0.1276 - val_loss: 0.2506
Epoch 17/30
57s - loss: 0.1333 - val_loss: 0.2523
Epoch 18/30
57s - loss: 0.1203 - val_loss: 0.2498
Epoch 19/30
57s - loss: 0.1101 - val_loss: 0.2527
Epoch 20/30


<keras.callbacks.History at 0x1da07d0>

In [38]:
# Predict class probabilities for the hold-out set and score them with log loss.
# NOTE(review): X_test comes from the float32 train_data array, so the astype()
# call looks redundant -- confirm before removing.
predictions_valid = model.predict(X_test.astype('float32'), batch_size=batch_size, verbose=2)
score = log_loss(y_test, predictions_valid)

In [39]:
# Report the hold-out log loss. Call-style print with format() produces the same
# text as the old print statement and also runs on Python 3.
print("log loss score:  {}".format(score))

log loss score:  0.234915012656


In [40]:
from sklearn.metrics import accuracy_score

# Collapse the one-hot targets and the predicted probabilities to class indices,
# then compute the fraction of hold-out samples that were classified correctly.
y_true = y_test.argmax(axis=1)
y_pred = predictions_valid.argmax(axis=1)
acc = accuracy_score(y_true, y_pred)

In [41]:
# Report the hold-out accuracy. Call-style print with format() produces the same
# text as the old print statement and also runs on Python 3.
print("accuracy:  {}".format(acc))

accuracy:  0.927685950413


In [64]:
# Collect the true and predicted class index of every misclassified hold-out
# sample. zip() replaces the Python-2-only xrange() index loop, and int() stores
# plain Python integers so the printed lists do not depend on numpy's scalar repr.
error_true = []
error_pred = []
for true_label, pred_label in zip(y_true, y_pred):
    if true_label != pred_label:
        error_true.append(int(true_label))
        error_pred.append(int(pred_label))

# Show the errors in two rows (first 35, then the rest) so that each true/pred
# pair lines up column by column. Call-style print runs on Python 2 and 3.
print("true:  {}".format(error_true[0:35]))
print("pred:  {}".format(error_pred[0:35]))
print("true:  {}".format(error_true[35:]))
print("pred:  {}".format(error_pred[35:]))

true:  [0, 1, 1, 6, 6, 6, 1, 1, 1, 1, 1, 0, 6, 1, 1, 1, 1, 2, 6, 2, 7, 4, 4, 0, 7, 1, 6, 1, 4, 6, 6, 0, 2, 1, 6]
pred:  [6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 6, 6, 0, 6, 1, 0, 0, 0, 0, 7, 0, 6, 7]
true:  [7, 5, 6, 4, 1, 5, 1, 1, 6, 2, 0, 3, 7, 0, 0, 1, 4, 6, 2, 6, 4, 1, 0, 6, 1, 0, 6, 4, 6, 6, 4, 0, 0, 4, 1]
pred:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 4, 6, 0, 1, 0, 0, 2, 0, 2, 5, 0, 4, 6, 0, 0, 2, 7, 6, 1, 7, 0, 0]


In [69]:
# Total number of misclassified hold-out samples.
len(error_true)

70