# Fish monitoring pipeline 

Some setup...

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

from util.data_utils import *

%load_ext autoreload
%autoreload 2

# Default matplotlib axes to be drawn without a grid.
new_style = dict(grid=False)
plt.rc('axes', grid=new_style['grid'])
# Seaborn colour palette kept at hand for later plots.
p = sns.color_palette()

Using TensorFlow backend.


### Load data

Load the localizer training dataset. It consists of images taken on different boat decks, showing fish of different species. Each image is annotated with the position (x, y, w, h) of each fish it contains.

In [2]:
# Bounding-box annotations. Column names are supplied explicitly via `names`,
# so pandas reads the file as headerless (the first row is treated as data).
boxes = pd.read_csv('/a/data/fisheries_monitoring/data/detection_labels/boxes.csv',
                    names=["img", "x", "y", "w", "h"])

# Width/height handed to load_raw_data for the localizer inputs.
INPUT_LOC_WIDTH = 224
INPUT_LOC_HEIGHT = 224
PATH = '/a/data/fisheries_monitoring/data/localizers/original/'
# NOTE(review): load_raw_data is not imported by name in this notebook;
# presumably it comes from the wildcard import of util.data_utils -- confirm.
loc_data, loc_target, loc_index = load_raw_data(boxes, INPUT_LOC_WIDTH, INPUT_LOC_HEIGHT, PATH)

# Single-argument print(...) with explicit formatting emits the same text on
# Python 2 and Python 3 (the bare print statement is a SyntaxError on 3).
print("Localizer train dataset shape: {}".format(loc_data.shape))
print("Localizer annotated boxes shape: {}".format(loc_target.shape))

Localizer train dataset shape: (4371, 224, 224, 3)
Localizer annotated boxes shape: (4371, 4)


Load the classifier labels and training dataset. This dataset is built from images cropped out of the original dataset so that the fish occupies most of the image. The labels are the species of each fish.

In [3]:
# Width/height handed to load_cropped_train for the classifier inputs.
INPUT_CLS_WIDTH = 224
INPUT_CLS_HEIGHT = 224
PATH = '/a/data/fisheries_monitoring/data/classifiers/cropped_from_origin'

# NOTE(review): load_cropped_train and np_utils are not imported by name in
# this notebook; presumably both come from the wildcard import of
# util.data_utils -- confirm.
train_data, train_target, train_id = load_cropped_train(PATH, INPUT_CLS_WIDTH, INPUT_CLS_HEIGHT)

# Single-argument print(...) emits the same text on Python 2 and Python 3
# (the bare print statement is a SyntaxError on 3).
print('Convert to numpy...')
train_data = np.array(train_data)
train_target = np.array(train_target)

print('Convert to float...')
train_data = train_data.astype('float32')
# Scale pixel values by 1/255 into a separately named array.
cropped_data = train_data / 255
# 8 = number of classes; the loader output lists class folder indices 0..7.
labels = np_utils.to_categorical(train_target, 8)

print('Train shape: {}'.format(cropped_data.shape))
print('{} train samples'.format(cropped_data.shape[0]))

Read train images
Load folder ALB (Index: 0)
Load folder BET (Index: 1)
Load folder DOL (Index: 2)
Load folder LAG (Index: 3)
Load folder OTHER (Index: 4)
Load folder SHARK (Index: 5)
Load folder YFT (Index: 6)
Load folder NoF (Index: 7)
Read train data time: 35.63 seconds
Convert to numpy...
Convert to float...
Train shape: (4836, 224, 224, 3)
4836 train samples


### Create pipeline with desired localizer and classifier

In [5]:
from models.pipeline import Pipeline
import models.classifiers as cls
import models.localizers as loc

# Instantiate each stage on its own, then hand them to Pipeline:
# localizer as the first argument, classifier as the second.
localizer_net = loc.ResNet50()
classifier_net = cls.InceptionV3()
model = Pipeline(localizer_net, classifier_net)

### Train model

Split localizer dataset and set it into the pipeline

In [6]:
# Hold out 20% of the localizer samples; random_state=0 fixes the shuffle.
loc_split = train_test_split(loc_data, loc_target, test_size=0.2, random_state=0)
X_loc_train, X_loc_test, y_loc_train, y_loc_test = loc_split

In [7]:
# Register the localizer split with the pipeline. Arguments as passed:
# train images, train boxes, held-out images, held-out boxes.
model.set_localizer_train_data(X_loc_train, y_loc_train, X_loc_test, y_loc_test)

In [33]:
# Sanity check: display the shape of the localizer training inputs.
X_loc_train.shape

(3496, 224, 224, 3)

Do the same for the classifier dataset

In [8]:
# Hold out 20% of the cropped classifier samples; random_state=0 fixes the shuffle.
cls_split = train_test_split(cropped_data, labels, test_size=0.2, random_state=0)
X_cls_train, X_cls_test, y_cls_train, y_cls_test = cls_split


In [9]:
# Register the classifier split with the pipeline. Arguments as passed:
# train images, train one-hot labels, held-out images, held-out one-hot labels.
model.set_classifier_train_data(X_cls_train, y_cls_train, X_cls_test, y_cls_test)

Now, train

In [12]:
# Training hyper-parameters. batch_size is reused by the prediction cell below.
batch_size = 50
nb_epoch = 3
# NOTE(review): random_state and callbacks are defined here but are not
# passed to the fit call below, so they have no effect on this training run.
# Confirm whether Pipeline.fit accepts a callbacks argument and wire it in.
random_state = 51
callbacks = [EarlyStopping(monitor='val_loss', patience=3, verbose=0),]

# Fit the pipeline on the data registered via the set_*_train_data calls.
# The captured log shows two training runs (3496 and 3868 training samples).
model.fit(batch_size=batch_size, nb_epoch=nb_epoch, shuffle=True)

Train on 3496 samples, validate on 875 samples
Epoch 1/3
48s - loss: 14113.0878 - val_loss: 14462.2835
Epoch 2/3
48s - loss: 13165.1110 - val_loss: 13204.7410
Epoch 3/3
48s - loss: 12071.2358 - val_loss: 12625.8512
A
Train on 3868 samples, validate on 968 samples
Epoch 1/3
43s - loss: 1.4775 - val_loss: 1.4261
Epoch 2/3
40s - loss: 1.2836 - val_loss: 1.2572
Epoch 3/3
40s - loss: 1.1541 - val_loss: 1.1821


  validation_data=(self.cls_data['X_val'], self.cls_data['y_val']))


### Testing the model

In [48]:
# TODO: We are using for testing the same dataset as for validation, need to change it
# NOTE(review): the captured log loss (~2.50) is far above the classifier's
# final val_loss in the training log (~1.18) on the same held-out samples.
# Presumably model.predict runs the whole pipeline rather than the classifier
# alone -- confirm what input Pipeline.predict expects.
predictions = model.predict(X_cls_test.astype('float32'), batch_size=batch_size)
# Compute loss
score = log_loss(y_cls_test, predictions)
# Single-argument print(...) emits the same text on Python 2 and Python 3
# (the bare print statement is a SyntaxError on 3).
print("log loss score:  {}".format(score))

log loss score:  2.49918918943


Compute accuracy

In [65]:
# Collapse predicted scores and one-hot labels to class indices (argmax over
# axis 1), then take the fraction of samples where they agree.
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_cls_test, axis=1)
acc = (predicted_classes == true_classes).mean()

# Single-argument print(...) emits the same text on Python 2 and Python 3
# (the bare print statement is a SyntaxError on 3).
print("accuracy:  {}".format(acc))

accuracy:  0.0898760330579
