# The Nature Conservancy Fisheries Monitoring Kaggle Competition

This is an attempt to compete in the above mentioned Kaggle competition using a neural network written in Keras.

In [1]:
# Lets load us some settings!
import json

print "Loading settings..."
with open('SETTINGS.json') as settings_file:
    settings = json.load(settings_file)

# Source directory for your data
source_dir = settings['source_dir']
train_dir = settings['train_dir']

print "Settings loaded!"

Loading settings...
Settings loaded!


In [7]:
# Credit goes to pengpaiSH for getting me started on the right path with this
# Unnecessary to run this codeblock more than once.
import os
import numpy as np
import shutil

np.random.seed(42)

root_train = source_dir + 'fish_train_set'
root_val = source_dir + 'fish_val_set'

root_total = source_dir + train_dir

FishNames = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

nbr_train_samples = 0
nbr_val_samples = 0

# Training proportion
split_proportion = 0.8

for fish in FishNames:
    if fish not in os.listdir(root_train):
        os.mkdir(os.path.join(root_train, fish))

    total_images = os.listdir(os.path.join(root_total, fish))

    nbr_train = int(len(total_images) * split_proportion)

    np.random.shuffle(total_images)

    train_images = total_images[:nbr_train]

    val_images = total_images[nbr_train:]

    for img in train_images:
        source = os.path.join(root_total, fish, img)
        target = os.path.join(root_train, fish, img)
        shutil.copy(source, target)
        nbr_train_samples += 1

    if fish not in os.listdir(root_val):
        os.mkdir(os.path.join(root_val, fish))

    for img in val_images:
        source = os.path.join(root_total, fish, img)
        target = os.path.join(root_val, fish, img)
        shutil.copy(source, target)
        nbr_val_samples += 1

print('Finish splitting train and val images!')
print('# training samples: {}, # val samples: {}'.format(nbr_train_samples, nbr_val_samples))

Finish splitting train and val images!
# training samples: 3019, # val samples: 758


In [None]:
import os
from keras.layers import Flatten, Dense, Activation, Dropout
from keras.models import Model
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.noise import GaussianNoise
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.models import Sequential
from keras.regularizers import l2


root_train = source_dir + 'fish_train_set'
root_val = source_dir + 'fish_val_set'

# Credit goes to pengpaiSH for this section
learning_rate = 0.0001
img_width = 299
img_height = 299
nbr_train_samples = 3019
nbr_validation_samples = 758
nbr_epochs = 25
batch_size = 32

train_data_dir = root_train
val_data_dir = root_val

FishNames = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

### Finally, this is me.  This is also the guts of the model.
model = Sequential()

### Model 2
model.add(Convolution2D(128, 5, 5, input_shape=(299, 299, 3)))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Activation("relu"))

model.add(Convolution2D(64, 3, 3))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Activation("relu"))

model.add(Convolution2D(32, 3, 3))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Activation("relu"))

model.add(Convolution2D(16, 2, 2))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Activation("relu"))


model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors

model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(8))
model.add(Activation("softmax"))

model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])


###

# autosave best Model
best_model_file = "./weights.h5"
best_model = ModelCheckpoint(best_model_file, monitor='val_acc', verbose = 1, save_best_only = True)

# Credit goes to pengpaiSH for this section
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.1,
        zoom_range=0.1,
        rotation_range=10.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)


val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size = (img_width, img_height),
        batch_size = batch_size,
        shuffle = True,
        classes = FishNames,
        class_mode = 'categorical')

validation_generator = val_datagen.flow_from_directory(
        val_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        shuffle = True,
        classes = FishNames,
        class_mode = 'categorical')

model.fit_generator(
        train_generator,
        samples_per_epoch = nbr_train_samples,
        nb_epoch = nbr_epochs,
        validation_data = validation_generator,
        nb_val_samples = nbr_validation_samples,
        callbacks = [best_model])

Found 3019 images belonging to 8 classes.
Found 758 images belonging to 8 classes.
Epoch 1/25


In [13]:
# Credit goes to pengpaiSH for most of this section

from keras.models import load_model
import os
from keras.preprocessing.image import ImageDataGenerator
import numpy as np


img_width = 299
img_height = 299
batch_size = 32
nbr_test_samples = 1000
nbr_augmentation = 5

FishNames = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

root_path = './'


weights_path = os.path.join(root_path, 'weights.h5')

test_data_dir = os.path.join(source_dir, 'test_stg1/')

# test data generator for prediction
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
        test_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        shuffle = False, # Important !!!
        classes = None,
        class_mode = None)

test_image_list = test_generator.filenames

print('Loading model and weights from training process ...')
model = load_model(weights_path)

for idx in range(nbr_augmentation):
    print('{}th augmentation for testing ...'.format(idx))
    
    random_seed = idx

    test_generator = test_datagen.flow_from_directory(
            test_data_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            shuffle = False, # Important !!!
            seed = random_seed,
            classes = None,
            class_mode = None)

    test_image_list = test_generator.filenames
    #print('image_list: {}'.format(test_image_list[:10]))
    print('Begin to predict for testing data ...')
    if idx == 0:
        predictions = model.predict_generator(test_generator, nbr_test_samples)
    else:
        predictions += model.predict_generator(test_generator, nbr_test_samples)

predictions /= nbr_augmentation

print('Begin to write submission file ..')
f_submit = open(os.path.join(root_path, 'submit.csv'), 'w')
f_submit.write('image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT\n')
for i, image_name in enumerate(test_image_list):
    pred = ['%.6f' % p for p in predictions[i, :]]
    if i % 100 == 0:
        print('{} / {}'.format(i, nbr_test_samples))
    f_submit.write('%s,%s\n' % (os.path.basename(image_name), ','.join(pred)))

f_submit.close()

print('Submission file successfully generated!')

Found 1000 images belonging to 1 classes.
Loading model and weights from training process ...
Begin to predict for testing data ...
0th augmentation for testing ...
Found 1000 images belonging to 1 classes.
Begin to predict for testing data ...
1th augmentation for testing ...
Found 1000 images belonging to 1 classes.
Begin to predict for testing data ...
2th augmentation for testing ...
Found 1000 images belonging to 1 classes.
Begin to predict for testing data ...
3th augmentation for testing ...
Found 1000 images belonging to 1 classes.
Begin to predict for testing data ...
4th augmentation for testing ...
Found 1000 images belonging to 1 classes.
Begin to predict for testing data ...
Begin to write submission file ..
0 / 1000
100 / 1000
200 / 1000
300 / 1000
400 / 1000
500 / 1000
600 / 1000
700 / 1000
800 / 1000
900 / 1000
Submission file successfully generated!


## Submission History
* 2016-12-10 @ ~22:30: 1.70565 (Worse than sample submission)