## Train kaggle fish model

A transfer learning model built upon InceptionV3 for the competition

https://www.kaggle.com/c/the-nature-conservancy-fisheries-monitoring

The Kaggle data is downloaded to data/train.

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Conv2D, BatchNormalization
from keras import backend as K

def create_dir_if_not_exist(directory):
    """Create ``directory`` (and any missing parents) unless it already exists.

    Args:
        directory: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, including the case
            where ``directory`` already exists but is not a directory.
    """
    # Imported locally so the helper does not depend on another cell having
    # imported ``os`` at module level first.
    import os

    # Try the creation and inspect the failure instead of checking first:
    # this avoids the window in which another process could create the path
    # between an existence check and the ``makedirs`` call.
    try:
        os.makedirs(directory)
    except OSError:
        # An already-existing directory is the expected, harmless case;
        # anything else (permissions, a plain file in the way) is an error.
        if not os.path.isdir(directory):
            raise

In [None]:
## Move files into train and validation folders ##
from shutil import copyfile
import os

# Flip to True to rebuild the train/validation split from data/raw/.
# It is left disabled so that re-running the notebook does not wipe and
# re-draw the (random) split.
if False:
    from shutil import rmtree

    train_ratio = 0.7
    raw_path = 'data/raw/'
    train_path = 'data/train/'
    val_path = 'data/validation/'

    # Start from empty output folders. rmtree replaces the `%rm -r` shell
    # magic so the cell also works outside IPython / on platforms without
    # `rm`; ignore_errors lets the first run proceed when the folders do
    # not exist yet.
    rmtree(train_path, ignore_errors=True)
    rmtree(val_path, ignore_errors=True)

    # Each sub-directory of raw_path is treated as one class. Plain files
    # at this level are skipped because listing them as a directory below
    # would raise.
    categories = [
        entry for entry in os.listdir(raw_path)
        if os.path.isdir(os.path.join(raw_path, entry))
    ]

    for cat in categories:
        src_dir = os.path.join(raw_path, cat)
        train_dir = os.path.join(train_path, cat)
        val_dir = os.path.join(val_path, cat)
        create_dir_if_not_exist(train_dir)
        create_dir_if_not_exist(val_dir)

        for im in os.listdir(src_dir):
            # Each image independently goes to the training set with
            # probability train_ratio, otherwise to the validation set.
            if np.random.rand() < train_ratio:
                dest_dir = train_dir
            else:
                dest_dir = val_dir
            copyfile(os.path.join(src_dir, im), os.path.join(dest_dir, im))

In [None]:
# Build the InceptionV3 base with ImageNet weights and without its top
# (classification) layers; inputs are declared as 299x299 with 3 channels.
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(299, 299, 3)
)


In [None]:
from keras.layers.core import Dropout

# Freeze every layer of the pre-trained base so only the new head is trainable.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

## New classification head on top of the base model's output ##
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(2048, activation='relu')(pooled)
hidden = Dense(1024, activation='relu')(hidden)
# Softmax over the 8 output classes.
predictions = Dense(8, activation='softmax')(hidden)

# Full network (base input -> new head); this is the model that gets trained.
model = Model(input=base_model.input, output=predictions)

In [None]:
### DATA GENERATOR ###
from keras.preprocessing.image import ImageDataGenerator

# The only preprocessing configured is scaling pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by the training and validation streams.
flow_options = dict(
    target_size=(299, 299),
    batch_size=32,
    class_mode="categorical",
)

train_generator = train_datagen.flow_from_directory('data/train', **flow_options)

# The validation stream is produced by the same generator object, so it
# receives the same rescaling as the training stream.
validation_generator = train_datagen.flow_from_directory('data/validation/', **flow_options)

In [None]:
## TEST ONE PICTURE ##
from keras.preprocessing.image import load_img, img_to_array
from matplotlib.pyplot import imshow
%matplotlib inline
im = load_img('data/train/ALB/img_00029.jpg')
imshow(im)
img_to_array(im).shape

In [None]:
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, CSVLogger

# Scale the learning rate by 0.3 (never below 1e-5) when val_loss stops
# improving; see the Keras ReduceLROnPlateau documentation for the exact
# meaning of epsilon and patience.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, epsilon = 0.01,
                  patience=2, min_lr=0.00001, verbose = 1)

# The training cells pass `checkpoint` in their callback lists, so it has to
# exist. `monitor` takes a single metric name (a list is not accepted).
# The target folder is created up front because the weights files are
# written into it.
create_dir_if_not_exist("model_checkpoints")
checkpoint = ModelCheckpoint("model_checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
                             monitor='val_loss')

# Append per-epoch metrics to training.log so that successive training
# runs accumulate in one file.
csv_logger = CSVLogger("training.log", separator=',', append=True)


## Train our new layers while freezing the others

In [None]:

# Make sure every layer of the pre-trained base is frozen before compiling,
# so that this stage only updates the newly added layers.
for base_layer in base_model.layers:
    base_layer.trainable = False

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# NOTE: `checkpoint` must have been defined by an earlier cell before this runs.
head_callbacks = [reduce_lr, checkpoint, csv_logger]

model.fit_generator(
    train_generator,
    samples_per_epoch=2000,
    nb_epoch=5,
    validation_data=validation_generator,
    nb_val_samples=500,
    callbacks=head_callbacks)

## Train more layers as well

In [None]:
from keras.optimizers import SGD

# Layers at index 0..171 stay frozen; every layer from index 172 onwards
# becomes trainable.
for position, net_layer in enumerate(model.layers):
    net_layer.trainable = position >= 172

# The trainable flags only take effect after recompiling. Fine-tuning uses
# SGD with momentum and a low learning rate.
fine_tune_optimizer = SGD(lr=0.0001, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=fine_tune_optimizer,
              metrics=['accuracy'])

# NOTE: `checkpoint` must have been defined by an earlier cell before this runs.
fine_tune_callbacks = [reduce_lr, checkpoint, csv_logger]

model.fit_generator(
    train_generator,
    samples_per_epoch=2000,
    nb_epoch=5,
    validation_data=validation_generator,
    nb_val_samples=500,
    callbacks=fine_tune_callbacks)

In [None]:
# Run the model over every file in the stage-1 test folder, one image at a
# time, collecting file names and prediction arrays in matching order.
test_path = 'data/test_stg1/'
test_images = os.listdir(test_path)

preds = []
names = []
for file_name in test_images:
    # Resize to the 299x299 model input while loading.
    pixels = img_to_array(load_img(test_path + file_name, target_size=(299, 299)))
    # Add a leading batch axis and apply the same 1/255 scaling used by the
    # training generator.
    single_batch = np.expand_dims(pixels, axis=0) / 255.
    scores = model.predict(single_batch)
    names.append(file_name)
    preds.append(scores)

In [None]:
import pandas as pd

# Column labels assigned, in order, to the prediction columns.
class_columns = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

# Stack the per-image prediction arrays into one table and clip every
# value into the range [0.2, 0.8].
stacked_preds = np.concatenate(preds)
classPreds = pd.DataFrame(stacked_preds, columns=class_columns).clip(0.2, 0.8)
classPreds['image'] = names

# Output column order: image name first, then the class columns.
arrCol = ['image'] + class_columns

classPreds[arrCol].to_csv("submission_clipped.csv", index=False)