# Setup directories

In [None]:
# Imported here as well as in the main import cell: this cell comes first in
# the notebook, so on a fresh kernel `os` would otherwise be undefined below.
import os

# Project locations.
# NOTE(review): these are machine-specific absolute paths; adjust them when
# running the notebook on another machine.
working_directory = "/Users/johannes/Dropbox/Projekte/Kaggle Fish challenge/code/"
data_path = "/Users/johannes/Dropbox/Projekte/Kaggle Fish challenge/"
train_path = data_path+'train/'
val_path = data_path+'validation/'
test_path = data_path+'test_stg1'

# Make the code directory the current working directory.
os.chdir(working_directory)

# Setup

In [1]:
#### Setup ####

from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

import pandas as pd

import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.models import Sequential, Model
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers import Input
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.preprocessing import image

from __future__ import division,print_function

import os, json, importlib
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)

import matplotlib


Using Theano backend.


# Function definitions

In [2]:
# Per-channel mean pixel values as provided by the VGG researchers, reshaped
# to (3, 1, 1) so they broadcast over channels-first image arrays.
vgg_mean = np.array([123.68, 116.779, 103.939]).reshape((3, 1, 1))


def vgg_preprocess(x):
    """Centre a batch on the VGG channel means and reverse its channel axis.

    `x` is indexed as (batch, channel, ...); `vgg_mean` is subtracted from
    every image and axis 1 is then reversed.

    NOTE(review): the means are listed in R, G, B order, so the reversal
    takes RGB input to BGR -- assumes the images arrive as RGB; confirm
    against the image loader.
    """
    centered = x - vgg_mean
    # Flip the channel axis (rgb -> bgr under the assumption above).
    return centered[:, ::-1]
    
def ConvBlock(layers, model, filters):
    """Append one convolutional block to `model`.

    The block consists of `layers` repetitions of (1-pixel zero padding,
    3x3 ReLU convolution with `filters` filters), followed by a single
    2x2 max-pooling layer with stride 2. `model` is modified in place.
    """
    add_layer = model.add
    for _ in range(layers):
        add_layer(ZeroPadding2D((1, 1)))
        add_layer(Convolution2D(filters, 3, 3, activation='relu'))
    add_layer(MaxPooling2D((2, 2), strides=(2, 2)))

def FCBlock(model):
    """Append a 4096-unit ReLU dense layer followed by 50% dropout to `model`."""
    for layer in (Dense(4096, activation='relu'), Dropout(0.5)):
        model.add(layer)
    
def VGG_16():
    """Build the VGG-16 architecture as a Keras `Sequential` model and return it.

    The network takes (3, 224, 224) inputs, applies `vgg_preprocess` as its
    first layer, then five convolutional blocks, two fully connected blocks
    and a 1000-way softmax output. No weights are loaded here.
    """
    net = Sequential()
    net.add(Lambda(vgg_preprocess, input_shape=(3, 224, 224)))

    # (number of conv layers, number of filters) for each block, in order.
    conv_spec = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))
    for n_convs, n_filters in conv_spec:
        ConvBlock(n_convs, net, n_filters)

    net.add(Flatten())
    for _ in range(2):
        FCBlock(net)
    net.add(Dense(1000, activation='softmax'))
    return net

# Finetuning
def fit_model(model, batches, val_batches, nb_epoch=1):
    """Train `model` on `batches` for `nb_epoch` epochs, validating on `val_batches`.

    The number of samples per epoch and the number of validation samples are
    read from the `N` attribute of the respective batch iterators. Nothing is
    returned.
    """
    n_train = batches.N
    n_val = val_batches.N
    model.fit_generator(
        batches,
        samples_per_epoch=n_train,
        nb_epoch=nb_epoch,
        validation_data=val_batches,
        nb_val_samples=n_val,
    )
    
def _pred_batch_global(name):
    """Return the notebook-level variable `name`, raising NameError if it is unset."""
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            "name '{0}' is not defined; pass {0}=... to pred_batch".format(name))


def pred_batch(imgs, model=None, classes=None):
    """Predict a batch of images and print the top class of each one.

    Parameters
    ----------
    imgs : batch of inputs handed unchanged to `model.predict`.
    model : object with a `predict` method returning a 2-D array of
        per-class scores (one row per image). When omitted, the
        notebook-level variable `model` is used.
    classes : sequence mapping a class index to its label. When omitted,
        the notebook-level variable `classes` is used.

    Raises
    ------
    NameError
        If `model` or `classes` is neither passed nor defined globally.

    Prints the shape of the prediction array followed by one
    "probability/label" line per image. Returns nothing.
    """
    # Explicit arguments take priority; the global lookup keeps old
    # single-argument calls working.
    if model is None:
        model = _pred_batch_global('model')
    if classes is None:
        classes = _pred_batch_global('classes')

    preds = model.predict(imgs)
    idxs = np.argmax(preds, axis=1)  # index of the highest score in each row

    print('Shape: {}'.format(preds.shape))
    print('Predictions prob/class: ')

    for row, idx in zip(preds, idxs):
        print ('  {:.4f}/{}'.format(row[idx], classes[idx]))

# Model setup

In [None]:
# Start from the plain VGG-16 architecture.
fish = VGG_16()

# Load the VGG-16 weights from the local data directory.
# Alternative kept for reference: download them from fast.ai instead.
#FILES_PATH = 'http://www.platform.ai/models/'
#fpath = get_file('vgg16.h5', FILES_PATH+'vgg16.h5', cache_subdir='models')
fish.load_weights(data_path+'vgg16_weights/'+'vgg16.h5')

# Drop the 1000-class output layer and freeze every remaining layer, so that
# only the layers added below are trainable.
fish.pop()
for layer in fish.layers:
    layer.trainable = False

# New head: dropout followed by an 8-class softmax output.
fish.add(Dropout(0.4))
fish.add(Dense(8, activation='softmax'))

# Compile the modified model; categorical_crossentropy is equal to
# multiclass logloss.
opt = keras.optimizers.Adam(lr=0.001)
fish.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
fish.summary()

# Finetune last model layer

In [3]:
batch_size = 8


def get_batches(path, gen=None, shuffle=True, batch_size=8, class_mode='categorical'):
    """Create a batch iterator over the images stored under `path`.

    Parameters
    ----------
    path : directory passed to `gen.flow_from_directory`.
    gen : generator object providing `flow_from_directory`. When omitted, a
        fresh default `image.ImageDataGenerator()` is created for this call.
    shuffle, batch_size, class_mode : forwarded unchanged to
        `flow_from_directory`.

    Returns
    -------
    Whatever `gen.flow_from_directory` returns, with images resized to
    224 x 224.
    """
    # A None sentinel avoids building one generator at definition time and
    # silently sharing it between every call that relies on the default.
    if gen is None:
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(path, target_size=(224,224),
            class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)


# Random augmentation (rotation, shifts, shear, channel shift) for the
# training images.
generator = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)

# Training batches are augmented and shuffled.
train_batches = get_batches(train_path, gen=generator, shuffle=True, batch_size=batch_size)
# Validation batches use a plain generator in a fixed order: augmenting them
# would make the validation loss/accuracy noisy and not comparable between
# epochs or runs.
val_batches = get_batches(val_path, gen=image.ImageDataGenerator(), shuffle=False,
                          batch_size=batch_size)

# Finetune the model.
# The learning rate is written into the optimizer's existing backend variable.
# Assigning a Python float to `optimizer.lr` would replace that variable and
# would not reach an already compiled training function.
K.set_value(fish.model.optimizer.lr, 0.001)
fish.model.fit_generator(train_batches, samples_per_epoch=train_batches.nb_sample, nb_epoch=5,
                    validation_data=val_batches, nb_val_samples=val_batches.nb_sample)


# Persist the finetuned weights. The reload right after saving is a no-op; it
# is kept so the line is at hand for restoring the weights without retraining.
fish.save_weights(data_path+'fish_finetune2.h5')
fish.load_weights(data_path+'fish_finetune2.h5')

NameError: name 'train_path' is not defined

# Make predictions

# Unlabelled test images, read in a fixed order so that row i of the
# predictions corresponds to test_batches.filenames[i].
test_batches = get_batches(test_path, shuffle=False, batch_size=batch_size, class_mode=None)
predictions = fish.model.predict_generator(test_batches, test_batches.nb_sample)

# Clip the probabilities to [0.05, 0.95] -- presumably to bound the log-loss
# penalty of confident wrong predictions.
predictions_clipped = pd.DataFrame(predictions.clip(min=0.05, max=0.95))

# Column j of the predictions belongs to the class whose index is j, so the
# labels are ordered by their index. Plain `class_indices.keys()` order is
# not guaranteed to match it and could mislabel the columns.
predictions_clipped.columns = sorted(train_batches.class_indices,
                                     key=train_batches.class_indices.get)

# Make and upload submission

In [4]:
filenames = test_batches.filenames
# Strip everything up to and including the first '/' (the sub-directory part
# of each path) to obtain the image id.
image_names = [f[f.find('/')+1:] for f in filenames]
ids = pd.DataFrame(image_names)

# Insert the id column only once: `insert` raises if the column already
# exists, which would break re-running this cell on its own.
if "image" not in predictions_clipped.columns:
    predictions_clipped.insert(0, "image", image_names)

# Build the output path with os.path.join (data_path already ends in '/') and
# make sure the target directory exists before writing.
submission_dir = os.path.join(data_path, "submissions")
if not os.path.isdir(submission_dir):
    os.makedirs(submission_dir)
predictions_clipped.to_csv(os.path.join(submission_dir, "sample_submission3.csv"), index=False)

NameError: name 'test_batches' is not defined