In [10]:
import os, random
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline 

from keras.models import Sequential
from keras.layers import Convolution2D, BatchNormalization, LeakyReLU, AveragePooling2D, Flatten, Dropout, Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras import backend as K

TRAIN_DIR = '../data/train/'
TEST_DIR = '../data/test_stg1/'
FISH_CLASSES = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
ROWS = 256
COLS = 256

In [None]:
if os.path.exists('data_train_{}_{}.pickle'.format(ROWS, COLS)):
    print ('Exist data_train_{}_{}.pickle. Loading from data file.'.format(ROWS, COLS))
    with open('data_train_{}_{}.pickle'.format(ROWS, COLS), 'rb') as f:
        data_train = pickle.load(f)
    x_train = data_train['x_train']
    y_train = data_train['y_train']
else:
    print ('Generating data_train_{}_{}.pickle.'.format(ROWS, COLS))

    def get_images(fish):
        """Load files from train folder"""
        fish_dir = TRAIN_DIR+'{}'.format(fish)
        images = [fish+'/'+im for im in os.listdir(fish_dir)]
        return images

    def read_image(src):
        """Read and resize individual images"""
        im = Image.open(src)
        im = im.resize((COLS, ROWS), Image.BILINEAR)
        im = np.asarray(im)
        return im

    files = []
    y_train = []

    for fish in FISH_CLASSES:
        fish_files = get_images(fish)
        files.extend(fish_files)

        y_fish = np.tile(fish, len(fish_files))
        y_train.extend(y_fish)
        #print("{0} photos of {1}".format(len(fish_files), fish))

    y_train = np.array(y_train)
    x_train = np.ndarray((len(files), ROWS, COLS, 3), dtype=np.float32)

    for i, im in enumerate(files): 
        x_train[i] = read_image(TRAIN_DIR+im)
        if i%1000 == 0: print('Processed {} of {}'.format(i, len(files)))

    x_train = x_train / 255
    #print(x_train.shape)

    # One Hot Encoding Labels
    y_train = LabelEncoder().fit_transform(y_train)
    y_train = np_utils.to_categorical(y_train)

    #save data to file
    import pickle

    data_train = {'x_train': x_train,'y_train': y_train }

    with open('data_train_{}_{}.pickle'.format(ROWS, COLS), 'wb') as f:
        pickle.dump(data_train, f)

#x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=22, stratify=y_all)

Generating data_train_256_256.pickle.
Processed 0 of 3777
Processed 1000 of 3777
Processed 2000 of 3777


In [9]:
print X_all

[[[[253 254 253]
   [253 254 252]
   [252 254 252]
   ..., 
   [ 98  93  90]
   [ 98  93  89]
   [ 99  94  90]]

  [[228 228 224]
   [187 187 189]
   [197 195 194]
   ..., 
   [100  95  91]
   [101  96  92]
   [102  97  93]]

  [[217 215 210]
   [164 164 165]
   [184 182 184]
   ..., 
   [101  96  92]
   [102  97  93]
   [103  98  94]]

  ..., 
  [[ 46  55  70]
   [ 51  60  75]
   [ 67  76  91]
   ..., 
   [ 57  66  85]
   [ 50  60  79]
   [ 49  59  81]]

  [[ 46  55  69]
   [ 49  59  72]
   [ 63  72  86]
   ..., 
   [ 63  72  81]
   [ 57  66  79]
   [ 53  61  80]]

  [[ 46  56  68]
   [ 48  58  70]
   [ 56  66  78]
   ..., 
   [ 57  70  77]
   [ 61  71  84]
   [ 61  68  89]]]


 [[[253 254 253]
   [253 254 252]
   [252 254 252]
   ..., 
   [253 252 251]
   [252 252 252]
   [252 252 252]]

  [[228 228 224]
   [187 187 189]
   [197 195 194]
   ..., 
   [237 241 222]
   [248 251 237]
   [250 253 241]]

  [[217 215 210]
   [164 164 165]
   [184 182 184]
   ..., 
   [105 115 103]
   [137 1

In [None]:
optimizer = Adam(lr=1e-4, decay=0.0)

def create_model():
    model = Sequential()

    model.add(Convolution2D(32, 3, 3, border_mode='same', dim_ordering='tf'), input_shape=(ROWS, COLS, 3))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(32, 3, 3, border_mode='same', dim_ordering='tf'), subsample=(2, 2))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    
    model.add(Convolution2D(64, 3, 3, border_mode='same', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(64, 3, 3, border_mode='same', dim_ordering='tf'), subsample=(2, 2))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))

    model.add(Convolution2D(128, 3, 3, border_mode='same', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(128, 3, 3, border_mode='same', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(128, 3, 3, border_mode='same', dim_ordering='tf'), subsample=(2, 2))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    
    model.add(Convolution2D(256, 3, 3, border_mode='same', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(256, 3, 3, border_mode='same', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(256, 3, 3, border_mode='same', dim_ordering='tf'), subsample=(2, 2))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    
    model.add(Convolution2D(256, 3, 3, border_mode='same', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(256, 3, 3, border_mode='same', dim_ordering='tf'))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    model.add(Convolution2D(256, 3, 3, border_mode='same', dim_ordering='tf'), subsample=(2, 2))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.33))
    
    model.add(AveragePooling2D(pool_size=(7, 7), dim_ordering='tf'))
    model.add(Flatten())
    model.add(Dense(len(FISH_CLASSES), activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=categorical_accuracy)
    return model

In [None]:
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

datagen.fit(X_train)