In [1]:
import numpy as np
import keras

import os
import glob
import cv2
import datetime
import pandas as pd
import time
import warnings

warnings.filterwarnings("ignore")

from sklearn.cross_validation import KFold
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from sklearn.metrics import log_loss
from keras import __version__ as keras_version
from keras.callbacks import EarlyStopping

import data_set
%load_ext autoreload
%autoreload 2



Using TensorFlow backend.


In [2]:
keras_version

'2.0.1'

In [3]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [4]:
# Central configuration table for the notebook. Every key is defined exactly
# once: `batch_size` used to be assigned twice (20, then 16) and only the
# later value, 16, ever took effect, so the dead first assignment is gone.
FLAGS = {
    # Image size requested from the data loaders.
    'width': 32,
    'height': 32,
    # Mini-batch size used by the generators and the training loops.
    'batch_size': 16,
    'kernel_1_out': 8,
    'kernel_2_out': 8,
    'conv2_input_width': 16,
    'conv2_input_height': 16,
    'n_classes': 8,
    'learning_rate': 0.001,
    # Number of passes over the training data.
    'n_epochs': 20,
    'train_report_step': 20,
    'val_report_step': 80,
    'keep_prob': 0.75,
    'reg': 0.01,
    'patience': 3,
}

In [5]:
# Build the project's DataSet at the configured image size. The recorded
# output shows it reading the eight class folders (ALB ... YFT) and taking
# roughly 470 seconds, so avoid re-running this cell unnecessarily.
data = data_set.DataSet(width = FLAGS['width'], height=FLAGS['height'])

Read train images
Load folder ALB (Index: 0)
Load folder BET (Index: 1)
Load folder DOL (Index: 2)
Load folder LAG (Index: 3)
Load folder NoF (Index: 4)
Load folder OTHER (Index: 5)
Load folder SHARK (Index: 6)
Load folder YFT (Index: 7)
Read train data time: 469.97 seconds


In [6]:
# Validation images and labels as provided by the DataSet object; how the
# split is made is defined in data_set, not in this notebook.
X_val, y_val = data.get_validation_set()

In [14]:
def create_model(input_shape=(32, 32, 3), n_classes=8, learning_rate=1e-2):
    """Build and compile a small convolutional image classifier.

    Architecture: two blocks of (zero-pad, 3x3 conv, zero-pad, 3x3 conv,
    2x2 max-pool) with 4 and then 8 filters, followed by two 32-unit ReLU
    dense layers (each with dropout 0.5) and a softmax output layer.

    Args:
        input_shape: Shape of a single input image as given to the first
            layer. Defaults to (32, 32, 3), the previously hard-coded value.
        n_classes: Number of units in the softmax output. Defaults to 8.
        learning_rate: Learning rate for the SGD optimizer. Defaults to 1e-2.

    Returns:
        A compiled ``Sequential`` model using SGD with Nesterov momentum and
        categorical cross-entropy loss.
    """
    model = Sequential()

    # Kernel sizes are passed as explicit (rows, cols) tuples. The older
    # positional form `Convolution2D(4, 3, 3)` only means "3x3 kernel" through
    # Keras' legacy-argument conversion; under the Keras 2 signature the third
    # positional argument is `strides`.
    model.add(ZeroPadding2D((1, 1), input_shape=input_shape))
    model.add(Convolution2D(4, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(4, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(8, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(8, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    sgd = SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True)
    # Deliberately compiled without `metrics`: the manual training loop in
    # this notebook accumulates the value returned by `train_on_batch`
    # directly as a loss.
    model.compile(optimizer=sgd, loss='categorical_crossentropy')

    return model


In [15]:
# Instantiate a new compiled network. Re-running this cell replaces `model`
# with a freshly built one, discarding anything trained so far.
model = create_model()

In [16]:
# Training-time augmentation: random rotations within a 40-degree range and
# random horizontal flips, with fill_mode 'nearest' for pixels exposed by the
# transform.
datagen = ImageDataGenerator(
    rotation_range=40,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [10]:
# Validation batches come from a generator constructed with no augmentation
# options, batched at the same size as the training data.
val_generator = ImageDataGenerator()
val_gen_flow = val_generator.flow(X_val, y_val, batch_size=FLAGS['batch_size'])

In [11]:
# Stream of shuffled, augmented training batches consumed by fit_generator.
train_gen = datagen.flow(
    data.X_train,
    data.y_train,
    batch_size=FLAGS['batch_size'],
    shuffle=True,
)


In [12]:
# Number of batches drawn per epoch for training and for validation. Integer
# division ignores a trailing partial batch; max(1, ...) keeps the step count
# valid even when a set is smaller than one batch.
steps_per_epoch = max(1, data.X_train.shape[0] // FLAGS['batch_size'])
val_steps = max(1, X_val.shape[0] // FLAGS['batch_size'])

# Stop once `val_loss` has gone 5 epochs without improving.
# NOTE(review): FLAGS['patience'] is 3 while 5 is used here; confirm which
# value is intended before wiring the flag in.
callbacks = [EarlyStopping(monitor='val_loss', patience=5, verbose=0)]

# `epochs` is the Keras 2 name for the former `nb_epoch` argument, matching
# the Keras 2 `steps_per_epoch` / `validation_steps` arguments already used.
# The epoch count now comes from the shared configuration instead of a
# literal 20 (same value).
model.fit_generator(train_gen,
                    steps_per_epoch=steps_per_epoch,
                    epochs=FLAGS['n_epochs'],
                    validation_data=val_gen_flow,
                    validation_steps=val_steps,
                    callbacks=callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


<keras.callbacks.History at 0x12f3f2630>

In [13]:
# (Stray interactive `?` help lookup on datagen.flow removed: it is not valid
# Python and interrupts a Restart-and-Run-All of the notebook.)

In [22]:
# Manual training loop: FLAGS['n_epochs'] passes over the augmented training
# data, reporting the mean per-batch loss after each pass.
#
# The epoch length is taken from the array actually fed to the generator
# (public `data.X_train`) rather than the private `data._num_examples`.
# Ceil division gives a whole number of batches, including a final partial one.
batch_per_epoch = (data.X_train.shape[0] + FLAGS['batch_size'] - 1) // FLAGS['batch_size']

for e in range(FLAGS['n_epochs']):
    print('Epoch', e)
    batches = 0
    loss = 0

    # `shuffle=True` makes the generator shuffle the samples itself, so the
    # DataSet's private arrays are no longer permuted in place here. That
    # keeps this cell idempotent and leaves `data` untouched.
    batch_flow = datagen.flow(data.X_train,
                              data.y_train,
                              shuffle=True,
                              batch_size=FLAGS['batch_size'])

    # The generator never ends on its own; zipping with a range bounds the
    # epoch. `range` comes first so that no extra batch is pulled (and
    # discarded) when the epoch is complete.
    for _, (X_batch, y_batch) in zip(range(batch_per_epoch), batch_flow):
        loss += model.train_on_batch(X_batch, y_batch)
        batches += 1

    # Average over the batches actually processed (guarding the empty case)
    # instead of dividing by a fractional batch count.
    print('avg. loss for epoch {}:{}'.format(e, loss / max(batches, 1)))


Epoch 0
avg. loss for epoch 0:1.6692178354323286
Epoch 1
avg. loss for epoch 1:1.610673231385631
Epoch 2
avg. loss for epoch 2:1.60135467034308
Epoch 3
avg. loss for epoch 3:1.6017817150319267
Epoch 4
avg. loss for epoch 4:1.5989781152093225
Epoch 5
avg. loss for epoch 5:1.5963131842412763
Epoch 6
avg. loss for epoch 6:1.5965758575899085
Epoch 7
avg. loss for epoch 7:1.5968559774015567
Epoch 8
avg. loss for epoch 8:1.595537757684126
Epoch 9
avg. loss for epoch 9:1.595895082498378
Epoch 10
avg. loss for epoch 10:1.5944840542649008
Epoch 11
avg. loss for epoch 11:1.5953085563783416
Epoch 12
avg. loss for epoch 12:1.5928869957720275
Epoch 13
avg. loss for epoch 13:1.5948519143234299
Epoch 14
avg. loss for epoch 14:1.593489867888788
Epoch 15
avg. loss for epoch 15:1.5941285840804433
Epoch 16
avg. loss for epoch 16:1.5939951861626815
Epoch 17
avg. loss for epoch 17:1.5942977556760878
Epoch 18
avg. loss for epoch 18:1.5935194160085133
Epoch 19
avg. loss for epoch 19:1.5934625731048029


In [24]:
# Load the test images at the same width/height used for the training data.
# The recorded output shows 1000 files being loaded.
# NOTE(review): the second return value is named `y_test` but is passed to
# makeSubmission, whose output has an `image` file-name column; presumably
# these are image identifiers rather than labels. Confirm in load_images.
import load_images
X_test, y_test = load_images.load_test(width = FLAGS['width'], 
                                                      height=FLAGS['height'])

loading 0 of 1000
loading 20 of 1000
loading 40 of 1000
loading 60 of 1000
loading 80 of 1000
loading 100 of 1000
loading 120 of 1000
loading 140 of 1000
loading 160 of 1000
loading 180 of 1000
loading 200 of 1000
loading 220 of 1000
loading 240 of 1000
loading 260 of 1000
loading 280 of 1000
loading 300 of 1000
loading 320 of 1000
loading 340 of 1000
loading 360 of 1000
loading 380 of 1000
loading 400 of 1000
loading 420 of 1000
loading 440 of 1000
loading 460 of 1000
loading 480 of 1000
loading 500 of 1000
loading 520 of 1000
loading 540 of 1000
loading 560 of 1000
loading 580 of 1000
loading 600 of 1000
loading 620 of 1000
loading 640 of 1000
loading 660 of 1000
loading 680 of 1000
loading 700 of 1000
loading 720 of 1000
loading 740 of 1000
loading 760 of 1000
loading 780 of 1000
loading 800 of 1000
loading 820 of 1000
loading 840 of 1000
loading 860 of 1000
loading 880 of 1000
loading 900 of 1000
loading 920 of 1000
loading 940 of 1000
loading 960 of 1000
loading 980 of 1000


In [25]:
# Run the current `model` over all test images; the softmax output layer
# yields one value per class for each image.
preds = model.predict(X_test)

In [26]:
# Combine the predictions with the second value returned by load_test into a
# submission table using the project's make_submission helper.
import make_submission
submit = make_submission.makeSubmission(preds, y_test)

In [27]:
# Inspect the last rows of the submission.
# NOTE(review): every row shown in the recorded output carries exactly the
# same class probabilities, i.e. the model emitted the same distribution for
# all of these images. Investigate before submitting.
submit.tail(20)

Unnamed: 0,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT,image
980,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07663.jpg
981,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07678.jpg
982,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07689.jpg
983,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07700.jpg
984,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07717.jpg
985,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07746.jpg
986,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07757.jpg
987,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07792.jpg
988,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07799.jpg
989,0.448957,0.053299,0.033279,0.01905,0.136176,0.076786,0.042877,0.189576,img_07818.jpg


In [18]:
# Repeats the earlier predict call with an explicit batch size and verbosity
# setting; overwrites `preds`.
preds = model.predict(X_test, batch_size=FLAGS['batch_size'], verbose=2)


In [24]:
# Rebuild the submission table from the current `preds` (same call as the
# earlier makeSubmission cell); overwrites `submit`.
import make_submission
submit = make_submission.makeSubmission(preds,y_test)

In [27]:
# Write the submission to disk without the row-index column.
submit.to_csv('keras_32_32.csv', index=False)