In [1]:
%matplotlib inline
from __future__ import print_function
import os,sys
# The notebook's working directory is one level below the code root, and the
# data directory is a sibling of the code root.
notebook_code_root = os.path.split(os.getcwd())[0]
data_root = os.path.split(notebook_code_root)[0] + '/data/'
# Put the code root first on the import path so the local modules imported
# further down in this cell can be found.
sys.path[:0] = [notebook_code_root]
print("Data root: %s" % data_root)

import cutils
from cutils import *

from keras.layers.core import Reshape
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical

import math

from vgg16 import *

# Build the options object shared by the rest of the notebook and echo it.
# preprocess=False makes get_data() further down load previously saved conv
# features instead of recomputing them (it branches on opt.preprocess).
# NOTE(review): NotebookData is not defined here (presumably from cutils), and
# sample_mode presumably selects a reduced data set -- confirm.
opts = NotebookData(data_dir=data_root, sample_mode=False, preprocess=False)
print(opts)

Data root: /home/ubuntu/data/


Using Theano backend.
Using cuDNN version 5103 on context None
Mapped name None to device cuda: Tesla K80 (0000:00:1E.0)


Options:
  Testing directory: /home/ubuntu/data/statefarm/test/
  Training directory: /home/ubuntu/data/statefarm/train/
  Validation directory: /home/ubuntu/data/statefarm/valid/
  Preprocess directory: /home/ubuntu/data/statefarm/preprocessed/
  Results directory: /home/ubuntu/data/statefarm/results


In [2]:
# Load the VGG model.
# Vgg16 arrives through the star import from the local vgg16 module; the next
# cell only relies on the `.model.layers` list of the resulting object.
# NOTE(review): presumably this also loads pretrained weights -- confirm in vgg16.
vgg = Vgg16()

In [3]:
# Split VGG at its last max-pooling layer: everything up to and including that
# layer becomes the feature extractor, the remaining layers are kept aside.
ls = vgg.model.layers
idx = 0
for i, layer in enumerate(ls):
    # Keep overwriting so idx ends up at the *last* pooling layer.
    if type(layer) is MaxPooling2D:
        idx = i

split_at = idx + 1
conv_model = Sequential(layers=ls[:split_at])
fc_layers = ls[split_at:]

In [4]:
def get_data(opt):
    """Return conv-layer features and class matrices for training and validation.

    Two modes, selected by ``opt.preprocess``:

    * true  -- run the images under ``opt.train_dir()`` and ``opt.valid_dir()``
      through the notebook-level ``conv_model`` and save the results with
      ``save_array`` under ``<opt.data_root>/preprocessed/conv``. The training
      images are predicted once unaugmented and ``aug_passes`` more times
      through a randomly transforming generator.
    * false -- read the arrays written by an earlier preprocessing run back
      with ``load_array``.

    In both modes the unaugmented and augmented training features are
    concatenated and the training classes are tiled ``aug_passes + 1`` times
    so that both have the same length.

    Args:
        opt: options object. This function reads ``opt.preprocess`` and
            ``opt.data_root`` and calls ``opt.train_dir()`` and
            ``opt.valid_dir()``.

    Returns:
        Tuple ``(trn_features, val_features, trn_classes, val_classes)``.

    Raises:
        ValueError: if the training features and the tiled training classes
            differ in length (checked for freshly computed and for cached
            arrays alike, so stale caches cannot be silently mis-paired).
    """
    batch_size = 64
    aug_passes = 5  # number of augmented sweeps over the training images
    target_size = (224, 224)
    # NOTE(review): assumes the options object exposes `data_root` -- confirm
    # against the NotebookData definition.
    conv_dir = os.path.join(opt.data_root, 'preprocessed', 'conv')

    def flow(generator, directory):
        # shuffle=False so predictions come out in the same order as the
        # iterator's `.classes`, which is what the class matrices are built from.
        return generator.flow_from_directory(directory,
                                             target_size=target_size,
                                             class_mode='categorical',
                                             shuffle=False,
                                             batch_size=batch_size)

    def steps_for(batches, passes=1):
        # Whole number of batches needed to see every image `passes` times.
        return int(math.ceil(batches.n / float(batch_size))) * passes

    if opt.preprocess:
        # Unaugmented features for training and validation.
        plain_gen = image.ImageDataGenerator()
        trn_batches = flow(plain_gen, opt.train_dir())
        val_batches = flow(plain_gen, opt.valid_dir())
        trn_classes = to_categorical(trn_batches.classes)
        val_classes = to_categorical(val_batches.classes)
        # `steps` is given explicitly everywhere so each generator is swept an
        # exact, known number of times regardless of the Keras default.
        trn_plain = conv_model.predict_generator(trn_batches,
                                                 steps=steps_for(trn_batches),
                                                 verbose=1)
        val_features = conv_model.predict_generator(val_batches,
                                                    steps=steps_for(val_batches),
                                                    verbose=1)
        if not os.path.exists(conv_dir):
            os.makedirs(conv_dir)
        save_array(os.path.join(conv_dir, 'train-d-dt'), trn_plain)
        save_array(os.path.join(conv_dir, 'val-dt'), val_features)
        # The classes are saved untiled; tiling happens after load/compute.
        save_array(os.path.join(conv_dir, 'train-cl'), trn_classes)
        save_array(os.path.join(conv_dir, 'val-cl'), val_classes)

        # Augmented features: same images, random geometric transforms.
        aug_gen = image.ImageDataGenerator(rotation_range=15,
                                           width_shift_range=0.1,
                                           height_shift_range=0.1,
                                           shear_range=0.16)
        aug_batches = flow(aug_gen, opt.train_dir())
        trn_aug = conv_model.predict_generator(aug_batches,
                                               steps=steps_for(aug_batches, aug_passes),
                                               verbose=1)
        save_array(os.path.join(conv_dir, 'train-t-dt'), trn_aug)
    else:
        trn_plain = load_array(os.path.join(conv_dir, 'train-d-dt'))
        trn_aug = load_array(os.path.join(conv_dir, 'train-t-dt'))
        val_features = load_array(os.path.join(conv_dir, 'val-dt'))
        trn_classes = load_array(os.path.join(conv_dir, 'train-cl'))
        val_classes = load_array(os.path.join(conv_dir, 'val-cl'))

    trn_features = np.concatenate([trn_plain, trn_aug])
    # One copy of the classes for the plain pass plus one per augmented pass.
    trn_classes = np.concatenate([trn_classes] * (aug_passes + 1))
    if trn_features.shape[0] != trn_classes.shape[0]:
        raise ValueError(
            'training features (%d rows) and tiled training classes (%d rows) '
            'are misaligned; expected 1 plain + %d augmented passes'
            % (trn_features.shape[0], trn_classes.shape[0], aug_passes))

    return (trn_features, val_features, trn_classes, val_classes)

In [5]:
# Load (or recompute) the conv features, then pair every training feature with
# its class row so the two can be shuffled together later.
trn_features, val_features, img_trn_classes, img_val_classes = get_data(opts)
# list(...) matters on Python 3, where zip() returns a one-shot iterator that
# np.random.shuffle cannot shuffle in place; on Python 2 it is a shallow copy.
zipped_trn = list(zip(trn_features, img_trn_classes))
# Drop the notebook-level names for the big arrays. NOTE: rows obtained by
# iterating a NumPy array are views, so the underlying buffers remain
# referenced through zipped_trn.
trn_features = None
img_trn_classes = None

In [6]:
def rand_training(zipped_trn):
    """Shuffle (feature, class) pairs and return them as two stacked arrays.

    ``zipped_trn`` is shuffled in place with ``np.random.shuffle``, so the
    caller's list is reordered as a side effect. Because the pairs move as
    units, row ``k`` of the first returned array still belongs to row ``k``
    of the second.

    Returns:
        ``(trn_data, trn_cls)`` -- the features and the classes, each stacked
        along a new leading axis in the shuffled order.
    """
    np.random.shuffle(zipped_trn)
    features = []
    classes = []
    for feature, cls_row in zipped_trn:
        features.append(feature)
        classes.append(cls_row)
    return np.stack(features, axis=0), np.stack(classes, axis=0)

In [12]:
# Dense-only classifier that is trained on the precomputed conv features.
fc_model = Sequential()
# The conv output arrives as (512, 7, 7) per image; flatten it to 25088 values.
fc_model.add(Reshape((25088,), input_shape=(512, 7, 7)))
# NOTE(review): the first hidden layer has only 10 units before widening to
# 256 -- confirm this bottleneck is intentional.
fc_model.add(Dense(10, activation='relu'))
fc_model.add(BatchNormalization())
fc_model.add(Dropout(0.15))
fc_model.add(Dense(256, activation='relu'))
fc_model.add(BatchNormalization())
fc_model.add(Dropout(0.6))
# Ten-way softmax output.
fc_model.add(Dense(10, activation='softmax'))

In [13]:
# Categorical cross-entropy matches the softmax output and the class matrices
# used as targets; accuracy is reported alongside the loss.
fc_model.compile(loss='categorical_crossentropy',
                 metrics=['accuracy'],
                 optimizer=Adam(lr=1e-4))

In [14]:
# Each round reshuffles the cached training pairs and then fits the dense
# model for five epochs, validating on the precomputed validation features.
for round_no in range(1, 2):
    print('\nIteration %d' % round_no)
    training_data, training_classes = rand_training(zipped_trn)
    validation_pair = (val_features, img_val_classes)
    fc_model.fit(training_data,
                 training_classes,
                 epochs=5,
                 batch_size=256,
                 validation_data=validation_pair,
                 verbose=1)


Iteration 1
Train on 115056 samples, validate on 3248 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Hand the trained dense model to process_model under the tag 'v1'.
# NOTE(review): process_model is not defined in this notebook (presumably it
# comes from cutils). The recorded output shows it predicting on preprocessed
# test batches; sub=True presumably also writes the submission file -- confirm.
process_model(fc_model, opts, 'v1', sub=True)

Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch0
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch1
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch2


In [11]:
from IPython.display import FileLink

# Render a clickable link to submission.csv as this cell's output.
# NOTE(review): the file is presumably the one produced by the process_model
# call earlier in the notebook -- confirm the name and location match.
FileLink('submission.csv')