In [15]:
%matplotlib inline
from __future__ import print_function
import os,sys
# Derive the project layout from the notebook's working directory: the code
# root is the parent of the cwd, and the data directory sits next to the code
# root (i.e. inside the code root's parent).
notebook_code_root = os.path.dirname(os.getcwd())
data_root = "%s/data/" % os.path.dirname(notebook_code_root)
# Put the code root at the front of the import path so that modules located
# there can be imported by the statements that follow.
sys.path.insert(0, notebook_code_root)
print("Data root: %s" % data_root)

import cutils
from cutils import *

from keras.layers.core import Reshape
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical

from vgg16 import *

# Notebook-wide options object, rooted at the data directory computed above.
# get_data() further down reads `opt.preprocess` to decide between recomputing
# the convolutional features and loading previously saved ones.
# NOTE(review): NotebookData arrives via a star import (presumably from
# cutils); `sample_mode=False` presumably selects the full dataset rather than
# a small sample -- confirm against its definition.
opts = NotebookData(data_dir=data_root, sample_mode=False, preprocess=True)
print(opts)

Data root: /home/ubuntu/data/
Options:
  Testing directory: /home/ubuntu/data/statefarm/test/
  Training directory: /home/ubuntu/data/statefarm/train/
  Validation directory: /home/ubuntu/data/statefarm/valid/
  Preprocess directory: /home/ubuntu/data/statefarm/preprocessed/
  Results directory: /home/ubuntu/data/statefarm/results


In [2]:
# Load the VGG model.
# Vgg16 comes from the star import of the `vgg16` module; the wrapped network
# is reached through `vgg.model`, whose `.layers` list is split in the next cell.
# NOTE(review): presumably the constructor also loads pretrained weights -- confirm.
vgg = Vgg16()

In [3]:
# Split the VGG layer stack at its final MaxPooling2D layer: everything up to
# and including that layer becomes `conv_model`, the rest is kept in `fc_layers`.
ls = vgg.model.layers

# Remember the position of the last pooling layer seen; `idx` stays 0 when the
# stack contains no MaxPooling2D layer at all.
idx = 0
for i, layer in enumerate(ls):
    if type(layer) is MaxPooling2D:
        idx = i

split_at = idx + 1
conv_model = Sequential(layers=ls[:split_at])
fc_layers = ls[split_at:]

In [4]:
def _image_batches(directory):
    """Build an ordered (unshuffled) 224x224 image iterator over `directory`."""
    return image.ImageDataGenerator().flow_from_directory(
        directory,
        target_size=(224, 224),
        class_mode='categorical',
        # Keep file order fixed so predictions line up with `batches.classes`.
        shuffle=False,
        batch_size=64)


def _one_hot_labels(batches):
    """One-hot encode the class index of every image in `batches`."""
    # Pin the width to the number of class sub-directories. Letting
    # to_categorical infer it from the largest label present would yield a
    # narrower matrix for a split that happens to lack the last class.
    return to_categorical(batches.classes, len(batches.class_indices))


def get_data(opt):
    """Return convolutional features and one-hot labels for train and validation.

    When `opt.preprocess` is truthy, the images under `opt.train_dir()` and
    `opt.valid_dir()` are pushed through the module-level `conv_model` and the
    four resulting arrays are written with `save_array` below
    `<opt.data_root>/preprocessed/conv`. Otherwise the four arrays are read
    back from that directory with `load_array`.

    Args:
        opt: options object exposing `data_root`, `preprocess`, `train_dir()`
            and `valid_dir()`.

    Returns:
        Tuple `(train_features, valid_features, train_labels, valid_labels)`.
    """
    cache_dir = os.path.join(opt.data_root, 'preprocessed', 'conv')
    # Same order as the returned tuple.
    cache_paths = [os.path.join(cache_dir, name)
                   for name in ('train-dt', 'val-dt', 'train-cl', 'val-cl')]

    if not opt.preprocess:
        return tuple(load_array(path) for path in cache_paths)

    img_trn_batches = _image_batches(opt.train_dir())
    img_val_batches = _image_batches(opt.valid_dir())
    trn_labels = _one_hot_labels(img_trn_batches)
    val_labels = _one_hot_labels(img_val_batches)
    trn_feats = conv_model.predict_generator(img_trn_batches, verbose=1)
    val_feats = conv_model.predict_generator(img_val_batches, verbose=1)
    arrays = (trn_feats, val_feats, trn_labels, val_labels)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    for path, array in zip(cache_paths, arrays):
        save_array(path, array)

    return arrays

In [5]:
# Fetch the convolutional features and one-hot class labels for the training
# and validation sets; get_data() either recomputes and saves them or loads the
# saved copies, depending on `opts.preprocess`.
trn_features, val_features, img_trn_classes, img_val_classes = get_data(opts)

Found 17622 images belonging to 10 classes.
Found 4802 images belonging to 10 classes.


In [6]:
def rand_training(trn, img):
    """Return the training samples and their labels shuffled in unison.

    A single random permutation of row indices is applied to both inputs, so
    every sample keeps its own label. This works on Python 2 and Python 3
    alike (shuffling the result of `zip()` in place fails on Python 3, where
    `zip()` is an iterator) and avoids building one Python tuple per sample.

    Args:
        trn: array-like of training samples, indexed along the first axis.
        img: array-like of labels; entry k is the label of `trn[k]`.

    Returns:
        Tuple `(trn_data, trn_cls)` of new numpy arrays holding the same
        sample/label pairs in a fresh random order. The inputs are not
        modified.
    """
    trn = np.asarray(trn)
    img = np.asarray(img)
    # Mirror zip() pairing: only the first min(len) entries of each take part.
    n_pairs = min(len(trn), len(img))
    order = np.random.permutation(n_pairs)
    return trn[order], img[order]

In [67]:
# Build a fresh classifier made only of dense layers, to be trained on the
# output of the convolutional model.
fc_model = Sequential()

# The input shape is the shape of the conv result; flatten 512 * 7 * 7 = 25088.
fc_model.add(Reshape((25088,), input_shape=(512, 7, 7)))

# Three hidden blocks of Dense -> BatchNormalization -> Dropout, with the
# dropout rate increasing towards the output.
for units, drop_rate in ((256, 0.2), (128, 0.3), (128, 0.5)):
    fc_model.add(Dense(units, activation='relu'))
    fc_model.add(BatchNormalization())
    fc_model.add(Dropout(drop_rate))

# Softmax over the 10 output classes.
fc_model.add(Dense(10, activation='softmax'))

In [68]:
# Compile the dense classifier: categorical cross-entropy loss, accuracy as the
# reported metric, and Adam with a very small learning rate (1e-6).
fc_model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(lr=1e-6),
)

In [70]:
# Train in 40 rounds of 2 epochs each. Before every round the training
# feature/label pairs are reshuffled; validation always uses the same held-out
# features and labels.
fit_options = dict(
    batch_size=32,
    epochs=2,
    verbose=1,
    validation_data=(val_features, img_val_classes),
)
for i in range(40):
    training_data, training_classes = rand_training(trn_features, img_trn_classes)
    fc_model.fit(x=training_data, y=training_classes, **fit_options)

Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate on 4802 samples
Epoch 1/2
Epoch 2/2
Train on 17622 samples, validate o

In [72]:
# Run the trained dense model over the test set under the label 'v1'. Per the
# output recorded for this cell, it predicts over the preprocessed test batches
# and saves preds.dat and filenames.dat under the results 'v1' directory.
# NOTE(review): process_model is not defined in this notebook (presumably it
# comes from cutils); `sub=True` presumably also writes a submission file --
# confirm against its definition.
process_model(fc_model, opts, 'v1', sub=True)

Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch0
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch1
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch2
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch3
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch4
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch5
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch6
Predicting batch at /home/ubuntu/data/statefarm/preprocessed//test/batch7
Saved predictions to: /home/ubuntu/data/statefarm/results/v1/preds.dat
Saved filenames to: /home/ubuntu/data/statefarm/results/v1/filenames.dat


In [11]:
from IPython.display import FileLink

# Display a link to the submission file; as the last expression of the cell
# the FileLink object is rendered as the cell's output.
# NOTE(review): 'submission.csv' is a relative path; presumably it is the file
# produced by process_model(..., sub=True) -- confirm where that file is written.
FileLink('submission.csv')