In [1]:
%matplotlib inline
from __future__ import print_function
import os,sys
# The project layout is derived from the working directory: the parent of the
# notebook directory is the code root, and the data directory sits next to it.
notebook_code_root = os.path.split(os.getcwd())[0]
data_root = os.path.split(notebook_code_root)[0] + '/data/'
print("Data root: %s" % data_root)
# Put the code root first on the import path (presumably where the project
# modules imported below, cutils and vgg16, live).
sys.path.insert(0, notebook_code_root)

import cutils
from cutils import *

from keras.layers.core import Reshape
from keras.utils import to_categorical

from vgg16 import *

# Notebook-wide options. preprocess=False is what get_data() checks to decide
# whether to load the saved convolutional features instead of recomputing them.
# NOTE(review): sample_mode=False presumably selects the full data set rather
# than a sample -- confirm against NotebookData.
opts = NotebookData(
    data_dir=data_root,
    sample_mode=False,
    preprocess=False,
)
print(opts)

Data root: /home/ubuntu/data/


Using Theano backend.


Options:
  Testing directory: /home/ubuntu/data/statefarm/test/
  Training directory: /home/ubuntu/data/statefarm/train/
  Validation directory: /home/ubuntu/data/statefarm/valid/
  Preprocess directory: /home/ubuntu/data/statefarm/preprocessed/
  Results directory: /home/ubuntu/data/statefarm/results


Using cuDNN version 5103 on context None
Mapped name None to device cuda: Tesla K80 (0000:00:1E.0)


In [2]:
# Instantiate the project's Vgg16 wrapper (presumably provided by
# `from vgg16 import *`). The underlying Keras model is read from `vgg.model`
# in the next cell.
vgg = Vgg16()

In [3]:
# Split the VGG model at its last max-pooling layer: everything up to and
# including that layer becomes `conv_model`; the remaining layers are kept in
# `fc_layers`.
ls = vgg.model.layers
pooling_positions = [pos for pos, layer in enumerate(ls)
                     if type(layer) is MaxPooling2D]
if not pooling_positions:
    # Previously this case silently fell back to index 0, which produced a
    # "conv model" made of just the first layer. Fail loudly instead.
    raise ValueError("No MaxPooling2D layer found in the VGG model; "
                     "cannot split convolutional and dense layers.")
idx = pooling_positions[-1]

conv_model = Sequential(layers=ls[:idx + 1])
fc_layers = ls[idx + 1:]

In [5]:
def get_data(opt):
    """Return convolutional features and one-hot labels for train and valid.

    When ``opt.preprocess`` is true, the images under ``opt.train_dir()`` and
    ``opt.valid_dir()`` are run through the module-level ``conv_model`` and
    the results are saved under ``<opt.data_root>/preprocessed/conv``.
    Otherwise the four arrays are loaded from that directory.

    Args:
        opt: options object providing ``data_root``, ``preprocess``,
            ``train_dir()`` and ``valid_dir()``.

    Returns:
        Tuple ``(train_features, valid_features, train_labels, valid_labels)``.
    """
    cache_dir = os.path.join(opt.data_root, 'preprocessed', 'conv')
    trn_feat_path = os.path.join(cache_dir, 'train-dt')
    val_feat_path = os.path.join(cache_dir, 'val-dt')
    trn_label_path = os.path.join(cache_dir, 'train-cl')
    val_label_path = os.path.join(cache_dir, 'val-cl')

    if not opt.preprocess:
        return (load_array(trn_feat_path),
                load_array(val_feat_path),
                load_array(trn_label_path),
                load_array(val_label_path))

    batch_size = 64
    gen = image.ImageDataGenerator()

    def _features_and_labels(directory):
        # Compute conv features and one-hot labels for one image directory.
        # shuffle=False keeps the predicted features in the same order as
        # `batches.classes`, so features and labels stay aligned.
        batches = gen.flow_from_directory(directory,
                                          target_size=(224, 224),
                                          class_mode='categorical',
                                          shuffle=False,
                                          batch_size=batch_size)
        # Fix the label width to the number of class directories; otherwise
        # it is inferred from the largest label present and comes out too
        # narrow when the last class has no images.
        labels = to_categorical(batches.classes,
                                num_classes=len(batches.class_indices))
        # Directory generators loop forever, so tell Keras how many batches
        # make up exactly one pass (ceiling division keeps the last partial
        # batch).
        n_images = len(batches.classes)
        steps = (n_images + batch_size - 1) // batch_size
        features = conv_model.predict_generator(batches,
                                                steps=steps,
                                                verbose=1)
        return features, labels

    trn_feats, trn_labels = _features_and_labels(opt.train_dir())
    val_feats, val_labels = _features_and_labels(opt.valid_dir())

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    save_array(trn_feat_path, trn_feats)
    save_array(val_feat_path, val_feats)
    save_array(trn_label_path, trn_labels)
    save_array(val_label_path, val_labels)

    return (trn_feats, val_feats, trn_labels, val_labels)

In [6]:
# Fetch the convolutional features and one-hot labels (computed or loaded,
# depending on opts.preprocess) for the training and validation sets.
(trn_features,
 val_features,
 img_trn_classes,
 img_val_classes) = get_data(opts)

In [7]:
# Shuffle the training features and labels with one shared permutation so
# every sample keeps its own label. Indexing with a permutation avoids
# building a Python list of (feature, label) pairs and re-stacking it, and
# also works on Python 3, where zip() returns an iterator that
# np.random.shuffle cannot shuffle.
n_train = len(trn_features)
if len(img_trn_classes) != n_train:
    # zip() used to truncate silently to the shorter input here.
    raise ValueError("Feature/label count mismatch: %d features vs %d labels"
                     % (n_train, len(img_trn_classes)))
shuffle_order = np.random.permutation(n_train)
training_data = np.asarray(trn_features)[shuffle_order]
training_classes = np.asarray(img_trn_classes)[shuffle_order]

In [12]:
# Build a new model made only of dense layers, to be trained on the
# precomputed convolutional features. Because it is based on Vgg16 it keeps
# the same number of dense layers, but note that dropout is NOT zero here:
# the rates are 0.1 after the first dense layer and 0.2 after the second.
fc_model = Sequential()
# The input is the shape of the conv result; flatten 512 * 7 * 7 = 25088.
fc_model.add(Reshape((25088,), input_shape=(512, 7, 7)))
fc_model.add(Dense(4096, activation='relu'))
fc_model.add(Dropout(0.1))
fc_model.add(Dense(4096, activation='relu'))
fc_model.add(Dropout(0.2))
# Final layer: 10-way softmax.
fc_model.add(Dense(10, activation='softmax'))

In [13]:
# Compile the dense model: Adam with a learning rate of 1e-6, categorical
# cross-entropy loss, and accuracy reported as the metric.
head_optimizer = Adam(lr=1e-6)
fc_model.compile(loss='categorical_crossentropy',
                 optimizer=head_optimizer,
                 metrics=['accuracy'])

In [16]:
# Train the dense model on the shuffled conv features for three epochs,
# validating against the validation features. Left as a bare expression so
# the cell still displays the returned History object.
fc_model.fit(training_data,
             training_classes,
             validation_data=(val_features, img_val_classes),
             batch_size=32,
             epochs=3,
             verbose=1)

Train on 20181 samples, validate on 2243 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7efd1b2b5090>

In [17]:
# Chain the convolutional layers and the newly trained dense layers into a
# single end-to-end model.
total_model_layers = list(conv_model.layers)
total_model_layers.extend(fc_model.layers)
final_model = Sequential(total_model_layers)

In [18]:
# Hand the combined model to the project helper `process_model` (presumably
# from `cutils`), tagged 'v6' and with sub=True.
# NOTE(review): sub=True presumably asks the helper to write a submission
# file -- confirm against its definition.
# NOTE(review): the recorded output of this cell is a dump of a namespace
# dictionary, which looks like leftover debug output inside the helper --
# confirm and remove it there.
process_model(final_model, opts, 'v6', sub=True)

{'disp': <function disp at 0x7efe74040de8>, 'union1d': <function union1d at 0x7efe6c270848>, 'all': <function all at 0x7efe74103230>, 'issubsctype': <function issubsctype at 0x7efe740d7d70>, 'savez': <function savez at 0x7efe57da90c8>, 'atleast_2d': <function atleast_2d at 0x7efe740a4c80>, 'restoredot': <function restoredot at 0x7efe740f2578>, 'ptp': <function ptp at 0x7efe74103398>, 'PackageLoader': <class 'numpy._import_tools.PackageLoader'>, 'ix_': <function ix_ at 0x7efe6c240a28>, 'mirr': <function mirr at 0x7efe57dac0c8>, 'blackman': <function blackman at 0x7efe74038488>, 'FLOATING_POINT_SUPPORT': 1, 'division': _Feature((2, 2, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0), 8192), 'busdaycalendar': <type 'numpy.busdaycalendar'>, 'pkgload': <function pkgload at 0x7efe74107230>, 'void': <type 'numpy.void'>, 'ubyte': <type 'numpy.uint8'>, 'moveaxis': <function moveaxis at 0x7efe740f27d0>, 'ERR_RAISE': 2, 'void0': <type 'numpy.void'>, 'tri': <function tri at 0x7efe7402cb90>, 'diag_indices': <

In [19]:
# Display a clickable link to 'submission.csv' as this cell's output.
# NOTE(review): assumes 'submission.csv' exists in the notebook's working
# directory, presumably written by the process_model(..., sub=True) call
# above -- confirm.
from IPython.display import FileLink

FileLink('submission.csv')