In [36]:
%matplotlib inline
from __future__ import print_function
import os,sys
# The code root is the parent of the directory the notebook is run from;
# put it first on sys.path so the local helper modules can be imported.
notebook_code_root = os.path.dirname(os.getcwd())
sys.path.insert(0, notebook_code_root)

# The data directory sits next to the code root (one more level up).
data_root = os.path.dirname(notebook_code_root) + '/data/'
print("Data root: %s" % data_root)

import cutils
from cutils import *

from keras.layers.core import Reshape
from keras.utils import to_categorical

from vgg16 import *

# Notebook-wide options object; the directory accessors used further down
# (train_dir(), valid_dir(), data_root) all come from this instance.
opts = NotebookData(
    data_dir=data_root,
    sample_mode=False,
    preprocess=False,
)
print(opts)

Data root: /home/ubuntu/data/
Options:
  Testing directory: /home/ubuntu/data/statefarm/test/
  Training directory: /home/ubuntu/data/statefarm/train/
  Validation directory: /home/ubuntu/data/statefarm/valid/
  Preprocess directory: /home/ubuntu/data/statefarm/preprocessed/
  Results directory: /home/ubuntu/data/statefarm/results


In [37]:
# Load the VGG model.
# Vgg16 comes from the local `vgg16` module (star-imported above); the
# underlying layer stack is read from `vgg.model.layers` in the next cell.
vgg = Vgg16()

In [38]:
# Split the VGG layer stack right after its last MaxPooling2D layer:
# everything up to and including that layer becomes `conv_model`, and the
# remaining layers are kept in `fc_layers`.
ls = vgg.model.layers
pool_positions = [pos for pos, layer in enumerate(ls)
                  if type(layer) is MaxPooling2D]
# Fall back to index 0 when no pooling layer is present.
idx = pool_positions[-1] if pool_positions else 0

conv_model = Sequential(layers=ls[:idx + 1])
fc_layers = ls[idx + 1:]

In [39]:
# Directory iterators for the training and validation images. Both use
# the same settings, so the keyword arguments are declared once.
# NOTE(review): shuffle=False is presumably what keeps the iteration order
# aligned with `.classes` below -- confirm against the Keras docs.
gen = image.ImageDataGenerator()
flow_kwargs = dict(target_size=(224, 224),
                   class_mode='categorical',
                   shuffle=False,
                   batch_size=64)
img_trn_batches = gen.flow_from_directory(opts.train_dir(), **flow_kwargs)
img_val_batches = gen.flow_from_directory(opts.valid_dir(), **flow_kwargs)

# One-hot encode the integer class ids exposed by each iterator.
img_trn_classes = to_categorical(img_trn_batches.classes)
img_val_classes = to_categorical(img_val_batches.classes)

Found 20181 images belonging to 10 classes.
Found 2243 images belonging to 10 classes.


In [40]:
# Run the convolutional part of the network over both image sets (training
# first, then validation) and keep its outputs as fixed features.
trn_features, val_features = (
    conv_model.predict_generator(batches, verbose=1)
    for batches in (img_trn_batches, img_val_batches)
)



In [41]:
# Persist the conv features and one-hot labels under
# <data_root>/preprocessed/conv, creating the directory on first use.
trn_dir = os.path.join(opts.data_root, 'preprocessed', 'conv')
if not os.path.exists(trn_dir):
    os.makedirs(trn_dir)

# (file name, array) pairs, written in this order.
for array_name, array in (('train-dt', trn_features),
                          ('val-dt', val_features),
                          ('train-cl', img_trn_classes),
                          ('val-cl', img_val_classes)):
    save_array(os.path.join(trn_dir, array_name), array)

In [42]:
# Shuffle the training features and their labels with one shared random
# permutation so every feature row stays paired with its label row.
#
# Indexing with a permutation avoids building a Python list of
# (feature, label) tuples and re-stacking it twice, and it does not rely
# on zip() returning a list (which is only true on Python 2).
assert len(trn_features) == len(img_trn_classes), \
    "feature/label count mismatch: %d vs %d" % (len(trn_features),
                                                len(img_trn_classes))
shuffle_order = np.random.permutation(len(trn_features))
training_data = trn_features[shuffle_order]
training_classes = img_trn_classes[shuffle_order]

In [43]:
# Dense-only classifier trained on the precomputed conv features. It has
# the same number of dense layers as Vgg16 but no dropout layers (i.e.
# effectively zero dropout).
fc_model = Sequential([
    # Flatten the (512, 7, 7) conv output into a 512 * 7 * 7 = 25088 vector.
    Reshape((512 * 7 * 7,), input_shape=(512, 7, 7)),
    Dense(4096, activation='relu'),
    Dense(4096, activation='relu'),
    # One softmax output per class (10 classes were found on disk).
    Dense(10, activation='softmax'),
])

In [44]:
# Multi-class setup: categorical cross-entropy loss, accuracy as the
# reported metric, and Adam with a learning rate of 1e-6.
fc_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=1e-6),
    metrics=['accuracy'],
)

In [45]:
# Train the dense classifier on the shuffled conv features and validate
# against the held-out conv features after each epoch. The call is the
# last expression of the cell, so the returned History object is displayed.
fc_model.fit(
    x=training_data,
    y=training_classes,
    validation_data=(val_features, img_val_classes),
    batch_size=32,
    epochs=20,
    verbose=1,
)

Train on 20181 samples, validate on 2243 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f2bbf13b310>

In [49]:
# Chain the conv layers and the newly trained dense layers into one
# end-to-end model that goes from images to class probabilities.
total_model_layers = list(conv_model.layers)
total_model_layers.extend(fc_model.layers)
final_model = Sequential(total_model_layers)

In [53]:
# Pick up edits made to cutils.py without restarting the kernel.
reload(cutils)

# reload() does NOT rebind the names that `from cutils import *` copied into
# this namespace, so a bare `process_model` would still be the pre-reload
# function object. Calling it through the module uses the reloaded code.
# NOTE(review): assumes process_model is defined in cutils -- confirm.
cutils.process_model(final_model, opts, 'v5', sub=True)

{'disp': <function disp at 0x7f2c2d56ade8>, 'union1d': <function union1d at 0x7f2c2d523848>, 'all': <function all at 0x7f2c2d985230>, 'issubsctype': <function issubsctype at 0x7f2c2d959d70>, 'savez': <function savez at 0x7f2c2d0940c8>, 'atleast_2d': <function atleast_2d at 0x7f2c2d5cec80>, 'restoredot': <function restoredot at 0x7f2c2d974578>, 'ptp': <function ptp at 0x7f2c2d985398>, 'PackageLoader': <class 'numpy._import_tools.PackageLoader'>, 'ix_': <function ix_ at 0x7f2c2d4f2a28>, 'mirr': <function mirr at 0x7f2c2d0970c8>, 'blackman': <function blackman at 0x7f2c2d562488>, 'FLOATING_POINT_SUPPORT': 1, 'division': _Feature((2, 2, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0), 8192), 'busdaycalendar': <type 'numpy.busdaycalendar'>, 'pkgload': <function pkgload at 0x7f2c3c3e1230>, 'void': <type 'numpy.void'>, 'ubyte': <type 'numpy.uint8'>, 'moveaxis': <function moveaxis at 0x7f2c2d9747d0>, 'ERR_RAISE': 2, 'void0': <type 'numpy.void'>, 'tri': <function tri at 0x7f2c2d556b90>, 'diag_indices': <

NameError: global name 'FileLink' is not defined

In [54]:
from IPython.display import FileLink

# Show a link to the submission file as this cell's output.
# NOTE(review): 'submission.csv' is presumably written by the
# process_model(..., sub=True) call above -- confirm the file name and location.
FileLink('submission.csv')