# Setup

## Imports

In [29]:
from keras.models import Sequential, Model
from keras.applications import VGG16
from keras.layers import (Dense, BatchNormalization, Convolution2D, 
                         MaxPooling2D, Dropout, Input, Flatten)
from keras.optimizers import Adam
from keras_utilities import Vgg16, get_batches, onehot
from keras.preprocessing.image import ImageDataGenerator
from keras_utilities import load_array, save_array
import os
from IPython.display import FileLink
import numpy as np

## Control Panel

In [11]:
# Root folder of the image set used by the augmentation experiments; it is
# concatenated directly with "train" / "valid", so it must end with a slash.
# Alternative datasets (swap the active assignment to use one of them):
# path = "sample/"
# path = "data/"
path = "microsample/"

# Folder prefix for saved model weights (.h5 checkpoints); also concatenated
# directly with the file name, hence the trailing slash.
model_dir = "models/"

# Experiment with Augmentation

In [2]:
vgg = Vgg16()

In [4]:
trn_batches = get_batches(path+"train")
val_batches = get_batches(path+"valid")

Found 2057 images belonging to 10 classes.
Found 1983 images belonging to 10 classes.


In [5]:
trn_labels = onehot(trn_batches.classes)
val_labels = onehot(val_batches.classes)

Fine-tune the vgg model.

In [7]:
vgg.ft(10)

Fit a baseline model to see what happens without any data augmentation.

In [7]:
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Epoch 1/1


<keras.callbacks.History at 0x7f4ee5f608d0>

Without any augmentation, we get a val loss of 2.9975.

## ZCA Whitening

In [8]:
# ZCA-whitening experiment: rebuild the training batches with whitening requested,
# create a fresh Vgg16, prepare it with vgg.ft(10) and train for a single epoch.
# Validation still uses the un-augmented val_batches created earlier.
# NOTE(review): the Keras docs state that zca_whitening relies on statistics
# computed by gen.fit(<sample images>), which is never called here, so whitening
# may not actually be applied to the batches. Confirm before trusting this result.
gen = ImageDataGenerator(zca_whitening=True)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1
   4/2057 [..............................] - ETA: 93s - loss: 1.7248 - acc: 0.5000





<keras.callbacks.History at 0x7f60a2aa1bd0>

Substantial improvement to 2.1387.

**Conclusion: use ZCA whitening**

## Rotation Range

In [12]:
# Rotation experiment: same protocol as the other augmentation cells (fresh Vgg16,
# vgg.ft(10), one epoch, validation on the un-augmented val_batches).
# NOTE(review): Keras documents rotation_range as a range in *degrees*, so .1
# means rotations of at most a tenth of a degree, which is effectively none.
# Confirm the intended scale (e.g. 10 or 20) before comparing runs.
gen = ImageDataGenerator(rotation_range=.1)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f60912b2150>

Helped a little, not much.

In [13]:
gen = ImageDataGenerator(rotation_range=.2)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f608dff60d0>

Nice! Solid val loss.

In [15]:
gen = ImageDataGenerator(rotation_range=.2)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)


Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f608ad71290>

**Conclusion: rotation range of 0.2.**

## Width Shift Range

In [20]:
gen = ImageDataGenerator(width_shift_range=.1)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f607d08bb10>

Better than without.

In [21]:
gen = ImageDataGenerator(width_shift_range=.2)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f607857f290>

WHOA big improvement!

In [22]:
gen = ImageDataGenerator(width_shift_range=.3)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f60751bed50>

Worse

**Conclusion: 0.2 width shift range.**

## Height Shift Range

In [23]:
gen = ImageDataGenerator(height_shift_range=.1)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f60726ba190>

In [24]:
gen = ImageDataGenerator(height_shift_range=.2)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f606f2ac190>

Worse

In [25]:
gen = ImageDataGenerator(height_shift_range=.3)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f606bd688d0>

## Shear Range

In [9]:
gen = ImageDataGenerator(shear_range=.1)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4ecdaeaa10>

Not bad.

In [10]:
gen = ImageDataGenerator(shear_range=.2)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4ec68461d0>

In [11]:
gen = ImageDataGenerator(shear_range=.3)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4ec35b8110>

Awful.

**Conclusion: shear range = 0.1**

## Zoom Range

In [12]:
gen = ImageDataGenerator(zoom_range=.1)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4ec02c9d50>

In [13]:
gen = ImageDataGenerator(zoom_range=.2)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4ebd0bb0d0>

In [14]:
gen = ImageDataGenerator(zoom_range=.3)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4eb9e85150>

**Conclusion: zoom_range = 0.2**

## Channel Shift Range

In [15]:
gen = ImageDataGenerator(channel_shift_range=.1)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4eb67b8310>

In [16]:
gen = ImageDataGenerator(channel_shift_range=.2)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4eb391e050>

In [17]:
gen = ImageDataGenerator(channel_shift_range=.3)
trn_batches = get_batches(path+"train", gen=gen)
vgg = Vgg16()
vgg.ft(10)
vgg.model.fit_generator(generator=trn_batches, samples_per_epoch=trn_batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

Found 2057 images belonging to 10 classes.
Epoch 1/1


<keras.callbacks.History at 0x7f4eb0796050>

Final params:

* ZCA Whitening = True
* Rotation Range = 0.2
* Width Shift Range = 0.2
* Height Shift Range = 0.3
* Shear Range = 0.1
* Zoom Range = 0.2
* Channel Shift Range = 0.2

# Precompute Convolutional Layers

In [2]:
# Start from a fresh Vgg16 and cut it after its last convolutional layer: the
# resulting conv_model maps an input image to that layer's output, so the
# convolutional features can be precomputed once and reused.
vgg = Vgg16()
conv_layer_indices = [idx for idx, layer in enumerate(vgg.model.layers)
                      if isinstance(layer, Convolution2D)]
last_conv_layer = conv_layer_indices[-1]
conv_model = Model(input=vgg.model.input,
                   output=vgg.model.layers[last_conv_layer].output)

In [8]:
# Generator combining the settings listed under "Final params" above; it is used
# to produce the augmented training batches for the precomputed conv features.
# NOTE(review): two of these values deserve a second look against the Keras docs:
#   - rotation_range is documented in degrees, so 0.2 is practically no rotation;
#   - zca_whitening needs gen.fit(<sample images>) to compute its statistics,
#     and no fit call appears in this notebook.
# Confirm both before relying on the augmentation actually taking effect.
gen = ImageDataGenerator(zca_whitening=True, rotation_range=0.2, width_shift_range=0.2,
                         height_shift_range=0.3, shear_range=0.1, zoom_range=0.2, 
                         channel_shift_range=0.2)

In [11]:
batch_aug = get_batches("data/train", gen=gen, batch_size=32, shuffle=False)

Found 20441 images belonging to 10 classes.


In [12]:
val_aug = get_batches("data/valid", gen=ImageDataGenerator(), batch_size=32, shuffle=False)

Found 1983 images belonging to 10 classes.


Generated a dataset 5x the size of our normal train set using image augmentation. This takes a really long time once, but then we only need to train a couple of output layers, which saves us iteration time going forward.

In [14]:
# aug_train = conv_model.predict_generator(batch_aug, val_samples=batch_aug.nb_sample*5)



In [16]:
# save_array("serialized/aug_train.dat", aug_train)

In [19]:
aug_train = load_array("serialized/aug_train.dat")

MemoryError: 

In [18]:
# aug_val = conv_model.predict_generator(val_aug, val_samples=val_aug.nb_sample)
# save_array("serialized/aug_val.dat", aug_val)
aug_val = load_array("serialized/aug_val.dat")

# Train VGG Dense Layers

Save this so we don't have to deal with this again.

This is a vgg-inspired set of layers, but I'm not copying their architecture to a T. I'mma do me.

In [48]:
# Earlier functional-API sketch of the head, kept for reference only (not executed):
# inp = Input(conv_model.layers[-1].output_shape[1:])
# mp1 = MaxPooling2D((2,2))(inp)
# fl1 = Flatten()(mp1)
# bn1 = BatchNormalization(axis=1)(fl1)
# dr1 = Dropout(0.5)(bn1)
# dn1 = Dense(128, activation='relu')(dr1)
# bn2 = BatchNormalization()(dn1)
# dr2 = Dropout(0.5)(bn2)
# dn2 = Dense(128, activation='relu')(dr2)
# bn3 = BatchNormalization()(dn2)
# dr3 = Dropout(0.5)(bn3)
# dn3 = Dense(10, activation='softmax')(dr3)
# ft_model = Model(input=inp, output=dn3)

# Dense head trained on the precomputed convolutional features. Its input shape
# is the output shape of conv_model's last layer (batch dimension dropped).
# p is the dropout rate of the final dropout layer; the two earlier dropout
# layers use half of it.
p = .8
conv_feature_shape = conv_model.layers[-1].output_shape[1:]
head_layers = [
    MaxPooling2D(input_shape=conv_feature_shape),
    Flatten(),
    Dropout(p/2.),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(p/2.),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(p),
    Dense(10, activation='softmax'),
]
ft_model = Sequential()
for head_layer in head_layers:
    ft_model.add(head_layer)

Interestingly, my version took up too much memory. I can't figure out exactly why, seeing as I only used one extra layer and my architecture was nearly the same. I need to research this more.

In [49]:
labels_aug = onehot(batch_aug.classes)

In [50]:
val_labels_aug = onehot(val_aug.classes)

In [51]:
# Labels for the augmented training features: the one-hot labels of a single
# pass over batch_aug, stacked once per augmentation pass. The pass count must
# match the multiplier used when aug_train was precomputed (nb_sample*5).
# numpy is already imported in the imports cell at the top of the notebook, and
# the labels are encoded once and concatenated in a single call instead of
# re-encoding and re-copying the growing array on every loop iteration.
n_aug_passes = 5
labels_aug = np.concatenate([onehot(batch_aug.classes)] * n_aug_passes, axis=0)

In [52]:
labels_aug.shape

(102205, 10)

In [53]:
ft_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [54]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc801cb28d0>

In [57]:
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_1epoch.h5")

In [58]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc801d44bd0>

In [59]:
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_2epochs.h5")

In [60]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc801cdbfd0>

In [61]:
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_3epochs.h5")

In [62]:
ft_model.compile(optimizer=Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

**This is our winner.**

In [63]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc7ff10df10>

In [64]:
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_4epochs.h5")

In [65]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc7ff39ffd0>

In [66]:
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_5epochs.h5")

In [67]:
ft_model.compile(optimizer=Adam(lr=0.000001), loss='categorical_crossentropy', metrics=['accuracy'])

In [68]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc801968ed0>

In [69]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc801c62790>

In [70]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc7ff123150>

In [71]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc801c62210>

# Without Halving Regularization Params

In [72]:
# inp = Input(conv_model.layers[-1].output_shape[1:])
# mp1 = MaxPooling2D((2,2))(inp)
# fl1 = Flatten()(mp1)
# bn1 = BatchNormalization(axis=1)(fl1)
# dr1 = Dropout(0.5)(bn1)
# dn1 = Dense(128, activation='relu')(dr1)
# bn2 = BatchNormalization()(dn1)
# dr2 = Dropout(0.5)(bn2)
# dn2 = Dense(128, activation='relu')(dr2)
# bn3 = BatchNormalization()(dn2)
# dr3 = Dropout(0.5)(bn3)
# dn3 = Dense(10, activation='softmax')(dr3)

# ft_model = Model(input=inp, output=dn3)
p = .8
ft_model = Sequential([
        MaxPooling2D(input_shape=conv_model.layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ])

Interestingly, my version took up too much memory. I can't figure out exactly why, seeing as I only used one extra layer and my architecture was nearly the same. I need to research this more.

In [73]:
labels_aug = onehot(batch_aug.classes)

In [74]:
val_labels_aug = onehot(val_aug.classes)

In [75]:
# Labels for the augmented training features: the one-hot labels of a single
# pass over batch_aug, stacked once per augmentation pass. The pass count must
# match the multiplier used when aug_train was precomputed (nb_sample*5).
# numpy is already imported in the imports cell at the top of the notebook, and
# the labels are encoded once and concatenated in a single call instead of
# re-encoding and re-copying the growing array on every loop iteration.
n_aug_passes = 5
labels_aug = np.concatenate([onehot(batch_aug.classes)] * n_aug_passes, axis=0)

In [76]:
labels_aug.shape

(102205, 10)

In [77]:
ft_model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [78]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc8006ca2d0>

In [79]:
# Checkpoint after epoch 1 of the no-halving run. Written under model_dir, the
# configured checkpoint folder, instead of the current working directory.
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_nohalf_1epoch.h5")

In [80]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc7ff39fb90>

In [81]:
# Checkpoint after epoch 2 of the no-halving run. Written under model_dir, the
# configured checkpoint folder, instead of the current working directory.
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_nohalf_2epochs.h5")

In [82]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc8006caf10>

In [83]:
# Checkpoint after epoch 3 of the no-halving run. Written under model_dir, the
# configured checkpoint folder, instead of the current working directory.
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_nohalf_3epochs.h5")

In [84]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc8006c9650>

In [85]:
# Checkpoint after epoch 4 of the no-halving run. Written under model_dir, the
# configured checkpoint folder, instead of the current working directory.
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_nohalf_4epochs.h5")

In [86]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc8006c9790>

In [87]:
# Checkpoint after epoch 5 of the no-halving run. Written under model_dir, the
# configured checkpoint folder, instead of the current working directory.
ft_model.model.save_weights(model_dir+"vgg_precompute_conv_nohalf_5epochs.h5")

In [88]:
ft_model.compile(optimizer=Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

In [89]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc7fd727a10>

In [90]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc7ff5f7810>

In [91]:
ft_model.fit(aug_train, 
             labels_aug, 
             validation_data=(aug_val, val_labels_aug), nb_epoch=1)

Train on 102205 samples, validate on 1983 samples
Epoch 1/1


<keras.callbacks.History at 0x7fc7ff5f8ad0>

Done -- looks like we're not beating our original model.

# Create Prediction File

In [3]:
test_batches = get_batches("data/test", batch_size=64, shuffle=False)

Found 79726 images belonging to 1 classes.


In [4]:
test_set_conv_output = conv_model.predict_generator(test_batches, 
                                                    val_samples=test_batches.nb_sample)

In [6]:
save_array("serialized/test_set_conv_output.dat", test_set_conv_output)

In [7]:
# Rebuild the dense head used for prediction. The layer stack has to mirror the
# ft_model head trained earlier (halved dropout on the first two dropout layers),
# because the "vgg_precompute_conv_4epochs.h5" weights saved from that model are
# loaded into this one in the next cell.
p = .8
conv_feature_shape = conv_model.layers[-1].output_shape[1:]
final_layers = [
    MaxPooling2D(input_shape=conv_feature_shape),
    Flatten(),
    Dropout(p/2.),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(p/2.),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(p),
    Dense(10, activation='softmax'),
]
final_model = Sequential()
for final_layer in final_layers:
    final_model.add(final_layer)

In [13]:
final_model.load_weights(model_dir+"vgg_precompute_conv_4epochs.h5")

In [37]:
def create_prediction_file(filename, preds=None, model=None, clip_rate=.01,
                           test_dir="data/test/unknown/"):
    """Write a submission CSV with one row of class probabilities per test image.

    The file starts with the header ``img,c0,...,c9`` followed by one line per
    image: the image file name and its probabilities formatted with 10 decimals.

    Args:
        filename: Path of the CSV to write. An existing file is overwritten.
        preds: Optional array-like of per-image class probabilities (one row
            per image, one column per class). Written as given, without clipping.
        model: Model used when `preds` is None. Its `predict` is called on the
            notebook-level `test_set_conv_output` and the result is clipped to
            [clip_rate, 1 - clip_rate].
        clip_rate: Clipping margin applied to the model's predictions.
        test_dir: Directory whose file names label the rows.

    Raises:
        ValueError: If neither `preds` nor `model` is provided.
    """
    # `preds is None` rather than `not preds`: the truth value of a NumPy array
    # with more than one element is ambiguous and raises.
    if preds is None:
        if model is None:
            raise ValueError("Must pass either preds or a model.")
        preds = np.clip(model.predict(test_set_conv_output),
                        a_min=clip_rate,
                        a_max=1-clip_rate)

    # NOTE(review): rows are paired with predictions purely by position, so this
    # listing order must match the order in which the test images were fed to
    # the model. Confirm against the test generator's file order.
    files = os.listdir(test_dir)

    # np.asarray lets plain nested lists through as well as NumPy arrays.
    pred_rows = np.asarray(preds).tolist()
    pred_strs = [",".join(["%.10f" % prob for prob in row]) for row in pred_rows]
    rows = [",".join([name, pred_str]) + "\n"
            for name, pred_str in zip(files, pred_strs)]

    # Write mode, not append: the header is emitted on every call, so appending
    # to an existing file would produce a CSV with repeated headers and rows.
    with open(filename, "w") as f:
        f.write("img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9\n")
        for row in rows:
            f.write(row)

In [35]:
create_prediction_file("results/vgg_precompute_imgaug_clip01.csv", model=final_model)

In [36]:
FileLink("results/vgg_precompute_imgaug_clip01.csv")

In [38]:
create_prediction_file("results/vgg_precompute_imgaug_clip005.csv", model=final_model, 
                       clip_rate=.005)

In [39]:
FileLink("results/vgg_precompute_imgaug_clip005.csv")