# FIX STRUCT DIR, don't stratify by subject. Valid subjects shouldn't be in train set at all.

# Initialization

In [1]:
import os, shutil, importlib
import utils; importlib.reload(utils)

os.environ["CUDA_VISIBLE_DEVICES"]=''
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import h5py
import keras
import sklearn

from glob import glob
from PIL import Image
from os.path import join as jp

from keras import models, optimizers, regularizers, applications
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.layers import *
from keras.optimizers import Adam

from scipy import ndimage

from sklearn import metrics
from sklearn.model_selection import train_test_split

from IPython.display import FileLink

%matplotlib inline
np.set_printoptions(precision=4, linewidth=100)

Using TensorFlow backend.


## Struct Dir

For this comp, need to stratify the valid set based on the drivers as well as the categories

In [9]:
path = '../data/statefarm/'
cats = pd.read_csv('../data/statefarm/driver_imgs_list.csv').set_index('img')

In [10]:
utils.create_dirs(path, cats.classname.unique())

In [21]:
print("Take %d subjects" % np.floor(cats.subject.nunique() * 0.2))

Take 5 subjects


Move random subjects into valid

In [50]:
subj_valid = np.random.choice(cats.subject.unique(), 5, replace=False)
for cl in cats.classname.unique():
    imgs_valid = cats[(cats.classname==cl) & (cats.subject.isin(subj_valid))].index.values
    imgs_train = cats[(cats.classname==cl) & ~(cats.subject.isin(subj_valid))].index.values
    for img in imgs_valid:
        cats.ix[img, 'split'] = 'valid'
        shutil.move(os.path.join(path, 'train', cl, img),
                    os.path.join(path, 'valid', cl))
cats.split.fillna('train', inplace=True)
cats.to_csv(path+'driver_imgs_list.csv')

In [58]:
cats.groupby('split').subject.count()

split
train    18573
valid     3851
Name: subject, dtype: int64

Copy about 100 from each class into sample

In [59]:
for cl in cats.classname.unique():
    cats_train = cats[(cats.classname == cl) & (cats.split == 'train')]
    cats_valid = cats[(cats.classname == cl) & (cats.split == 'valid')]

#     # Remove any subjects that only have 1 occurence in either set
#     cats_valid_cnts = cats_valid.subject.value_counts()
#     cats_train_cnts = cats_train.subject.value_counts()
#     cats_valid_keep = cats_valid_cnts[cats_valid_cnts > 1].index.values
#     cats_train_keep = cats_train_cnts[cats_train_cnts > 1].index.values
#     cats_valid = cats_valid[cats_valid.subject.isin(cats_valid_keep)]    
#     cats_train = cats_train[cats_train.subject.isin(cats_train_keep)]    
    
    # Split data
    _, imgs_train = train_test_split(cats_train.index,
                                     test_size=100,
                                     stratify=cats_train.subject)
    _, imgs_valid = train_test_split(cats_valid.index,
                                     test_size=100,
                                     stratify=cats_valid.subject)
    for img_tr, img_v in zip(imgs_train, imgs_valid):
        shutil.copy(jp(path, 'train', cl, img_tr),
                    jp(path, 'sample', 'train', cl, img_tr))
        shutil.copy(jp(path, 'valid', cl, img_v),
                    jp(path, 'sample', 'valid', cl, img_v))

Copy test data

In [60]:
files = np.random.choice(glob(jp(path, 'test', 'test', '*.jpg')), 100, replace=False)
for img in files:
    shutil.copy(img, jp(path, 'sample', 'test', 'test', img.split('/')[-1]))

# Basic Models - Sample

## Define Paths

In [7]:
# path = '../data/statefarm/'
path = '../data/statefarm/sample/'
path_model = '../data/statefarm/models/'
path_test = path + 'test'
path_train = path + 'train'
path_valid = path + 'valid'
path_img_arrays = path + 'img_arrays/'

path_submit = path_model + 'submissions/'
path_checkpoint = path_model + 'checkpoints/'
path_results = path_model + 'results/'

batch_size = 8

## Create Batches

In [3]:
batch_size = 8

In [4]:
batches_train = utils.get_batches(path+'train', batch_size=batch_size)
batches_valid = utils.get_batches(path+'valid', batch_size=batch_size, shuffle=False)

Found 1000 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.


In [5]:
classes, filenames = utils.get_classes(path)

Found 1000 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.
Found 100 images belonging to 1 classes.


## Linear Model

In [98]:
model_linear = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Flatten(),
        Dense(10, activation='softmax')     
    ])

In [99]:
model_linear.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [100]:
model_linear.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=baches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=5, callbacks=None)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fbf68642518>

Model isnt doing anything, lower lr

In [102]:
model_linear = models.Sequential([
        BatchNormalization(axis=1, input_shape=(224, 224, 3)),
        Flatten(),
        Dense(10, activation='softmax')     
    ])
model_linear.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
model_linear.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=baches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=2, callbacks=None)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fbf682b0be0>

Increase lr 

In [103]:
K.set_value(model_linear.optimizer.lr, 0.0001)

In [105]:
model_linear.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=baches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=4, callbacks=None)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fbf68236a58>

acc_valid stablizes around 82. Generate random valid batches to see if there is sampling bias

In [106]:
batches_rand = utils.get_batches(jp(path, 'valid'), batch_size=batch_size, shuffle=True)

Found 1000 images belonging to 10 classes.


In [115]:
val_scores = [model_linear.evaluate_generator(batches_rand, batches_rand.n/10)
              for i in range(10)]

In [116]:
np.round(val_scores, 2)

array([[ 1.21,  0.79],
       [ 0.89,  0.84],
       [ 0.83,  0.83],
       [ 1.09,  0.82],
       [ 1.14,  0.81],
       [ 1.06,  0.79],
       [ 1.57,  0.78],
       [ 0.91,  0.77],
       [ 0.81,  0.81],
       [ 1.12,  0.81]])

## Linear Model with Maxout

In [117]:
model_linear_mo = models.Sequential([
        BatchNormalization(axis=1, input_shape=(224, 224, 3)),
        Flatten(),
        MaxoutDense(10),
        Activation('softmax')
    ])
model_linear_mo.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
model_linear_mo.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=baches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=2, callbacks=None)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fbf44237d68>

In [119]:
K.set_value(model_linear.optimizer.lr, 0.0001)

In [121]:
model_linear_mo.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=baches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=5, callbacks=None)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fbf68236828>

## L2 regularization

In [128]:
model_linear_l2 = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Flatten(),
        Dense(10, activation='softmax', W_regularizer=regularizers.l2(l=0.01))     
    ])
model_linear_l2.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
model_linear_l2.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=baches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=5, callbacks=None)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fbf307b1ac8>

## Single Conv Layer

In [10]:
def gen_model_conv(lr):
    model_conv = models.Sequential([
            BatchNormalization(axis=3, input_shape=(224, 224, 3)),
            Convolution2D(32, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Convolution2D(64, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Flatten(),
            MaxoutDense(100),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])
    model_conv.compile(Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    return model_conv

def test_model_conv(batches, epochs2=4):
    model_conv = models.Sequential([
            BatchNormalization(axis=3, input_shape=(224, 224, 3)),
            Convolution2D(32, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Convolution2D(64, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Flatten(),
            MaxoutDense(100),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])
    model_conv.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n,
                         nb_epoch=2, callbacks=None)
    K.set_value(model_conv.optimizer.lr, 1e-3)
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n,
                         nb_epoch=epochs2, callbacks=None)
    return model_conv

In [13]:
model_conv = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


Conv model trains very quickly. Add regularization

## Data Augmentation

Test different data augmentations to find best

#### Width

In [11]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [12]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


#### Height

In [13]:
gen_t = image.ImageDataGenerator(height_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [14]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2

KeyboardInterrupt: 

#### Rotation

In [9]:
gen_t = image.ImageDataGenerator(rotation_range=15, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### Shearing

In [9]:
gen_t = image.ImageDataGenerator(shear_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### Zoom

In [9]:
gen_t = image.ImageDataGenerator(zoom_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### Channel Shift

In [9]:
gen_t = image.ImageDataGenerator(channel_shift_range=10, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### All

In [11]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=10)
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [14]:
model_conv = gen_model_conv(lr=0.001)
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=4,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7faa3d62b320>

In [15]:
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=20,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7faa740f1d68>

# Basic Models - Full

Do previous experiments on full dataset

## Define Paths

In [2]:
# path = '../data/statefarm/'
path = '../data/statefarm/sample/'
path_model = '../data/statefarm/models/'
path_test = path + 'test'
path_train = path + 'train'
path_valid = path + 'valid'
path_img_arrays = path + 'img_arrays/'

path_submit = path_model + 'submissions/'
path_checkpoint = path_model + 'checkpoints/'
path_results = path_model + 'results/'

## Create Batches

In [3]:
batch_size = 64

In [4]:
batches_train = utils.get_batches(path+'train', batch_size=batch_size)
batches_valid = utils.get_batches(path+'valid', batch_size=batch_size, shuffle=False)


Found 1000 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.


Convert data to bcolz arrays

In [18]:
# utils.compress_imgs(path_train, path_img_arrays+'train.bc', chunk_size=5000)
# utils.compress_imgs(path_valid, path_img_arrays+'valid.bc')
# utils.compress_imgs(path_test, path_img_arrays+'test.bc', chunk_size=7500)

Load data from arrays

In [20]:
data_train = utils.load_array_bcolz(path_img_arrays+'train.bc')
data_valid = utils.load_array_bcolz(path_img_arrays+'valid.bc')
# data_test = utils.load_array_bcolz(path_img_arrays+'test.bc')
classes, filenames = utils.get_classes(path)
labels_train = classes[2]
labels_valid = classes[3]

FileNotFoundError: [Errno 2] No such file or directory: '../data/statefarm/img_arrays/test.bc/meta/sizes'

## Single Conv Layer

In [13]:
def gen_model_conv(lr):
    model_conv = models.Sequential([
            BatchNormalization(axis=3, input_shape=(224, 224, 3)),
            Convolution2D(32, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Convolution2D(64, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Flatten(),
            MaxoutDense(100),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])
    model_conv.compile(Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    return model_conv

def test_model_conv(batches, epochs2=4):
    model_conv = models.Sequential([
            BatchNormalization(axis=3, input_shape=(224, 224, 3)),
            Convolution2D(32, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Convolution2D(64, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Flatten(),
            MaxoutDense(100),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])
    model_conv.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n,
                         nb_epoch=2, callbacks=None)
    K.set_value(model_conv.optimizer.lr, 1e-3)
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n,
                         nb_epoch=epochs2, callbacks=None)
    return model_conv

### No Data Aug

In [14]:
model_conv = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2

KeyboardInterrupt: 

### Data Aug

#### Width

In [11]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [12]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


#### Height

In [13]:
gen_t = image.ImageDataGenerator(height_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [14]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2

KeyboardInterrupt: 

#### Rotation

In [9]:
gen_t = image.ImageDataGenerator(rotation_range=15, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### Shearing

In [9]:
gen_t = image.ImageDataGenerator(shear_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### Zoom

In [9]:
gen_t = image.ImageDataGenerator(zoom_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### Channel Shift

In [9]:
gen_t = image.ImageDataGenerator(channel_shift_range=10, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [None]:
model = test_model_conv(batches_train)

Epoch 1/2
Epoch 2/2
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4

#### All

In [11]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=10,
                                 horizontal_flip=True)
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

Found 1000 images belonging to 10 classes.


In [14]:
model_conv = gen_model_conv(lr=0.001)
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=4,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7faa3d62b320>

In [15]:
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=20,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7faa740f1d68>

## Four Conv + Dropout

In [10]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=10)
gen = image.ImageDataGenerator()
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)
batches_valid = gen.flow(data_valid, labels_valid, batch_size=batch_size, shuffle=False)

Found 17934 images belonging to 10 classes.


In [11]:
model = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Convolution2D(32, 3, 3, activation='relu'),
        BatchNormalization(axis=3),
        MaxPooling2D((2, 2)),
        Convolution2D(64, 3, 3, activation='relu'),
        BatchNormalization(axis=3),
        MaxPooling2D((2, 2)),
        Convolution2D(128, 3, 3, activation='relu'),
        BatchNormalization(axis=3),
        MaxPooling2D((2, 2)),       
        Flatten(),
        MaxoutDense(256),
        BatchNormalization(),
        Dropout(0.0),
        MaxoutDense(256),
        BatchNormalization(),
        Dropout(0.0),
        Dense(10, activation='softmax')        
    ])

In [12]:
model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [13]:
model.fit_generator(batches_train, batches_train.n, nb_epoch=2,
                   validation_data=batches_valid, nb_val_samples=batches_valid.n)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fe5eb24be10>

In [14]:
K.set_value(model.optimizer.lr, 0.0001)

In [15]:
model.fit_generator(batches_train, batches_train.n, nb_epoch=2,
                   validation_data=batches_valid, nb_val_samples=batches_valid.n)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fe5e80512e8>

In [16]:
K.set_value(model.optimizer.lr, 0.0001)

In [17]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_4conv.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=False, monitor='val_acc')]
model.fit_generator(batches_train, batches_train.n, nb_epoch=4,
                   validation_data=batches_valid, nb_val_samples=batches_valid.n,
                   callbacks=callbacks)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fe5e8051a20>

#### Checkpoint

In [9]:
model.load_weights(glob(path_checkpoint+'weights_4conv.*')[0])

IndexError: list index out of range

#### Submit

In [25]:
batches_test = utils.get_batches(path+'test', batch_size=batch_size, shuffle=False,
                                 class_mode=None)
preds = model.predict_generator(batches_test, batches_test.n)
np.save(path_results+'filenames_test_4conv.npy', batches_test.filenames)
np.save(path_results+'probs_test_4conv.npy', preds)

In [33]:
submit = pd.DataFrame(preds, columns=['c0','c1','c2','c3','c4','c5','c6','c7','c8','c9'])
submit['img'] = [x.split('/')[-1].split('.')[-2] for x in batches_test.filenames]
submit.to_csv(path_submit+'conv4.csv', index=False)
FileLink(path_submit+'conv4.csv')

# VGG19

In [3]:
import vgg19; importlib.reload(vgg19)
from vgg19 import Vgg19

In [4]:
vgg = Vgg19()

Loading model weights


## Conv Features

##### Create conv model

In [5]:
layers_conv, layers_fc = utils.split_at(vgg.model, Conv2D)

In [6]:
vgg_conv = models.Sequential(layers_conv)

##### Precalculate conv features

In [None]:
feats_conv_train = vgg_conv.predict(batches_train, batches_train.n)
feats_conv_valid = vgg_conv.predict(batches_valid, batches_valid.n)
# feats_conv_test = vgg_conv.predict(batches_test, batches_test.n)
# feats_conv_train = vgg_conv.predict_generator(batches_train, batches_train.n)
# feats_conv_valid = vgg_conv.predict_generator(batches_valid, batches_valid.n)
feats_conv_test = vgg_conv.predict_generator(batches_test, batches_test.n)

In [None]:
utils.save_array_bcolz(path_img_arrays+'train_conv.b5', feats_conv_train)
utils.save_array_bcolz(path_img_arrays+'valid_conv.b5', feats_conv_valid)
# utils.save_array_bcolz(path_img_arrays+'test_conv.b5', feats_conv_test)

In [None]:
feats_conv_train = utils.load_array_bcolz(path_img_arrays+'train_conv.b5')
feats_conv_valid = utils.load_array_bcolz(path_img_arrays+'valid_conv.b5')
# feats_conv_test = utils.load_array_bcolz(path_img_arrays+'test_conv.b5')

##### Precalculate data augmented conv features

In [None]:
gen_t = image.ImageDataGenerator(width_shift_range=0.15, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=20)
gen = image.ImageDataGenerator()
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size, shuffle=False)

In [None]:
feats_conv_train_da = vgg_conv.predict_generator(batches_train, batches_train.n)

## Simplified fc layers

In [8]:
def gen_fcbn_layers(p):
    return[
        MaxPooling2D(input_shape=layers_conv[-1].output_shape[1:]),
        Flatten(),
        Dropout(p/2),
        MaxoutDense(128),
        BatchNormalization(),
        Dropout(p/2),
        MaxoutDense(128),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ]
vgg_fc = models.Sequential(gen_fcbn_layers(0.8))
vgg_fc.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_4conv.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=False, monitor='val_acc')]
vgg_fc.fit(feats_conv_train, labels_train, batch_size=batch_size, nb_epoch=1,
           validation_data=(feats_conv_valid, labels_valid), callbacks=callbacks)