# Initialization

In [1]:
##### IMPORTS #####
import os, shutil, importlib
import utils; importlib.reload(utils)

# os.environ["CUDA_VISIBLE_DEVICES"]=''
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import h5py
import keras
import sklearn

from glob import glob
from PIL import Image
from os.path import join as jp

from keras import models, optimizers, regularizers, applications
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.layers import *
from keras.optimizers import Adam

from scipy import ndimage

from sklearn import metrics
from sklearn.model_selection import train_test_split

from IPython.display import FileLink

%matplotlib inline
np.set_printoptions(precision=4, linewidth=100)

Using TensorFlow backend.


## Struct Dir

For this competition, the validation set needs to be stratified by driver (subject) as well as by category (class).

In [None]:
# Root of the State Farm data; the image list is indexed by image file name.
path = '../data/statefarm/'
cats = pd.read_csv(path + 'driver_imgs_list.csv').set_index('img')

In [None]:
utils.create_dirs(path, cats.classname.unique())

In [None]:
print("Take %d subjects" % np.floor(cats.subject.nunique() * 0.2))

Move random subjects into valid

In [None]:
# Hold out roughly 20% of the drivers so that no driver appears in both train and valid.
n_subj_valid = int(np.floor(cats.subject.nunique() * 0.2))
subj_valid = np.random.choice(cats.subject.unique(), n_subj_valid, replace=False)

# Label every image in one vectorised step. This replaces the removed `DataFrame.ix`
# indexer and guarantees the `split` column exists even if no image lands in valid.
is_valid = cats.subject.isin(subj_valid)
cats['split'] = np.where(is_valid, 'valid', 'train')

# Move the held-out drivers' images from train/<class>/ to valid/<class>/.
for img, cl in cats.loc[is_valid, 'classname'].items():
    shutil.move(os.path.join(path, 'train', cl, img),
                os.path.join(path, 'valid', cl))

# Persist the split assignment next to the original image list.
cats.to_csv(path+'driver_imgs_list.csv')

In [None]:
cats.groupby('split').subject.count()

Copy about 100 from each class into sample

In [None]:
# Build a small sample set: for every class, draw 100 train and 100 valid images
# (stratified by driver) and copy them under sample/train and sample/valid.
for cl in cats.classname.unique():
    in_class = cats.classname == cl
    cats_train = cats[in_class & (cats.split == 'train')]
    cats_valid = cats[in_class & (cats.split == 'valid')]

    # Only the held-out part of each split is used: it has exactly 100 images.
    imgs_train = train_test_split(cats_train.index, test_size=100,
                                  stratify=cats_train.subject)[1]
    imgs_valid = train_test_split(cats_valid.index, test_size=100,
                                  stratify=cats_valid.subject)[1]

    # Walk both selections in lockstep, copying one train and one valid image per step.
    for img_tr, img_v in zip(imgs_train, imgs_valid):
        src_tr, dst_tr = jp(path, 'train', cl, img_tr), jp(path, 'sample', 'train', cl, img_tr)
        src_v, dst_v = jp(path, 'valid', cl, img_v), jp(path, 'sample', 'valid', cl, img_v)
        shutil.copy(src_tr, dst_tr)
        shutil.copy(src_v, dst_v)

Copy test data

In [None]:
# Copy 100 randomly chosen test images into the sample directory.
test_imgs = glob(jp(path, 'test', 'test', '*.jpg'))
files = np.random.choice(test_imgs, 100, replace=False)
for img in files:
    # os.path.basename is separator-agnostic, unlike splitting on '/'.
    shutil.copy(img, jp(path, 'sample', 'test', 'test', os.path.basename(img)))

# Basic Models - Sample

## Define Paths

In [None]:
# Data root for this section. Switch the two `path` lines to run on the full data
# set instead of the sample.
# path = '../data/statefarm/'
path = '../data/statefarm/sample/'
# Model artefacts always live under the full-data directory, not under sample/.
path_model = '../data/statefarm/models/'
path_test = path + 'test'
path_train = path + 'train'
path_valid = path + 'valid'
path_img_arrays = path + 'img_arrays/'

# Output locations for submissions, weight checkpoints and saved predictions.
path_submit = path_model + 'submissions/'
path_checkpoint = path_model + 'checkpoints/'
path_results = path_model + 'results/'

# Mini-batch size used by the generators in the sample experiments.
batch_size = 8

## Create Batches

In [None]:
batch_size = 8

In [None]:
# Directory iterators for the sample data; validation keeps file order (shuffle=False).
batches_train = utils.get_batches(path_train, batch_size=batch_size)
batches_valid = utils.get_batches(path_valid, batch_size=batch_size, shuffle=False)

In [None]:
classes, filenames = utils.get_classes(path)

## Linear Model

In [None]:
model_linear = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Flatten(),
        Dense(10, activation='softmax')     
    ])

In [None]:
model_linear.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the linear baseline for 5 epochs, validating on the full validation iterator.
# (Fixes the `baches_valid` typo, which raised NameError.)
model_linear.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=batches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=5, callbacks=None)

The model isn't learning anything; lower the learning rate.

In [None]:
# Same linear baseline, restarted with a much lower learning rate.
# BatchNormalization uses axis=3 (the channel axis of the 224x224x3 input), matching
# every other model in this notebook; the previous axis=1 normalised along image rows.
model_linear = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Flatten(),
        Dense(10, activation='softmax')
    ])
model_linear.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
# `batches_valid` (was misspelled `baches_valid`, a NameError).
model_linear.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=batches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=2, callbacks=None)

Increase lr 

In [None]:
K.set_value(model_linear.optimizer.lr, 0.0001)

In [None]:
# Continue training the linear model for 4 more epochs at the raised learning rate.
# (Fixes the `baches_valid` typo, which raised NameError.)
model_linear.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                           validation_data=batches_valid, nb_val_samples=batches_valid.n,
                           nb_epoch=4, callbacks=None)

`acc_valid` stabilizes around 82. Generate random validation batches to see if there is sampling bias.

In [None]:
batches_rand = utils.get_batches(jp(path, 'valid'), batch_size=batch_size, shuffle=True)

In [None]:
# Score the model on 10 random validation subsets, each a tenth of the validation set.
# Floor division keeps the sample count an integer (`/` yields a float on Python 3).
val_scores = [model_linear.evaluate_generator(batches_rand, batches_rand.n // 10)
              for _ in range(10)]

In [None]:
np.round(val_scores, 2)

## Linear Model with Maxout

In [None]:
# Linear model with a maxout layer in place of the plain dense layer.
# BatchNormalization uses axis=3 (the channel axis of the 224x224x3 input), matching
# the conv models in this notebook; the previous axis=1 normalised along image rows.
model_linear_mo = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Flatten(),
        MaxoutDense(10),
        Activation('softmax')
    ])
model_linear_mo.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
# `batches_valid` (was misspelled `baches_valid`, a NameError).
model_linear_mo.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                              validation_data=batches_valid, nb_val_samples=batches_valid.n,
                              nb_epoch=2, callbacks=None)

In [None]:
# Raise the learning rate of the maxout model (the one trained in the next cell);
# the original set it on `model_linear`, leaving `model_linear_mo` unchanged.
K.set_value(model_linear_mo.optimizer.lr, 0.0001)

In [None]:
# Train the maxout model for 5 more epochs.
# (Fixes the `baches_valid` typo, which raised NameError.)
model_linear_mo.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                              validation_data=batches_valid, nb_val_samples=batches_valid.n,
                              nb_epoch=5, callbacks=None)

## L2 regularization

In [None]:
# Linear baseline with an L2 weight penalty (0.01) on the softmax layer.
model_linear_l2 = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Flatten(),
        Dense(10, activation='softmax', W_regularizer=regularizers.l2(l=0.01))
    ])
model_linear_l2.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
# `batches_valid` (was misspelled `baches_valid`, a NameError).
model_linear_l2.fit_generator(batches_train, samples_per_epoch=batches_train.n,
                              validation_data=batches_valid, nb_val_samples=batches_valid.n,
                              nb_epoch=5, callbacks=None)

## Single Conv Layer

In [None]:
def gen_model_conv(lr):
    """Build and compile the small conv net used throughout this section.

    The network has two conv -> batch-norm -> max-pool stages, followed by a
    100-unit maxout layer and a 10-way softmax, for 224x224x3 input.

    Args:
        lr: learning rate passed to the Adam optimizer.

    Returns:
        The compiled, untrained Sequential model.
    """
    model_conv = models.Sequential([
            BatchNormalization(axis=3, input_shape=(224, 224, 3)),
            Convolution2D(32, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Convolution2D(64, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Flatten(),
            MaxoutDense(100),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])
    model_conv.compile(Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    return model_conv

def test_model_conv(batches, epochs2=4):
    """Train a fresh conv model on `batches` with a two-stage learning-rate schedule.

    Runs 2 warm-up epochs at lr=1e-4, then `epochs2` epochs at lr=1e-3. Validation
    uses the notebook-level `batches_valid` iterator, which must already exist.

    Args:
        batches: training iterator; its `.n` attribute is used as samples per epoch.
        epochs2: number of epochs for the second (lr=1e-3) stage.

    Returns:
        The trained model.
    """
    # Build through gen_model_conv so the architecture is defined in one place only.
    model_conv = gen_model_conv(lr=1e-4)
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                             validation_data=batches_valid, nb_val_samples=batches_valid.n,
                             nb_epoch=2, callbacks=None)
    # Second stage: raise the learning rate in place, keeping the optimizer state.
    K.set_value(model_conv.optimizer.lr, 1e-3)
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                             validation_data=batches_valid, nb_val_samples=batches_valid.n,
                             nb_epoch=epochs2, callbacks=None)
    return model_conv

In [None]:
model_conv = test_model_conv(batches_train)

Conv model trains very quickly. Add regularization

## Data Augmentation

Test different data augmentations to find best

#### Width

In [None]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Height

In [None]:
gen_t = image.ImageDataGenerator(height_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Rotation

In [None]:
gen_t = image.ImageDataGenerator(rotation_range=15, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Shearing

In [None]:
gen_t = image.ImageDataGenerator(shear_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Zoom

In [None]:
gen_t = image.ImageDataGenerator(zoom_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Channel Shift

In [None]:
gen_t = image.ImageDataGenerator(channel_shift_range=10, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### All

In [None]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=10)
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model_conv = gen_model_conv(lr=0.001)
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=4,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

In [None]:
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=20,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

# Basic Models - Full

## Define Paths

In [2]:
path = '../data/statefarm/'
# path = '../data/statefarm/sample/'
path_model = '../data/statefarm/models/'
path_test = path + 'test'
path_train = path + 'train'
path_valid = path + 'valid'
path_img_arrays = path + 'img_arrays/'

path_submit = path_model + 'submissions/'
path_checkpoint = path_model + 'checkpoints/'
path_results = path_model + 'results/'

## Create Batches

In [3]:
# Mini-batch size for the full-data experiments.
batch_size = 64
# NOTE(review): the three "Found N images" lines printed by this cell suggest
# get_classes scans train, valid and test — confirm in utils.
classes, filenames = utils.get_classes(path)
# Presumably elements 2 and 3 are the one-hot train / valid label arrays (later cells
# show labels derived from labels_train with 10 columns) — verify against utils.get_classes.
labels_train = classes[2]
labels_valid = classes[3]

Found 18572 images belonging to 10 classes.
Found 3852 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


##### Convert data to bcolz arrays

In [3]:
# utils.compress_imgs(path_train, path_img_arrays+'train.bc', chunk_size=5000)
# utils.compress_imgs(path_valid, path_img_arrays+'valid.bc')
utils.compress_imgs(path_test, path_img_arrays+'test.bc', chunk_size=5000)

Found 79726 images belonging to 1 classes.
Saving 16 chunks


##### Load data from arrays

In [4]:
# data_train = utils.load_array_bcolz(path_img_arrays+'train.bc')
# data_valid = utils.load_array_bcolz(path_img_arrays+'valid.bc')
data_test = utils.load_array_bcolz(path_img_arrays+'test.bc')

In [8]:
batches_train = utils.get_batches(path+'train', batch_size=batch_size)
batches_valid = utils.get_batches(path+'valid', batch_size=batch_size, shuffle=False)
batches_test = utils.get_batches(path+'test', batch_size=batch_size, shuffle=False)


Found 18572 images belonging to 10 classes.
Found 3852 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


## Single Conv Layer

In [None]:
def gen_model_conv(lr):
    """Build and compile the small conv net used throughout this section.

    The network has two conv -> batch-norm -> max-pool stages, followed by a
    100-unit maxout layer and a 10-way softmax, for 224x224x3 input.

    Args:
        lr: learning rate passed to the Adam optimizer.

    Returns:
        The compiled, untrained Sequential model.
    """
    model_conv = models.Sequential([
            BatchNormalization(axis=3, input_shape=(224, 224, 3)),
            Convolution2D(32, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Convolution2D(64, 3, 3, activation='relu'),
            BatchNormalization(axis=3),
            MaxPooling2D((3, 3)),
            Flatten(),
            MaxoutDense(100),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])
    model_conv.compile(Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    return model_conv

def test_model_conv(batches, epochs2=4):
    """Train a fresh conv model on `batches` with a two-stage learning-rate schedule.

    Runs 2 warm-up epochs at lr=1e-4, then `epochs2` epochs at lr=1e-3. Validation
    uses the notebook-level `batches_valid` iterator, which must already exist.

    Args:
        batches: training iterator; its `.n` attribute is used as samples per epoch.
        epochs2: number of epochs for the second (lr=1e-3) stage.

    Returns:
        The trained model.
    """
    # Build through gen_model_conv so the architecture is defined in one place only.
    model_conv = gen_model_conv(lr=1e-4)
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                             validation_data=batches_valid, nb_val_samples=batches_valid.n,
                             nb_epoch=2, callbacks=None)
    # Second stage: raise the learning rate in place, keeping the optimizer state.
    K.set_value(model_conv.optimizer.lr, 1e-3)
    model_conv.fit_generator(batches, samples_per_epoch=batches.n,
                             validation_data=batches_valid, nb_val_samples=batches_valid.n,
                             nb_epoch=epochs2, callbacks=None)
    return model_conv

### No Data Aug

In [None]:
model_conv = test_model_conv(batches_train)

### Data Aug

#### Width

In [None]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Height

In [None]:
gen_t = image.ImageDataGenerator(height_shift_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Rotation

In [None]:
gen_t = image.ImageDataGenerator(rotation_range=15, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Shearing

In [None]:
gen_t = image.ImageDataGenerator(shear_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Zoom

In [None]:
gen_t = image.ImageDataGenerator(zoom_range=0.1, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### Channel Shift

In [None]:
gen_t = image.ImageDataGenerator(channel_shift_range=10, fill_mode='constant')
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model = test_model_conv(batches_train)

#### All

In [None]:
gen_t = image.ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=10,
                                 horizontal_flip=True)
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)

In [None]:
model_conv = gen_model_conv(lr=0.001)
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=4,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

In [None]:
model_conv.fit_generator(batches_train, batches_train.n, nb_epoch=20,
                         validation_data=batches_valid, nb_val_samples=batches_valid.n)

## Four Conv + Dropout

In [None]:
# Augmented training batches: shifts, rotation, shear, zoom and channel shift.
gen_t = image.ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=10)
# Plain generator (no augmentation) for validation.
gen = image.ImageDataGenerator()
batches_train = utils.get_batches(path+'train', gen_t, batch_size=batch_size)
# Read validation images from disk in file order. The original flowed from `data_valid`,
# which is never defined in this notebook (its load is commented out) and so raised
# NameError on a fresh kernel.
batches_valid = utils.get_batches(path+'valid', gen, batch_size=batch_size, shuffle=False)

In [None]:
# Deeper conv net: three conv -> batch-norm -> 2x2 max-pool stages (32, 64 and 128
# filters), then two 256-unit maxout layers and a 10-way softmax.
# NOTE(review): the section title says "Four Conv + Dropout", but there are three conv
# layers here and both Dropout rates are 0.0, so dropout is a no-op — confirm intent.
model = models.Sequential([
        BatchNormalization(axis=3, input_shape=(224, 224, 3)),
        Convolution2D(32, 3, 3, activation='relu'),
        BatchNormalization(axis=3),
        MaxPooling2D((2, 2)),
        Convolution2D(64, 3, 3, activation='relu'),
        BatchNormalization(axis=3),
        MaxPooling2D((2, 2)),
        Convolution2D(128, 3, 3, activation='relu'),
        BatchNormalization(axis=3),
        MaxPooling2D((2, 2)),       
        Flatten(),
        MaxoutDense(256),
        BatchNormalization(),
        Dropout(0.0),
        MaxoutDense(256),
        BatchNormalization(),
        Dropout(0.0),
        Dense(10, activation='softmax')        
    ])

In [None]:
model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.fit_generator(batches_train, batches_train.n, nb_epoch=2,
                   validation_data=batches_valid, nb_val_samples=batches_valid.n)

In [None]:
K.set_value(model.optimizer.lr, 0.0001)

In [None]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_4conv1.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=True, monitor='val_acc')]
model.fit_generator(batches_train, batches_train.n, nb_epoch=2,
                   validation_data=batches_valid, nb_val_samples=batches_valid.n,
                    callbacks=callbacks)

In [None]:
epoch_keep = 2
utils.rm_checkpoints(path_checkpoint+'weights_4conv1', epoch_keep)

##### Checkpoint

In [None]:
model.load_weights(glob(path_checkpoint+'weights_4conv1.*')[0])

##### Submit

In [None]:
batches_test = utils.get_batches(path+'test', batch_size=batch_size, shuffle=False,
                                 class_mode=None)
preds = model.predict_generator(batches_test, batches_test.n)
np.save(path_results+'filenames_test_conv4.npy', batches_test.filenames)
np.save(path_results+'probs_test_conv4.npy', preds)

In [None]:
# Build the submission: one probability column per class (c0..c9) plus the image name.
# Probabilities are clipped to [0.05, 0.95] before writing.
class_cols = ['c%d' % i for i in range(10)]
submit = pd.DataFrame(preds.clip(0.05, 0.95), columns=class_cols)
# os.path.basename is separator-agnostic, unlike splitting on '/'.
submit['img'] = [os.path.basename(x) for x in batches_test.filenames]
submit.to_csv(path_submit+'conv4.csv', index=False)
# Link to the file just written, built from path_submit rather than a repeated literal.
FileLink(path_submit+'conv4.csv')

##### Lower lr

In [None]:
# NOTE(review): this section is titled "Lower lr", but 0.0001 is the same value the
# earlier K.set_value call on this model used — confirm whether 0.00001 was intended.
K.set_value(model.optimizer.lr, 0.0001)

In [None]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_4conv.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=False, monitor='val_acc')]
model.fit_generator(batches_train, batches_train.n, nb_epoch=4,
                   validation_data=batches_valid, nb_val_samples=batches_valid.n,
                   callbacks=callbacks)

#### Checkpoint

In [None]:
# The preceding run used save_best_only=False, so there is one checkpoint per epoch, and
# glob returns them in arbitrary order. Pick the file with the highest val_acc, which is
# the last '-'-separated field of the checkpoint name before the '.h5' suffix.
ckpts_4conv = glob(path_checkpoint+'weights_4conv.*')
best_ckpt_4conv = max(ckpts_4conv, key=lambda f: float(f[:-len('.h5')].rsplit('-', 1)[-1]))
model.load_weights(best_ckpt_4conv)

#### Submit

In [None]:
batches_test = utils.get_batches(path+'test', batch_size=batch_size, shuffle=False,
                                 class_mode=None)
preds = model.predict_generator(batches_test, batches_test.n)
np.save(path_results+'filenames_test_4conv.npy', batches_test.filenames)
np.save(path_results+'probs_test_4conv.npy', preds)

In [None]:
# Build the submission for the second ('4conv') run: class columns c0..c9 plus image name.
# NOTE(review): unlike the other submission cells, these probabilities are not clipped —
# confirm whether that is intentional.
class_cols = ['c%d' % i for i in range(10)]
submit = pd.DataFrame(preds, columns=class_cols)
# os.path.basename is separator-agnostic, unlike splitting on '/'.
submit['img'] = [os.path.basename(x) for x in batches_test.filenames]
# Named after this run's saved arrays (probs_test_4conv.npy) so that it does not
# overwrite the earlier conv4.csv submission.
submit.to_csv(path_submit+'4conv.csv', index=False)
FileLink(path_submit+'4conv.csv')

# VGG19

In [4]:
import vgg19; importlib.reload(vgg19)
from vgg19 import Vgg19

In [5]:
vgg = Vgg19()

Loading model weights


## Conv Features

##### Create conv model

In [9]:
layers_conv, layers_fc = utils.split_at(vgg.model, Conv2D)

In [10]:
vgg_conv = models.Sequential(layers_conv)

##### Precalculate conv features

In [11]:
# feats_conv_train = vgg_conv.predict(data_train, batch_size=batch_size)
# feats_conv_valid = vgg_conv.predict(data_valid, batch_size=batch_size)
# feats_conv_test = vgg_conv.predict(data_test, batch_size=batch_size)
# feats_conv_train = vgg_conv.predict_generator(batches_train, batches_train.n)
# feats_conv_valid = vgg_conv.predict_generator(batches_valid, batches_valid.n)
# feats_conv_test = vgg_conv.predict_generator(batches_test, batches_test.n)

In [12]:
# utils.save_array_bcolz(path_img_arrays+'conv_train.b5', feats_conv_train)
# utils.save_array_bcolz(path_img_arrays+'conv_valid.b5', feats_conv_valid)
# utils.save_array_bcolz(path_img_arrays+'test_conv.b5', feats_conv_test)

##### Precalculate data augmented conv features

In [10]:
# Augmentation used to precompute extra VGG conv features for the training set.
gen_t = image.ImageDataGenerator(width_shift_range=0.15, height_shift_range=0.1,
                                 rotation_range=15, shear_range=0.1,
                                 zoom_range=0.1, channel_shift_range=20)
gen = image.ImageDataGenerator()
# shuffle=False keeps the images in a fixed order on every pass, so the features
# predicted from these batches line up with repeated copies of labels_train.
batches_train = utils.get_batches(path+'train', gen_t, batch_size=64, shuffle=False)

Found 18572 images belonging to 10 classes.


In [11]:
# Request twice the training-set size: two full passes over the unshuffled iterator,
# i.e. two differently augmented versions of each training image.
feats_conv_train_da = vgg_conv.predict_generator(batches_train, batches_train.n*2)

In [12]:
feats_conv_train_da.shape

(37144, 14, 14, 512)

In [13]:
utils.save_array_bcolz(path_img_arrays+'conv_train_da.b5', feats_conv_train_da)

##### Load data from arrays

In [8]:
feats_conv_train = utils.load_array_bcolz(path_img_arrays+'conv_train.b5')
feats_conv_valid = utils.load_array_bcolz(path_img_arrays+'conv_valid.b5')
# feats_conv_test = utils.load_array_bcolz(path_img_arrays+'test_conv.b5')

In [9]:
feats_conv_train_da = utils.load_array_bcolz(path_img_arrays+'conv_train_da.b5')

In [10]:
feats_conv_train_da = np.concatenate([feats_conv_train_da, feats_conv_train])

In [11]:
# Three copies of the labels: two for the augmented passes plus one for the
# un-augmented features appended to feats_conv_train_da.
labels_train_da = np.concatenate([labels_train] * 3)

In [12]:
feats_conv_train_da.shape

(55716, 14, 14, 512)

In [13]:
labels_train_da.shape

(55716, 10)

## Simplified fc layers

#### Aug Data Train

##### Model

In [16]:
def gen_fcbn_layers(p):
    """Return the layer list for the classifier head fed by the VGG conv features.

    The head is: max-pool, flatten, two (dropout p/2 -> 256-unit maxout -> batch-norm)
    stages, a final dropout of rate `p`, and a 10-way softmax.

    Args:
        p: dropout rate before the softmax; the two earlier dropouts use p/2.

    Returns:
        A list of freshly constructed Keras layers, in order.
    """
    # The input shape is taken from the output of the last conv-part layer.
    head = [MaxPooling2D(input_shape=layers_conv[-1].output_shape[1:]), Flatten()]
    for _ in range(2):
        head += [Dropout(p/2), MaxoutDense(256), BatchNormalization()]
    head += [Dropout(p), Dense(10, activation='softmax')]
    return head

fc_layers = gen_fcbn_layers(0.6)
vgg_fc = models.Sequential(fc_layers)
vgg_fc.compile(Adam(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

##### Train

In [19]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_dafc1.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=True, monitor='val_acc')]
vgg_fc.fit(feats_conv_train_da, labels_train_da, batch_size=batch_size, nb_epoch=2,
           validation_data=(feats_conv_valid, labels_valid), callbacks=callbacks)

Train on 55716 samples, validate on 3852 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f48edb55b00>

In [20]:
epoch_keep = 2
utils.rm_checkpoints(path_checkpoint+'weights_dafc1', epoch_keep)

##### Checkpoint

In [None]:
vgg_fc.load_weights(glob(path_checkpoint+'weights_dafc1.*')[0])

##### Lower lr

In [21]:
K.set_value(vgg_fc.optimizer.lr, 0.0001)

In [22]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_dafc1.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=True, monitor='val_acc')]
vgg_fc.fit(feats_conv_train_da, labels_train_da, batch_size=batch_size, nb_epoch=2,
           validation_data=(feats_conv_valid, labels_valid), callbacks=callbacks)

Train on 55716 samples, validate on 3852 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f48ed938f28>

In [23]:
epoch_keep = 2
utils.rm_checkpoints(path_checkpoint+'weights_dafc1', epoch_keep)

##### Checkpoint

In [None]:
vgg_fc.load_weights(glob(path_checkpoint+'weights_dafc1.*')[0])

##### Lower lr

In [24]:
K.set_value(vgg_fc.optimizer.lr, 0.00001)

In [25]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_dafc1.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=True, monitor='val_acc')]
vgg_fc.fit(feats_conv_train_da, labels_train_da, batch_size=batch_size, nb_epoch=2,
           validation_data=(feats_conv_valid, labels_valid), callbacks=callbacks)

Train on 55716 samples, validate on 3852 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f48ee7cc978>

In [26]:
epoch_keep = 2
utils.rm_checkpoints(path_checkpoint+'weights_dafc1', epoch_keep)

##### Checkpoint

In [24]:
vgg_fc.load_weights(glob(path_checkpoint+'weights_dafc1.*')[0])

### Pseudo Labeling

#### Calculate Features

In [17]:
# Pseudo-labels: the current model's predicted class probabilities for the validation features.
# NOTE(review): later cells add these validation features to the training data and still
# validate on the same features, so val_loss / val_acc from that point on are optimistic.
labels_val_pseudo = vgg_fc.predict(feats_conv_valid, batch_size=batch_size)

In [18]:
feats_conv_pseudo = np.concatenate([feats_conv_train_da, feats_conv_valid])

In [19]:
labels_pseudo = np.concatenate([labels_train_da, labels_val_pseudo])

In [20]:
feats_conv_pseudo.shape

(59568, 14, 14, 512)

In [21]:
labels_pseudo.shape

(59568, 10)

#### Train

##### Train

In [25]:
K.set_value(vgg_fc.optimizer.lr, 0.00001)

In [26]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_dafcps.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=True, monitor='val_acc')]
vgg_fc.fit(feats_conv_pseudo, labels_pseudo, batch_size=batch_size, nb_epoch=2,
           validation_data=(feats_conv_valid, labels_valid), callbacks=callbacks)

Train on 59568 samples, validate on 3852 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f5ca9ec2898>

In [None]:
epoch_keep = 2
utils.rm_checkpoints(path_checkpoint+'weights_dafcps', epoch_keep)

##### Checkpoint

In [24]:
vgg_fc.load_weights(glob(path_checkpoint+'weights_dafcps.*')[0])

##### Train more

In [27]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_dafcps.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=True, monitor='val_acc')]
vgg_fc.fit(feats_conv_pseudo, labels_pseudo, batch_size=batch_size, nb_epoch=2,
           validation_data=(feats_conv_valid, labels_valid), callbacks=callbacks)

Train on 59568 samples, validate on 3852 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f5ca428c748>

In [None]:
epoch_keep = 2
utils.rm_checkpoints(path_checkpoint+'weights_dafcps', epoch_keep)

##### Checkpoint

In [28]:
vgg_fc.load_weights(glob(path_checkpoint+'weights_dafcps.*')[0])

##### Train more

In [30]:
callbacks = [ModelCheckpoint(
        path_checkpoint+'weights_dafcps.{epoch:02d}-{val_loss:.3f}-{val_acc:.3f}.h5',
        save_best_only=True, monitor='val_acc')]
vgg_fc.fit(feats_conv_pseudo, labels_pseudo, batch_size=batch_size, nb_epoch=5,
           validation_data=(feats_conv_valid, labels_valid), callbacks=callbacks)

Train on 59568 samples, validate on 3852 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f5ca9c5d3c8>

In [31]:
epoch_keep = 5
utils.rm_checkpoints(path_checkpoint+'weights_dafcps', epoch_keep)

##### Checkpoint

In [17]:
vgg_fc.load_weights(glob(path_checkpoint+'weights_dafcps.*')[0])

## Submit

In [19]:
# Load the precomputed test conv features here. The "Load data from arrays" cell leaves
# this load commented out, so `feats_conv_test` was otherwise undefined on a fresh kernel.
feats_conv_test = utils.load_array_bcolz(path_img_arrays+'test_conv.b5')
# Class probabilities for every test image from the fully connected head.
probs = vgg_fc.predict(feats_conv_test, batch_size=batch_size, verbose=1)



In [20]:
np.save(path_results+'filenames_test_vgg19.npy', batches_test.filenames)
np.save(path_results+'probs_test_vgg19.npy', probs)

In [26]:
# Build the VGG19 submission. Probabilities are clipped to [(1-0.93)/9, 0.93]; with the
# top class at 0.93 and the other nine at the floor, a row sums to 1.
class_cols = ['c%d' % i for i in range(10)]
submit = pd.DataFrame(np.clip(probs, (1-0.93)/9, 0.93), columns=class_cols)
# os.path.basename is separator-agnostic, unlike splitting on '/'.
submit['img'] = [os.path.basename(x) for x in batches_test.filenames]
# Named after the model (matching probs_test_vgg19.npy) instead of reusing 'conv4'.
submit.to_csv(path_submit+'vgg19.gz', index=False, compression='gzip')
FileLink(path_submit+'vgg19.gz')

In [25]:
submit.head()

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,img
0,0.060385,0.007778,0.009072,0.007778,0.857022,0.007778,0.007778,0.007778,0.013282,0.059394,img_67404.jpg
1,0.007778,0.007778,0.007778,0.007778,0.007778,0.93,0.007778,0.007778,0.007778,0.007778,img_85466.jpg
2,0.007778,0.007778,0.007778,0.007778,0.007778,0.865579,0.007778,0.007778,0.007778,0.133331,img_25938.jpg
3,0.007778,0.007778,0.007778,0.007778,0.007778,0.93,0.007778,0.007778,0.007778,0.007778,img_17138.jpg
4,0.007778,0.007778,0.007778,0.007778,0.007778,0.93,0.007778,0.007778,0.007778,0.007778,img_59439.jpg
