In [1]:
%matplotlib inline
import utils_ted
from utils_ted import *
import vgg16bn
from vgg16bn import *

Using TensorFlow backend.
  return f(*args, **kwds)


## Setup

In [2]:
path = "../data/redux/"
model_path = path+"models/"
if not os.path.exists(model_path): os.makedirs(model_path)

In [3]:
batch_size=64

In [4]:
batches = get_batches(path+'train', batch_size=batch_size) 
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size) 

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [5]:
trn_classes, val_classes, trn_labels, val_labels, trn_filenames, val_filenames, test_filenames = get_classes(path) 

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In this notebook we're going to create an ensemble of models and use their average as our predictions. For each ensemble, we're going to follow our usual fine-tuning steps:

1) Create a model that retrains just the last layer
2) Add this to a model containing all VGG layers except the last layer
3) Fine-tune just the dense layers of this model (pre-computing the convolutional layers)
4) Add data augmentation, fine-tuning the dense layers without pre-computation.

So first, we need to create our VGG model and pre-compute the output of the conv layers:

In [6]:
model = Vgg16BN().model
conv_layers, fc_layers = split_at(model, Conv2D)

In [7]:
conv_model = Sequential(conv_layers)

In [8]:
conv_trn_features = conv_model.predict_generator(batches, steps=ceil(batches.n/batches.batch_size), verbose=0)
conv_val_features = conv_model.predict_generator(val_batches, steps=ceil(val_batches.n/val_batches.batch_size), verbose=0)

In [9]:
save_array(model_path + 'train_vgg16_convlayer_features.bc', conv_trn_features)
save_array(model_path + 'valid_vgg16_convlayer_features.bc', conv_val_features)

In the future we can just load these precomputed features:

In [10]:
conv_trn_features = load_array(model_path + 'train_vgg16_convlayer_features.bc')
conv_val_features = load_array(model_path + 'valid_vgg16_convlayer_features.bc')

We can also save some time by pre-computing the training and validation arrays with the image decoding and resizing already done:

...and let's also grab the test data, for when we need to submit:

In [11]:
trn = get_data(path+'train')
val = get_data(path+'valid')

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [12]:
save_array(model_path+'train_data.bc', trn)
save_array(model_path+'valid_data.bc', val)

In [13]:
test = get_data(path+'valid')
save_array(model_path+'test_data.bc', test)

Found 2000 images belonging to 2 classes.


In the future we can just load these resized images:

In [14]:
trn = load_array(model_path+'train_data.bc')
val = load_array(model_path+'valid_data.bc')

MemoryError: 

In [None]:
test = load_array(model_path+'test_data.bc')

Finally, we can precompute the output of all but the last dropout and dense layers, for creating the first stage of the model:

In [None]:
model.pop()
model.pop()

In [None]:
ll_trn_features = model.predict_generator(batches, steps=ceil(batches.n/batches.batch_size), verbose=0)
ll_val_features = model.predict_generator(val_batches, steps=ceil(val_batches.n/val_batches.batch_size), verbose=0)

In [None]:
save_array(model_path + 'train_ll_features.bc', ll_trn_features)
save_array(model_path + 'valid_ll_features.bc', ll_val_features)

In [None]:
ll_trn_features = load_array(model_path + 'train_ll_features.bc')
ll_val_features = load_array(model_path + 'valid_ll_features.bc')

## Last layer

**The functions automate creating a model that trains the last layer from scratch, and then adds those new layers on to the main model.**

In [None]:
def get_ll_layers():
    return [
        BatchNormalization(input_shape=(4096,)),
        Dropout(0.5),
        Dense(2, activation='softmax')
    ]

In [None]:
def train_last_layer(i):
    # 1) Create a model that retrains just the last layer
    ll_layers = get_ll_layers()
    ll_model = Sequential(ll_layers)
    for layer in ll_model.layers: layer.trainable=True
    ll_model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    ll_model.optimizer.lr = 1e-5
    ll_model.fit(ll_trn_features, trn_labels, validation_data=(ll_val_features, val_labels), epochs=12, verbose=2)
    ll_model.optimizer.lr = 1e-7
    ll_model.fit(ll_trn_features, trn_labels, validation_data=(ll_val_features, val_labels), epochs=1, verbose=2)
    ll_model.save_weights(model_path + 'll_bn' + i + '.h5')
    
    # 2) Add this to a model containing all VGG layers except the last layer
    vgg = Vgg16BN()
    model = vgg.model
    model.pop(); model.pop(); model.pop()
    for layer in model.layers:  layer.trainable = False
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    
    ll_layers = get_ll_layers()
    for layer in ll_layers:  model.add(layer)
    for l1, l2 in zip(ll_model.layers, model.layers[-3:]):
        l2.set_weights(l1.get_weights())
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    model.save_weights(model_path + 'bn' + i + '.h5')
    return model

## Dense model

In [None]:
def get_conv_model(model):
    layers = model.layers
    last_conv_idx = [idx for idx, layer in enumerate(layers) if type(layer) is Conv2D][-1]
    
    conv_layers = layers[:last_conv_idx+1]
    conv_model = Sequential(conv_layers)
    fc_layers = layers[last_conv_idx+1:]
    return conv_model, fc_layers, last_conv_idx

In [None]:
def get_fc_layers(p, in_shape):
    return [
        MaxPooling2D(input_shape=in_shape),
        Flatten(),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(2, activation='softmax')
    ]

In [None]:
def train_dense_layers(i, model):
    # 3) Fine-tune just the dense layers of this model (pre-computing the convolutional layers)
    conv_model, fc_layers, last_conv_idx = get_conv_model(model)
    conv_shape = conv_model.output_shape[1:]
    fc_model = Sequential(get_fc_layers(0.5, conv_shape))
    for l1, l2 in zip(fc_model.layers, fc_layers):
        l1.set_weights(l2.get_weights())
        l1.trainable = True
    fc_model.compile(Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
    fc_model.fit(conv_trn_features, trn_labels, validation_data=[conv_val_features, val_labels], epochs=2, verbose=2)
    
    # 4) Add data augmentation, fine-tuning the dense layers without pre-computation
    gen_trn = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05, zoom_range=0.05, 
              channel_shift_range=10, height_shift_range=0.05, shear_range=0.05, horizontal_flip=True)
    gen_val = image.DataGenerator()
    batches = gen_trn.flow(trn, trn_labels, batch_size=batch_size)
    val_batches = gen_val.flow(val, val_labels, shuffle=False, batch_size=batch_size)
    
    for layer in conv_model.layers: layer.trainable = False
    for layer in get_fc_layers(0.5, conv_shape): conv_model.add(layer)
    for l1, l2 in zip(conv_model.layers[last_conv_idx+1:], fc_model.layers):
        l1.set_weights(l2.get_weights())
        l1.trainable = True
    conv_model.compile(Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
    conv_model.save_weights(model_path+'no_dropout_bn' + i + '.h5')
    conv_model.fit_generator(batches, steps_per_epoch = ceil(batches.n/batches.batch_size),
                             validation_data=val_batches, validation_steps=ceil(val_batches.n/val_batches.batch_size),
                            epochs=1, verbose=2)
    for layer in conv_model.layers[16:]: layer.trainable = True
    conv_model.compile(Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
    conv_model.fit_generator(batches, steps_per_epoch = ceil(batches.n/batches.batch_size),
                             validation_data=val_batches, validation_steps=ceil(val_batches.n/val_batches.batch_size),
                            epochs=8, verbose=2)
    
    conv_model.optimizer.lr = 1e-7
    conv_model.fit_generator(batches, steps_per_epoch = ceil(batches.n/batches.batch_size),
                             validation_data=val_batches, validation_steps=ceil(val_batches.n/val_batches.batch_size),
                            epochs=10, verbose=2)
    conv_model.save_weights(model_path + 'aug' + i + '.h5')

## Build ensemble

In [None]:
for i in range(5):
    i = str(i)
    model = train_last_layer(i)
    train_dense_layers(i, model)

## Combine ensemble and test

In [None]:
ens_model =vgg_ft(2)
for layer in ens_model.layers: layer.trainable = True

In [None]:
def get_ens_pred(arr, fname):
    ens_pred = []
    for i in range(5):
        ens_model.load_weights("{}{}{}.h5".format(model_path, fname, i))
        preds = ens_model.predict(arr, batch_size=batch_size)
        ens_pred.append(preds)
    return ens_pred

In [None]:
val_preds2 = get_ens_pred(val, 'aug')

In [None]:
val_avg_preds2 = np.stack(val_preds).mean(axis=0)

In [None]:
categorical_accuracy(val_labels, val_avg_preds2).eval()