In [1]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import RMSprop
import os
import math
from time import time
import datetime
from keras.callbacks import TensorBoard, TerminateOnNaN, LearningRateScheduler, Callback
import tensorflow as tf
from keras import backend as K
import multiprocessing as mp
import numpy as np


Using TensorFlow backend.


# Preprocessing

In [2]:
batch_size = 32
num_classes = 10
epochs = 100
data_augmentation = True
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_cifar10_trained_model.h5'

# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [3]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Tensorboard

In [2]:
tensorboard = TensorBoard(log_dir="logs/0/{}".format(time()))

In [6]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = RMSprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [7]:
start = time()

if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True, callbacks=[tensorboard])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4,
                        steps_per_epoch=len(x_train)/batch_size, 
                        callbacks=[tensorboard])

Using real-time data augmentation.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100

In [8]:
# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)

end = time()
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
print('Runtime:', str(end-start))

Test loss: 0.7649821676254273
Test accuracy: 0.7514
Runtime: 1324.4300031661987


# Terminate on NaN

In [13]:
tensorboard = TensorBoard(log_dir="logs/nan/{}".format(time()))
terminateonnan = TerminateOnNaN()

In [14]:
model_nan = Sequential()
model_nan.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model_nan.add(Activation('relu'))
model_nan.add(Conv2D(32, (3, 3)))
model_nan.add(Activation('relu'))
model_nan.add(MaxPooling2D(pool_size=(2, 2)))
model_nan.add(Dropout(0.25))

model_nan.add(Conv2D(64, (3, 3), padding='same'))
model_nan.add(Activation('relu'))
model_nan.add(Conv2D(64, (3, 3)))
model_nan.add(Activation('relu'))
model_nan.add(MaxPooling2D(pool_size=(2, 2)))
model_nan.add(Dropout(0.25))

model_nan.add(Flatten())
model_nan.add(Dense(512))
model_nan.add(Activation('relu'))
model_nan.add(Dropout(0.5))
model_nan.add(Dense(num_classes))
model_nan.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = RMSprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model_nan.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [15]:
start = time()

if not data_augmentation:
    print('Not using data augmentation.')
    model_nan.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True, callbacks=[tensorboard, terminateonnan])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model_nan.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4,
                        steps_per_epoch=len(x_train)/batch_size, 
                        callbacks=[tensorboard, terminateonnan])

Using real-time data augmentation.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100

In [16]:
# Score trained model.
scores = model_nan.evaluate(x_test, y_test, verbose=1)

end = time()
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
print('Runtime:', str(end-start))

Test loss: 0.7133792991638184
Test accuracy: 0.7628
Runtime: 1326.8641436100006


### Guaranteeing a NaN Loss
To test when this callback gets triggered, we need to guarantee that a NaN loss will occur. We can do this by replaceing some of the values in the training data with NaNs.

https://stackoverflow.com/questions/37232782/nan-loss-when-training-regression-network

In [33]:
x_train_nan = x_train.copy()
x_train_nan[x_train_nan >= 0] = np.nan
x_train_nan.shape

(50000, 32, 32, 3)

In [34]:
np.isnan(x_train_nan).any()

True

In [36]:
tensorboard = TensorBoard(log_dir="logs/nan/{}".format(time()))
terminateonnan = TerminateOnNaN()

model_nan_2 = Sequential()
model_nan_2.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model_nan_2.add(Activation('relu'))
model_nan_2.add(Conv2D(32, (3, 3)))
model_nan_2.add(Activation('relu'))
model_nan_2.add(MaxPooling2D(pool_size=(2, 2)))
model_nan_2.add(Dropout(0.25))

model_nan_2.add(Conv2D(64, (3, 3), padding='same'))
model_nan_2.add(Activation('relu'))
model_nan_2.add(Conv2D(64, (3, 3)))
model_nan_2.add(Activation('relu'))
model_nan_2.add(MaxPooling2D(pool_size=(2, 2)))
model_nan_2.add(Dropout(0.25))

model_nan_2.add(Flatten())
model_nan_2.add(Dense(512))
model_nan_2.add(Activation('relu'))
model_nan_2.add(Dropout(0.5))
model_nan_2.add(Dense(num_classes))
model_nan_2.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = RMSprop(lr=0.0001, decay=1e-6) 

# Let's train the model using RMSprop
model_nan_2.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [37]:
start = time()

if not data_augmentation:
    print('Not using data augmentation.')
    model_nan_2.fit(x_train_nan, y_train,
              batch_size=batch_size,
              epochs=300,
              validation_data=(x_test, y_test),
              shuffle=True, callbacks=[tensorboard, terminateonnan])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model_nan_2.fit_generator(datagen.flow(x_train_nan, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4,
                        steps_per_epoch=len(x_train)/batch_size, 
                        callbacks=[tensorboard, terminateonnan])

Using real-time data augmentation.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [38]:
# Score trained model.
scores = model_nan_2.evaluate(x_test, y_test, verbose=1)

end = time()
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
print('Runtime:', str(end-start))

Test loss: 2.302585478210449
Test accuracy: 0.1
Runtime: 1342.4824059009552


# Learning Rate Scheduler

In [18]:
tensorboard = TensorBoard(log_dir="logs/lr_scheduler/{}".format(time()))

# decay schedule adapted from https://machinelearningmastery.com/using-learning-rate-schedules-deep-learning-models-python-keras/
def schedule(epoch, lr):
    drop = 0.9
    epochs_drop = 10.0
    return lr * math.pow(drop, math.floor((1+epoch)/epochs_drop))
    
lrscheduler = LearningRateScheduler(schedule, verbose=1)

In [19]:
model_lr = Sequential()
model_lr.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model_lr.add(Activation('relu'))
model_lr.add(Conv2D(32, (3, 3)))
model_lr.add(Activation('relu'))
model_lr.add(MaxPooling2D(pool_size=(2, 2)))
model_lr.add(Dropout(0.25))

model_lr.add(Conv2D(64, (3, 3), padding='same'))
model_lr.add(Activation('relu'))
model_lr.add(Conv2D(64, (3, 3)))
model_lr.add(Activation('relu'))
model_lr.add(MaxPooling2D(pool_size=(2, 2)))
model_lr.add(Dropout(0.25))

model_lr.add(Flatten())
model_lr.add(Dense(512))
model_lr.add(Activation('relu'))
model_lr.add(Dropout(0.5))
model_lr.add(Dense(num_classes))
model_lr.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = RMSprop(lr=0.0001) # , decay=1e-6

# Let's train the model using RMSprop
model_lr.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [20]:
start = time()

if not data_augmentation:
    print('Not using data augmentation.')
    model_lr.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True, callbacks=[tensorboard, lrscheduler])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model_lr.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4,
                        steps_per_epoch=len(x_train)/batch_size, 
                        callbacks=[tensorboard, lrscheduler])

Using real-time data augmentation.
Epoch 1/100

Epoch 00001: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 2/100

Epoch 00002: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 3/100

Epoch 00003: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 4/100

Epoch 00004: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 5/100

Epoch 00005: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 6/100

Epoch 00006: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 7/100

Epoch 00007: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 8/100

Epoch 00008: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 9/100

Epoch 00009: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 10/100

Epoch 00010: LearningRateScheduler setting learning rate to 8.999999772640876e-05.
Epoc

Epoch 38/100

Epoch 00038: LearningRateScheduler setting learning rate to 2.465034885972273e-07.
Epoch 39/100

Epoch 00039: LearningRateScheduler setting learning rate to 1.7970103357356496e-07.
Epoch 40/100

Epoch 00040: LearningRateScheduler setting learning rate to 1.1790184756819144e-07.
Epoch 41/100

Epoch 00041: LearningRateScheduler setting learning rate to 7.735540265940699e-08.
Epoch 42/100

Epoch 00042: LearningRateScheduler setting learning rate to 5.0752879307225384e-08.
Epoch 43/100

Epoch 00043: LearningRateScheduler setting learning rate to 3.3298965229988654e-08.
Epoch 44/100

Epoch 00044: LearningRateScheduler setting learning rate to 2.1847450009104818e-08.
Epoch 45/100

Epoch 00045: LearningRateScheduler setting learning rate to 1.4334111991765042e-08.
Epoch 46/100

Epoch 00046: LearningRateScheduler setting learning rate to 9.404610670227242e-09.
Epoch 47/100

Epoch 00047: LearningRateScheduler setting learning rate to 6.170364899027448e-09.
Epoch 48/100

Epoch 0004

Epoch 76/100

Epoch 00076: LearningRateScheduler setting learning rate to 1.4086720522650144e-16.
Epoch 77/100

Epoch 00077: LearningRateScheduler setting learning rate to 6.737634696408825e-17.
Epoch 78/100

Epoch 00078: LearningRateScheduler setting learning rate to 3.222589888475881e-17.
Epoch 79/100

Epoch 00079: LearningRateScheduler setting learning rate to 1.5413547052794303e-17.
Epoch 80/100

Epoch 00080: LearningRateScheduler setting learning rate to 6.635026950681314e-18.
Epoch 81/100

Epoch 00081: LearningRateScheduler setting learning rate to 2.8561615772941613e-18.
Epoch 82/100

Epoch 00082: LearningRateScheduler setting learning rate to 1.229483914110365e-18.
Epoch 83/100

Epoch 00083: LearningRateScheduler setting learning rate to 5.292525196584083e-19.
Epoch 84/100

Epoch 00084: LearningRateScheduler setting learning rate to 2.2782586491921244e-19.
Epoch 85/100

Epoch 00085: LearningRateScheduler setting learning rate to 9.807156159725298e-20.
Epoch 86/100

Epoch 00086:

In [21]:
# Score trained model.
scores = model_lr.evaluate(x_test, y_test, verbose=1)

end = time()
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
print('Runtime:', str(end-start))

Test loss: 0.8506660752296448
Test accuracy: 0.704
Runtime: 1343.6695251464844


# Custom Callbacks

In [26]:
tensorboard = TensorBoard(log_dir="logs/custom/{}".format(time()))

# Example from https://keras.io/callbacks/
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
        
losshistory = LossHistory()

In [27]:
model_custom = Sequential()
model_custom.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model_custom.add(Activation('relu'))
model_custom.add(Conv2D(32, (3, 3)))
model_custom.add(Activation('relu'))
model_custom.add(MaxPooling2D(pool_size=(2, 2)))
model_custom.add(Dropout(0.25))

model_custom.add(Conv2D(64, (3, 3), padding='same'))
model_custom.add(Activation('relu'))
model_custom.add(Conv2D(64, (3, 3)))
model_custom.add(Activation('relu'))
model_custom.add(MaxPooling2D(pool_size=(2, 2)))
model_custom.add(Dropout(0.25))

model_custom.add(Flatten())
model_custom.add(Dense(512))
model_custom.add(Activation('relu'))
model_custom.add(Dropout(0.5))
model_custom.add(Dense(num_classes))
model_custom.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = RMSprop(lr=0.0001, decay=1e-6) 

# Let's train the model using RMSprop
model_custom.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [28]:
start = time()

if not data_augmentation:
    print('Not using data augmentation.')
    model_custom.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True, callbacks=[tensorboard, losshistory])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model_custom.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4,
                        steps_per_epoch=len(x_train)/batch_size, 
                        callbacks=[tensorboard, losshistory])

Using real-time data augmentation.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100

In [29]:
print(losshistory.losses)

[2.3451781, 2.2898335, 2.3097787, 2.285324, 2.3232164, 2.296553, 2.2889848, 2.2912643, 2.2978754, 2.3165379, 2.3292284, 2.3229232, 2.332654, 2.2930734, 2.315714, 2.3411372, 2.2935064, 2.2925484, 2.2936068, 2.3126936, 2.2976334, 2.2727056, 2.3210983, 2.309294, 2.2976635, 2.2749472, 2.2717102, 2.285037, 2.2819738, 2.3044312, 2.295658, 2.2842374, 2.3418126, 2.3070555, 2.2933989, 2.3090186, 2.2910972, 2.2963629, 2.3068137, 2.296558, 2.3261833, 2.2938652, 2.2919254, 2.3071947, 2.2555735, 2.326353, 2.2834296, 2.3140798, 2.2961266, 2.2666984, 2.2795386, 2.2751698, 2.2945352, 2.2660089, 2.2197013, 2.289999, 2.3490124, 2.3531585, 2.28486, 2.2679143, 2.2990327, 2.2598495, 2.2455344, 2.3502173, 2.2851923, 2.2619162, 2.3041801, 2.2716928, 2.2897806, 2.3057585, 2.305815, 2.262783, 2.3182595, 2.3326488, 2.2454364, 2.2600517, 2.3255298, 2.2831776, 2.2754986, 2.2673552, 2.2762022, 2.2793233, 2.2858722, 2.2538147, 2.3345137, 2.2674353, 2.269051, 2.2861621, 2.2947426, 2.2490458, 2.2803817, 2.2736723, 2.

In [30]:
# Score trained model.
scores = model_custom.evaluate(x_test, y_test, verbose=1)

end = time()
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
print('Runtime:', str(end-start))

Test loss: 0.717925118303299
Test accuracy: 0.7592
Runtime: 1349.572916984558


As a second example, let's look at tracking the learning rate scheduler:

In [47]:
tensorboard = TensorBoard(log_dir="logs/custom/{}".format(time()))

# Method from https://github.com/keras-team/keras/issues/7874
class LearningRateHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.lr_history = []
        
    def on_epoch_end(self, epoch, logs=None):
        self.lr_history.append(K.eval(self.model.optimizer.lr))
        
lrhistory = LearningRateHistory()

def schedule(epoch, lr):
    drop = 0.9
    epochs_drop = 10.0
    return lr * math.pow(drop, math.floor((1+epoch)/epochs_drop))
    
lrscheduler = LearningRateScheduler(schedule, verbose=1)

In [48]:
model_custom = Sequential()
model_custom.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model_custom.add(Activation('relu'))
model_custom.add(Conv2D(32, (3, 3)))
model_custom.add(Activation('relu'))
model_custom.add(MaxPooling2D(pool_size=(2, 2)))
model_custom.add(Dropout(0.25))

model_custom.add(Conv2D(64, (3, 3), padding='same'))
model_custom.add(Activation('relu'))
model_custom.add(Conv2D(64, (3, 3)))
model_custom.add(Activation('relu'))
model_custom.add(MaxPooling2D(pool_size=(2, 2)))
model_custom.add(Dropout(0.25))

model_custom.add(Flatten())
model_custom.add(Dense(512))
model_custom.add(Activation('relu'))
model_custom.add(Dropout(0.5))
model_custom.add(Dense(num_classes))
model_custom.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = RMSprop(lr=0.0001, decay=1e-6) 

# Let's train the model using RMSprop
model_custom.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [49]:
start = time()

if not data_augmentation:
    print('Not using data augmentation.')
    model_custom.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True, callbacks=[tensorboard, lrhistory, lrscheduler])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        zca_epsilon=1e-06,  # epsilon for ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        # randomly shift images horizontally (fraction of total width)
        width_shift_range=0.1,
        # randomly shift images vertically (fraction of total height)
        height_shift_range=0.1,
        shear_range=0.,  # set range for random shear
        zoom_range=0.,  # set range for random zoom
        channel_shift_range=0.,  # set range for random channel shifts
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        cval=0.,  # value used for fill_mode = "constant"
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model_custom.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4,
                        steps_per_epoch=len(x_train)/batch_size, 
                        callbacks=[tensorboard, lrhistory, lrscheduler])

Using real-time data augmentation.
Epoch 1/100

Epoch 00001: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 2/100

Epoch 00002: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 3/100

Epoch 00003: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 4/100

Epoch 00004: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 5/100

Epoch 00005: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 6/100

Epoch 00006: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 7/100

Epoch 00007: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 8/100

Epoch 00008: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 9/100

Epoch 00009: LearningRateScheduler setting learning rate to 9.999999747378752e-05.
Epoch 10/100

Epoch 00010: LearningRateScheduler setting learning rate to 8.999999772640876e-05.
Epoc

Epoch 38/100

Epoch 00038: LearningRateScheduler setting learning rate to 2.465034885972273e-07.
Epoch 39/100

Epoch 00039: LearningRateScheduler setting learning rate to 1.7970103357356496e-07.
Epoch 40/100

Epoch 00040: LearningRateScheduler setting learning rate to 1.1790184756819144e-07.
Epoch 41/100

Epoch 00041: LearningRateScheduler setting learning rate to 7.735540265940699e-08.
Epoch 42/100

Epoch 00042: LearningRateScheduler setting learning rate to 5.0752879307225384e-08.
Epoch 43/100

Epoch 00043: LearningRateScheduler setting learning rate to 3.3298965229988654e-08.
Epoch 44/100

Epoch 00044: LearningRateScheduler setting learning rate to 2.1847450009104818e-08.
Epoch 45/100

Epoch 00045: LearningRateScheduler setting learning rate to 1.4334111991765042e-08.
Epoch 46/100

Epoch 00046: LearningRateScheduler setting learning rate to 9.404610670227242e-09.
Epoch 47/100

Epoch 00047: LearningRateScheduler setting learning rate to 6.170364899027448e-09.
Epoch 48/100

Epoch 0004

Epoch 76/100

Epoch 00076: LearningRateScheduler setting learning rate to 1.4086720522650144e-16.
Epoch 77/100

Epoch 00077: LearningRateScheduler setting learning rate to 6.737634696408825e-17.
Epoch 78/100

Epoch 00078: LearningRateScheduler setting learning rate to 3.222589888475881e-17.
Epoch 79/100

Epoch 00079: LearningRateScheduler setting learning rate to 1.5413547052794303e-17.
Epoch 80/100

Epoch 00080: LearningRateScheduler setting learning rate to 6.635026950681314e-18.
Epoch 81/100

Epoch 00081: LearningRateScheduler setting learning rate to 2.8561615772941613e-18.
Epoch 82/100

Epoch 00082: LearningRateScheduler setting learning rate to 1.229483914110365e-18.
Epoch 83/100

Epoch 00083: LearningRateScheduler setting learning rate to 5.292525196584083e-19.
Epoch 84/100

Epoch 00084: LearningRateScheduler setting learning rate to 2.2782586491921244e-19.
Epoch 85/100

Epoch 00085: LearningRateScheduler setting learning rate to 9.807156159725298e-20.
Epoch 86/100

Epoch 00086:

In [50]:
# Score trained model.
scores = model_custom.evaluate(x_test, y_test, verbose=1)

end = time()
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
print('Runtime:', str(end-start))

Test loss: 0.8165272733688355
Test accuracy: 0.7139
Runtime: 1359.904479265213


In [51]:
print(lrhistory.lr_history)

[1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 1e-04, 9e-05, 8.1e-05, 7.29e-05, 6.561e-05, 5.9049e-05, 5.31441e-05, 4.782969e-05, 4.304672e-05, 3.8742048e-05, 3.4867844e-05, 2.8242954e-05, 2.2876793e-05, 1.8530201e-05, 1.5009463e-05, 1.2157665e-05, 9.8477085e-06, 7.976644e-06, 6.4610817e-06, 5.2334763e-06, 4.2391157e-06, 3.0903154e-06, 2.25284e-06, 1.6423204e-06, 1.1972515e-06, 8.7279636e-07, 6.3626857e-07, 4.638398e-07, 3.3813922e-07, 2.4650348e-07, 1.7970103e-07, 1.1790185e-07, 7.73554e-08, 5.075288e-08, 3.3298964e-08, 2.184745e-08, 1.4334112e-08, 9.40461e-09, 6.170365e-09, 4.0483767e-09, 2.65614e-09, 1.5684241e-09, 9.261387e-10, 5.4687566e-10, 3.229246e-10, 1.9068375e-10, 1.1259685e-10, 6.6487316e-11, 3.9260095e-11, 2.3182694e-11, 1.3689149e-11, 7.274975e-12, 3.86622e-12, 2.0546676e-12, 1.0919346e-12, 5.8029883e-13, 3.0839459e-13, 1.6389353e-13, 8.709974e-14, 4.6288372e-14, 2.459954e-14, 1.1765883e-14, 5.6275853e-15, 2.6916566e-15, 1.287411e-15, 6.157647e-16, 2.9451833e-16