The most widely used and best-known optimizer for deep learning is Stochastic Gradient Descent (SGD). Most other optimizers are variants of SGD that try to speed up convergence by adding heuristics.

In [None]:
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from keras.models import Sequential
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adadelta, Adam, RMSprop, Adagrad, Nadam, Adamax

In [None]:
# Number of target classes for the classifier.
# NOTE(review): presumably equals the number of class sub-directories under
# 'data', since the iterators use class_mode='categorical' -- TODO confirm.
n_classes = 5

In [None]:
# Mini-batch size used by both directory iterators below. It must be bound
# before the iterators are created; previously it was only assigned in the
# training cell further down, so running the notebook top to bottom raised a
# NameError here. The value matches the one set in the training cell.
batch_size = 128

# Generator for the 'data' directory: multiplies pixel values by 1/255 and
# applies shear, zoom, width/height shift and horizontal-flip settings.
# validation_split=0.25 lets the same directory be served as a "training"
# and a "validation" subset.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
    validation_split=0.25,
)

# Rescale-only generator (no augmentation settings).
test_datagen = ImageDataGenerator(rescale=1./255)

# Images are resized to 150x150 and labels are one-hot encoded
# (class_mode='categorical').
training_set = train_datagen.flow_from_directory(
    'data',
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='categorical',
    subset="training",
)

# NOTE(review): the validation subset is drawn from train_datagen, so it
# presumably goes through the same augmentation settings as the training
# images -- confirm that this is intended.
validation_set = train_datagen.flow_from_directory(
    'data',
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='categorical',
    subset="validation",
)

Next, we define a function that creates the model:

In [None]:
def create_model(opt):
    """Build the uncompiled convolutional classifier.

    The network is two convolutional stages (32 and 64 filters, each stage
    being two 3x3 convolutions with ReLU, 2x2 max pooling and 25% dropout)
    followed by a 512-unit dense layer with 50% dropout and a softmax
    output with one unit per class.

    Args:
        opt: Name of the optimizer the model is created for. It is not used
            while building the layers; it is kept so existing callers can
            keep passing it.

    Returns:
        A ``Sequential`` model taking 150x150x3 inputs. The caller is
        responsible for compiling it.
    """
    model = Sequential()

    # Stage 1: 32 filters.
    model.add(Conv2D(32, (3, 3), padding='same', input_shape=(150, 150, 3)))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Stage 2: 64 filters.
    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    # The notebook defines the class count as `n_classes`; the previous
    # reference to `num_classes` was an undefined name.
    model.add(Dense(n_classes))
    model.add(Activation('softmax'))
    return model

Also, we need to create a function that defines the callbacks we want to use during training:

In [None]:
def create_callbacks(opt):
    """Create the training callbacks for one optimizer run.

    Args:
        opt: Name of the optimizer; it is embedded in the checkpoint file
            names and in the TensorBoard log directory so that runs of
            different optimizers do not overwrite each other.

    Returns:
        A list with an ``EarlyStopping``, a ``ModelCheckpoint`` and a
        ``TensorBoard`` callback.
    """
    # Create the checkpoint directory up front so the first save cannot fail
    # on a missing path.
    os.makedirs('checkpoints', exist_ok=True)

    callbacks = [
        # NOTE(review): the validation-accuracy metric is called 'val_acc'
        # or 'val_accuracy' depending on the Keras release -- confirm that
        # 'val_acc' is the name reported by the installed version.
        EarlyStopping(monitor='val_acc', patience=5, verbose=2),
        # save_best_only=False: one weights file is written per epoch.
        ModelCheckpoint('checkpoints/weights.{epoch:02d}-'+opt+'.h5', save_best_only=False, verbose=True),
        # One log sub-directory per optimizer keeps the runs separate in
        # TensorBoard instead of mixing them in the default directory.
        TensorBoard(log_dir=os.path.join('logs', opt)),
    ]
    return callbacks

Create a dict of the optimizers we want to try:

In [None]:
 # Optimizers to compare, keyed by the label used in checkpoint file names
 # and in the result table. Keys must be unique: the low-learning-rate Adam
 # previously reused the key 'adam' and silently replaced the default Adam
 # entry, so it now follows the '-0001' naming of the SGD/RMSprop variants.
 opts = {
     'sgd': SGD(),
     'sgd-0001': SGD(lr=0.0001, decay=0.00001),
     'adam': Adam(),
     'adam-0001': Adam(lr=0.0001),
     'adadelta': Adadelta(),
     'rmsprop': RMSprop(),
     'rmsprop-0001': RMSprop(lr=0.0001),
     'nadam': Nadam(),
     'adamax': Adamax(),
 }

Instead of implementing our own script, we can also use Hyperopt to run different optimizers; see the notebook `04.grid search for parameter tuning.ipynb`.

We train our networks and store the results:

In [None]:
# Upper bound on the number of epochs per run.
n_epochs = 1000
# Batch size of the directory iterators. It is intentionally not passed to
# fit(): the iterators already yield complete batches, and Keras releases
# that accept generators in fit() can reject an explicit batch_size.
batch_size = 128

# One row per optimizer: name, index of the best epoch, best validation
# accuracy, accuracy of the model as training left it, and accuracy of the
# best checkpoint -- five values, matching the five column labels assigned
# in the comparison cell.
results = []
# Loop through the optimizers
for opt in opts:
    model = create_model(opt)
    callbacks = create_callbacks(opt)
    model.compile(loss='categorical_crossentropy', optimizer=opts[opt], metrics=['accuracy'])
    hist = model.fit(training_set, epochs=n_epochs,
                     validation_data=validation_set,
                     verbose=1,
                     callbacks=callbacks)

    # The history key for validation accuracy is 'val_acc' in older Keras
    # releases and 'val_accuracy' in newer ones; accept either.
    acc_key = 'val_acc' if 'val_acc' in hist.history else 'val_accuracy'
    val_acc_history = hist.history[acc_key]
    best_epoch = np.argmax(val_acc_history)
    best_acc = val_acc_history[best_epoch]

    # Accuracy of the model with the weights of the last epoch that ran.
    score_last = model.evaluate(validation_set, verbose=0)

    best_model = create_model(opt)

    # Load the model weights with the highest validation accuracy.
    # best_epoch is a 0-based index into the history, while ModelCheckpoint
    # numbers its files starting at 1, hence the + 1.
    best_model.load_weights('checkpoints/weights.{:02d}-{}.h5'.format(best_epoch + 1, opt))
    # Compile with the same loss that was used for training so the reported
    # loss/metrics are comparable with the training run.
    best_model.compile(loss='categorical_crossentropy', optimizer=opts[opt], metrics=['accuracy'])

    score = best_model.evaluate(validation_set, verbose=0)
    results.append([opt, best_epoch, best_acc, score_last[1], score[1]])

Compare the results:

In [None]:
# Tabulate the per-optimizer rows collected in `results` and label the
# columns so the runs can be compared side by side.
column_labels = ['optimizer', 'epochs', 'val_accuracy', 'test_last', 'test_accuracy']
result = pd.DataFrame(results)
result.columns = column_labels
# Bare expression: makes the notebook render the table as the cell output.
result