# Benchmarking Optimizers

In [None]:
%matplotlib inline

In [None]:
from tensorflow import keras
from tensorflow.keras import datasets
import matplotlib.pylab as plt
import numpy as np

## MNIST

In [None]:
# Fetch the MNIST train/test split as NumPy arrays.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

# Scale the images with keras.utils.normalize along axis 1.
x_train, x_test = (
    keras.utils.normalize(images, axis=1) for images in (x_train, x_test)
)

# One-hot encode the labels over the 10 digit classes.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

# Last expression of the cell: show the resulting array shapes.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
def train_mnist(optimizer):
    """Train a fresh dense classifier on the MNIST arrays.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` arrays.

    Args:
        optimizer: forwarded unchanged to ``model.compile``.

    Returns:
        A ``(history, model)`` pair: the object returned by ``model.fit``
        and the trained model.
    """
    # Flatten -> 250-unit ReLU hidden layer -> 10-way softmax output.
    layers = [
        keras.layers.Flatten(input_shape=x_train[0].shape),
        keras.layers.Dense(250, activation='relu'),
        keras.layers.Dense(10, activation='softmax'),
    ]
    model = keras.Sequential(layers)

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    # The test split doubles as validation data so every epoch logs both.
    fit_options = dict(
        batch_size=128,
        epochs=10,
        shuffle=True,
        verbose=False,
        validation_data=(x_test, y_test),
    )
    history = model.fit(x_train, y_train, **fit_options)

    return history, model

In [None]:
# Benchmark every optimizer on MNIST: 10 independent training runs each,
# recording the final train/test loss and accuracy plus the fit history.
mnist = {
    name: {'loss': [], 'acc': [], 'val_loss': [], 'val_acc': [], 'history': []}
    for name in ['sgd', 'rmsprop', 'adagrad', 'adam']
}
for optimizer in mnist:
    print('--- {} ---'.format(optimizer))
    results = mnist[optimizer]
    for i in range(10):
        print('starting ', i)
        history, model = train_mnist(optimizer)
        # Re-evaluate the finished model on both splits for the final scores.
        train_loss, train_accuracy = model.evaluate(x_train, y_train, verbose=False)
        val_loss, val_accuracy = model.evaluate(x_test, y_test, verbose=False)
        results['loss'].append(train_loss)
        results['acc'].append(train_accuracy)
        results['val_loss'].append(val_loss)
        results['val_acc'].append(val_accuracy)
        results['history'].append(history)

In [None]:
# Print the mean of each final metric over all MNIST runs, per optimizer.
for opt in ['sgd', 'rmsprop', 'adagrad', 'adam']:
    runs = mnist[opt]
    for key in ['acc', 'val_acc', 'loss', 'val_loss']:
        print(opt, key, np.mean(runs[key]))

In [None]:
# Plot per-epoch learning curves on MNIST, one panel per metric, with each
# optimizer's curve averaged over all of its recorded runs.
panels = [
    ('acc', 'Training Accuracy on MNIST'),
    ('val_acc', 'Validation Accuracy on MNIST'),
    ('loss', 'Training Loss on MNIST'),
    ('val_loss', 'Validation Loss on MNIST'),
]
plt.figure(figsize=(20, 4))
for i, (key, title) in enumerate(panels):
    plt.subplot(1, len(panels), i + 1)
    for opt in ['sgd', 'rmsprop', 'adagrad', 'adam']:
        curves = [
            # Keras logs accuracy as 'acc' in TF 1.x but as 'accuracy' in
            # TF 2.x; accept either spelling instead of raising KeyError.
            h.history[key if key in h.history else key.replace('acc', 'accuracy')]
            for h in mnist[opt]['history']
        ]
        # Average over however many runs/epochs were actually recorded
        # rather than assuming exactly 10 runs of 10 epochs.
        mean = np.mean(curves, axis=0)
        plt.plot(mean, label=opt)
    plt.legend()
    plt.title(title)
# Render the figure (a bare plt.plot() draws nothing).
plt.show()

## CIFAR10

In [None]:
# Fetch the CIFAR10 train/test split; this rebinds the MNIST arrays above.
(x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()

# Scale the images with keras.utils.normalize along axis 1.
x_train, x_test = (
    keras.utils.normalize(images, axis=1) for images in (x_train, x_test)
)

# One-hot encode the labels over the 10 classes.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

# Last expression of the cell: show the resulting array shapes.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
def train_cifar10(optimizer):
    """Train a fresh small CNN on the CIFAR10 arrays.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` arrays.

    Args:
        optimizer: forwarded unchanged to ``model.compile``.

    Returns:
        A ``(history, model)`` pair: the object returned by ``model.fit``
        and the trained model.
    """
    # First conv stage: two convolutions, 3x3 max-pooling, dropout.
    first_stage = [
        keras.layers.Conv2D(32, kernel_size=(2, 2), padding='same', activation='relu', input_shape=x_train[0].shape),
        keras.layers.Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(3, 3)),
        keras.layers.Dropout(0.3),
    ]
    # Second conv stage: two narrower convolutions, 2x2 max-pooling, dropout.
    second_stage = [
        keras.layers.Conv2D(16, kernel_size=(2, 2), padding='same', activation='relu'),
        keras.layers.Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.3),
    ]
    # Classifier head: 1024-unit dense layer with dropout, 10-way softmax.
    head = [
        keras.layers.Flatten(),
        keras.layers.Dense(1024, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(10, activation='softmax'),
    ]
    model = keras.Sequential(first_stage + second_stage + head)

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    # The test split doubles as validation data so every epoch logs both.
    fit_options = dict(
        batch_size=128,
        epochs=30,
        shuffle=True,
        verbose=False,
        validation_data=(x_test, y_test),
    )
    history = model.fit(x_train, y_train, **fit_options)

    return history, model

In [None]:
# Benchmark every optimizer on CIFAR10: 10 independent training runs each,
# recording the final train/test loss and accuracy plus the fit history.
cifar10 = {
    name: {'loss': [], 'acc': [], 'val_loss': [], 'val_acc': [], 'history': []}
    for name in ['sgd', 'rmsprop', 'adagrad', 'adam']
}
for optimizer in cifar10:
    print('--- {} ---'.format(optimizer))
    results = cifar10[optimizer]
    for i in range(10):
        print('starting ', i)
        history, model = train_cifar10(optimizer)
        # Re-evaluate the finished model on both splits for the final scores.
        train_loss, train_accuracy = model.evaluate(x_train, y_train, verbose=False)
        val_loss, val_accuracy = model.evaluate(x_test, y_test, verbose=False)
        results['loss'].append(train_loss)
        results['acc'].append(train_accuracy)
        results['val_loss'].append(val_loss)
        results['val_acc'].append(val_accuracy)
        results['history'].append(history)

In [None]:
# Plot per-epoch learning curves on CIFAR10, one panel per metric, with each
# optimizer's curve averaged over all of its recorded runs.
panels = [
    ('acc', 'Training Accuracy on CIFAR10'),
    ('val_acc', 'Validation Accuracy on CIFAR10'),
    ('loss', 'Training Loss on CIFAR10'),
    ('val_loss', 'Validation Loss on CIFAR10'),
]
plt.figure(figsize=(20, 4))
for i, (key, title) in enumerate(panels):
    plt.subplot(1, len(panels), i + 1)
    for opt in ['sgd', 'rmsprop', 'adagrad', 'adam']:
        curves = [
            # Keras logs accuracy as 'acc' in TF 1.x but as 'accuracy' in
            # TF 2.x; accept either spelling instead of raising KeyError.
            h.history[key if key in h.history else key.replace('acc', 'accuracy')]
            for h in cifar10[opt]['history']
        ]
        # Average over however many runs/epochs were actually recorded
        # rather than assuming exactly 10 runs of 30 epochs.
        mean = np.mean(curves, axis=0)
        plt.plot(mean, label=opt)
    plt.legend()
    plt.title(title)
# Render the figure (a bare plt.plot() draws nothing).
plt.show()

In [None]:
# Print the mean of each final metric over all CIFAR10 runs, per optimizer.
for opt in ['sgd', 'rmsprop', 'adagrad', 'adam']:
    runs = cifar10[opt]
    for key in ['acc', 'val_acc', 'loss', 'val_loss']:
        print(opt, key, np.mean(runs[key]))

## Text classification with preprocessed text: Movie reviews (8k)

https://www.tensorflow.org/tutorials/keras/text_classification

In [None]:
import tensorflow_datasets as tfds
# Load the 'imdb_reviews/subwords8k' dataset as (text, label) pairs, together
# with the dataset metadata.
(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    with_info=True,
    as_supervised=True,
)

BUFFER_SIZE = 1000

# Text encoder from the dataset metadata; its vocab_size sizes the embedding.
encoder = info.features['text'].encoder

# Batches of 32 padded to train_data's output shapes; only the training
# stream is shuffled.
train_batches = train_data.shuffle(BUFFER_SIZE).padded_batch(
    32, train_data.output_shapes)

test_batches = test_data.padded_batch(32, train_data.output_shapes)

In [None]:
def train_imdb(optimizer):
    """Train a fresh embedding classifier on the IMDB review batches.

    Reads the module-level ``encoder``, ``train_batches`` and
    ``test_batches``.

    Args:
        optimizer: forwarded unchanged to ``model.compile``.

    Returns:
        A ``(history, model)`` pair: the object returned by ``model.fit``
        and the trained model.
    """
    # Embedding -> average over the sequence -> single sigmoid output.
    model = keras.Sequential([
        keras.layers.Embedding(encoder.vocab_size, 16),
        keras.layers.GlobalAveragePooling1D(),
        keras.layers.Dense(1, activation='sigmoid')
    ])

    # Use the optimizer under test. This was previously hard-coded to 'adam',
    # so every benchmarked optimizer silently trained with Adam.
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # No shuffle argument: Keras ignores it for dataset input, and
    # train_batches is already shuffled in the input pipeline.
    # validation_steps=30 limits per-epoch validation to 30 test batches.
    history = model.fit(train_batches,
                        epochs=10,
                        verbose=False,
                        validation_data=test_batches,
                        validation_steps=30)

    return history, model

In [None]:
# Benchmark every optimizer on IMDB: 10 independent training runs each,
# recording the final train/test loss and accuracy plus the fit history.
imdb = {
    name: {'loss': [], 'acc': [], 'val_loss': [], 'val_acc': [], 'history': []}
    for name in ['sgd', 'rmsprop', 'adagrad', 'adam']
}
for optimizer in imdb:
    print('--- {} ---'.format(optimizer))
    results = imdb[optimizer]
    for i in range(10):
        print('starting ', i)
        history, model = train_imdb(optimizer)
        # Re-evaluate the finished model on both batch streams for the final scores.
        train_loss, train_accuracy = model.evaluate(train_batches, verbose=False)
        val_loss, val_accuracy = model.evaluate(test_batches, verbose=False)
        results['loss'].append(train_loss)
        results['acc'].append(train_accuracy)
        results['val_loss'].append(val_loss)
        results['val_acc'].append(val_accuracy)
        results['history'].append(history)

In [None]:
# Print the mean of each final metric over all IMDB runs, per optimizer.
for opt in ['sgd', 'rmsprop', 'adagrad', 'adam']:
    runs = imdb[opt]
    for key in ['acc', 'val_acc', 'loss', 'val_loss']:
        print(opt, key, np.mean(runs[key]))

In [None]:
# Plot per-epoch learning curves on IMDB 8K, one panel per metric, with each
# optimizer's curve averaged over all of its recorded runs.
panels = [
    ('acc', 'Training Accuracy on IMDB 8K'),
    ('val_acc', 'Validation Accuracy on IMDB 8K'),
    ('loss', 'Training Loss on IMDB 8K'),
    ('val_loss', 'Validation Loss on IMDB 8K'),
]
plt.figure(figsize=(20, 4))
for i, (key, title) in enumerate(panels):
    plt.subplot(1, len(panels), i + 1)
    for opt in ['sgd', 'rmsprop', 'adagrad', 'adam']:
        curves = [
            # Keras logs accuracy as 'acc' in TF 1.x but as 'accuracy' in
            # TF 2.x; accept either spelling instead of raising KeyError.
            h.history[key if key in h.history else key.replace('acc', 'accuracy')]
            for h in imdb[opt]['history']
        ]
        # Average over however many runs/epochs were actually recorded
        # rather than assuming exactly 10 runs of 10 epochs.
        mean = np.mean(curves, axis=0)
        plt.plot(mean, label=opt)
    plt.legend()
    plt.title(title)
# Render the figure (a bare plt.plot() draws nothing).
plt.show()