In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

import numpy as np

In [2]:
batch_size = 64
shuffle_buffer_size = 100
num_classes = 10
epochs = 6

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.shuffle(shuffle_buffer_size).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(batch_size)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [3]:
def get_model():
    model = Sequential()
    model.add(layers.Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

In [4]:
def mse_grad(model, inputs, targets):
    with tf.GradientTape() as tape:
        loss_value = tf.keras.losses.mean_squared_error(y_true=targets, y_pred=model(inputs, training=True))
    return tape.gradient(loss_value, model.trainable_variables)

## HJB-AdaGrad Optimization

In [5]:
def hjb_optimize(model, train_ds, test_ds, r=100., epochs=6):
    train_loss_results = []
    train_accuracy_results = []

    for epoch in range(epochs):
        epoch_loss_avg = tf.keras.metrics.Mean()
        epoch_accuracy = tf.keras.metrics.CategoricalAccuracy()

        for x, y in train_ds:
            grads = mse_grad(model, x, y)
            grad_t = None
            for g in grads:
                if grad_t is None:
                    grad_t = tf.reshape(g, [-1])
                else:
                    grad_t = tf.concat([grad_t, tf.reshape(g, [-1])], 0)
            grad_norm_value = tf.norm(grad_t)
            loss_value = model.loss(y_true=y, y_pred=model(x, training=True))
            grad_t *= tf.sqrt(2*loss_value)
            grad_t /= grad_norm_value
            grad_t /= tf.sqrt(r)
            it = 0
            for g in grads:
                len_g = np.prod(g.shape)
                g = tf.reshape(grad_t[it:it+len_g], g.shape)
                it += len_g
            (model.optimizer).apply_gradients(zip(grads, model.trainable_variables))
#             print("Step: {},         Loss: {}".format(optimizer.iterations.numpy(), loss_value))

            # Track progress
            epoch_loss_avg.update_state(loss_value)
            epoch_accuracy.update_state(y, model(x, training=True))

        # End epoch
        train_loss_results.append(epoch_loss_avg.result())
        train_accuracy_results.append(epoch_accuracy.result())

        if epoch % 1 == 0:
            print("Epoch {:02d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch+1,
                                                                        epoch_loss_avg.result(),
                                                                        epoch_accuracy.result()))

    test_accuracy = tf.keras.metrics.CategoricalAccuracy()

    for x, y in test_ds:
        test_accuracy.update_state(y, model(x, training=False))

    print("Test set accuracy: {:.3%}".format(test_accuracy.result()))

In [6]:
model = get_model()
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=tf.keras.optimizers.Adagrad())
hjb_optimize(model, train_ds, test_ds, r=100., epochs=epochs)

Epoch 01: Loss: 0.825, Accuracy: 79.543%
Epoch 02: Loss: 0.302, Accuracy: 93.228%
Epoch 03: Loss: 0.258, Accuracy: 94.320%
Epoch 04: Loss: 0.230, Accuracy: 95.005%
Epoch 05: Loss: 0.208, Accuracy: 95.572%
Epoch 06: Loss: 0.190, Accuracy: 95.993%
Test set accuracy: 95.250%


## Standard AdaGrad Optimization

In [7]:
model = get_model()
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adagrad(),
              metrics=['accuracy'])
model.fit(train_ds, epochs=epochs)

score = model.evaluate(test_ds)
print('Test accuracy:', score[1])

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test accuracy: 0.9419000148773193
