In [58]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Conv2D, Dropout, BatchNormalization, MaxPooling2D, Flatten, Activation
from keras.datasets import mnist
from keras.utils import np_utils
import tensorflow as tf
from keras import backend as K

In [59]:
NUM_CLASSES = 10

def get_input_datasets(use_bfloat16=False):
    """Downloads the MNIST dataset and creates train and eval dataset objects.

    Args:
      use_bfloat16: Boolean to determine if input should be cast to bfloat16

    Returns:
      Train dataset, eval dataset and input shape.

    """
    # input image dimensions
    img_rows, img_cols = 28, 28
    cast_dtype = tf.bfloat16 if use_bfloat16 else tf.float32

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    if tf.keras.backend.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # convert class vectors to binary class matrices
    y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
    y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

    # train dataset
    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_ds = train_ds.repeat()
    train_ds = train_ds.map(lambda x, y: (tf.cast(x, cast_dtype), y))
    train_ds = train_ds.batch(64, drop_remainder=True)

    # eval dataset
    eval_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    eval_ds = eval_ds.repeat()
    eval_ds = eval_ds.map(lambda x, y: (tf.cast(x, cast_dtype), y))
    eval_ds = eval_ds.batch(64, drop_remainder=True)

    return train_ds, eval_ds, input_shape


In [60]:
def get_model(input_shape, dropout2_rate=0.5):
   
    # input image dimensions
    img_rows, img_cols = 28, 28

    
    # Define a CNN model to recognize MNIST.
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, name="conv2d_1"))
    model.add(Conv2D(64, (3, 3), activation='relu', name="conv2d_2"))
    model.add(MaxPooling2D(pool_size=(2, 2), name="maxpool2d_1"))
    model.add(Dropout(0.25, name="dropout_1"))
    model.add(Flatten(name="flatten"))
    model.add(Dense(128, activation='relu', name="dense_1"))
    model.add(Dropout(dropout2_rate, name="dropout_2"))
    model.add(Dense(NUM_CLASSES, activation='softmax', name="dense_2"))
    
    return model

In [61]:
train_ds, eval_ds, input_shape = get_input_datasets()

In [66]:
def fit_with(input_shape, verbose, dropout2_rate, lr):

    # Create the model using a specified hyperparameters.
    model = get_model(input_shape, dropout2_rate)

    # Train the model for a specified number of epochs.
    optimizer= Adam(lr = lr)
    
    model.compile(optimizer=optimizer, 
                  loss    = 'mse', 
                  metrics = ['accuracy'])

    # Train the model with the train dataset.
    model.fit(train_ds,
              validation_data  = eval_ds,
              epochs           = 5,
              validation_steps = 60000 // 32,  
              steps_per_epoch  = 60000 // 32, 
              verbose          = verbose)

    # Evaluate the model with the eval dataset.
    score = model.evaluate(eval_ds, steps = 10, verbose=1)
    
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print('\n')

    # Return the accuracy.

    return score[1]

In [67]:
from functools import partial

input_shape = input_shape
verbose     = 1

fit_with_partial = partial(fit_with, input_shape, verbose)

The BayesianOptimization object will work out of the box without much tuning needed. The main method you should be aware of is maximize, which does exactly what you think it does.

There are many parameters you can pass to maximize, nonetheless, the most important ones are:

n_iter: How many steps of bayesian optimization you want to perform. The more steps the more likely to find a good maximum you are.
init_points: How many steps of random exploration you want to perform. Random exploration can help by diversifying the exploration space.

In [68]:
# Bounded region of parameter space

bounds = {'lr'           :(1e-4, 1e-2),
          'dropout2_rate':(0.05, 0.5),
          'batch_size'   :(1, 4.001),
          'num_filters'  :(1, 4.001),
          'kernel_size'  :(2, 4.001)}
          

bounds_2 = {'dropout2_rate': (0.1, 0.5), 
            'lr'           : (1e-4, 1e-2)}

In [69]:
from bayes_opt import BayesianOptimization


optimizer = BayesianOptimization(
    f            = fit_with_partial,
    pbounds      = bounds_2,
    verbose      = 1,  # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
    random_state = 1
)

optimizer.maximize(init_points = 10, n_iter = 2,)

for i, res in enumerate(optimizer.res):
    print("Iteration {}: \n\t{}".format(i, res))

print(optimizer.max)

|   iter    |  target   | dropou... |    lr     |
-------------------------------------------------
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.037187501043081286
Test accuracy: 0.8140625


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.007987069408409297
Test accuracy: 0.959375


| [95m 2       [0m | [95m 0.9594  [0m | [95m 0.1     [0m | [95m 0.003093[0m |
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.0018680369035791954
Test accuracy: 0.9890625


| [95m 3       [0m | [95m 0.9891  [0m | [95m 0.1587  [0m | [95m 0.001014[0m |
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.0074801745126023885
Test accuracy: 0.9625


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.012812500307336449
Test accuracy: 0.9359375


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.0628124199807644
Test accuracy: 0.6859375


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.037187463045120

In [70]:
print(optimizer.max)

{'target': 0.9937499761581421, 'params': {'dropout2_rate': 0.3883228578249672, 'lr': 0.00020765186492235896}}


# References

- https://github.com/fmfn/BayesianOptimization
- https://stackoverflow.com/questions/55586472/mnist-data-set-up-batch
- https://keras.io/examples/mnist_cnn/
- https://www.youtube.com/watch?v=sXdxyUCCm8s
- https://machinelearningapplied.com/hyperparameter-search-with-bayesian-optimization-for-keras-cnn-classification-and-ensembling/
- https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
- https://stackoverflow.com/questions/55586472/mnist-data-set-up-batch