# Bayesian Optimization on Keras

### MNIST training on Keras with Bayesian optimization
* This notebook trains an MNIST classifier in Keras and uses Bayesian optimization to find the best hyperparameters.
* The MNIST model here is just a simple one with one input layer, one hidden layer and one output layer, without convolution.
* Hyperparameters of the model include the following:
  - output size (number of units) of the first layer
  - dropout rate of the first layer
  - output size (number of units) of the second layer
  - dropout rate of the second layer
  - batch size
  - number of epochs
  - validation split (fraction of the training data held out for validation)
* I used GPy and GPyOpt to run Bayesian optimization.

#### Import libraries

In [1]:
import GPy, GPyOpt
import numpy as np
import pandas as pds
import random
from keras.layers import Activation, Dropout, BatchNormalization, Dense
from keras.models import Sequential
from keras.datasets import mnist
from keras.metrics import categorical_crossentropy
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


#### Define MNIST model
* includes the data-loading, model-building, fitting, and evaluation methods

In [2]:
# MNIST class
class MNIST():
    """Dense MNIST classifier whose hyperparameters are fixed at construction.

    Creating an instance loads the MNIST data and compiles the Keras model;
    ``mnist_evaluate`` then trains the model and scores it on the test split.

    Parameters
    ----------
    first_input : int
        Number of features per flattened input sample.
    last_output : int
        Number of output classes (units of the final softmax layer).
    l1_out, l2_out : int
        Number of units in the first and second Dense layers.
    l1_drop, l2_drop : float
        Dropout rates applied after the first and second Dense layers.
    batch_size : int
        Batch size used for both training and evaluation.
    epochs : int
        Maximum number of training epochs.
    validation_split : float
        Fraction of the training data passed to ``fit`` as validation data.
    """

    def __init__(self, first_input=784, last_output=10,
                 l1_out=512,
                 l2_out=512,
                 l1_drop=0.2,
                 l2_drop=0.2,
                 batch_size=100,
                 epochs=10,
                 validation_split=0.1):
        self.__first_input = first_input
        self.__last_output = last_output
        self.l1_out = l1_out
        self.l2_out = l2_out
        self.l1_drop = l1_drop
        self.l2_drop = l2_drop
        self.batch_size = batch_size
        self.epochs = epochs
        self.validation_split = validation_split
        # Data and model depend on the attributes above, so they are built last.
        self.__x_train, self.__x_test, self.__y_train, self.__y_test = self.mnist_data()
        self.__model = self.mnist_model()

    # load mnist data from keras dataset
    def mnist_data(self):
        """Load MNIST, flatten and scale the images, and one-hot encode the labels.

        Returns
        -------
        tuple
            ``(X_train, X_test, Y_train, Y_test)``: float32 feature matrices
            scaled by 1/255 and one-hot label matrices.
        """
        (X_train, y_train), (X_test, y_test) = mnist.load_data()

        # Flatten every image into a single row. The sample count is read from
        # the array itself instead of being hard-coded.
        X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
        X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')
        X_train /= 255
        X_test /= 255

        # The one-hot width follows the size of the output layer so that the
        # labels and the model predictions have matching shapes.
        Y_train = np_utils.to_categorical(y_train, self.__last_output)
        Y_test = np_utils.to_categorical(y_test, self.__last_output)
        return X_train, X_test, Y_train, Y_test

    # mnist model
    def mnist_model(self):
        """Build and compile the network: two Dense+ReLU+Dropout stages and a softmax output.

        Returns
        -------
        The compiled Sequential model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
        """
        model = Sequential()
        model.add(Dense(self.l1_out, input_shape=(self.__first_input,)))
        model.add(Activation('relu'))
        model.add(Dropout(self.l1_drop))
        model.add(Dense(self.l2_out))
        model.add(Activation('relu'))
        model.add(Dropout(self.l2_drop))
        model.add(Dense(self.__last_output))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(),
                      metrics=['accuracy'])

        return model

    # fit mnist model
    def mnist_fit(self):
        """Train the model on the training split.

        Early stopping is attached only when a validation split is requested:
        the callback is created without an explicit ``monitor`` and therefore
        relies on a validation metric, which does not exist when
        ``validation_split`` is 0.
        """
        callbacks = []
        if self.validation_split > 0:
            callbacks.append(EarlyStopping(patience=0, verbose=1))

        self.__model.fit(self.__x_train, self.__y_train,
                         batch_size=self.batch_size,
                         epochs=self.epochs,
                         verbose=0,
                         validation_split=self.validation_split,
                         callbacks=callbacks)

    # evaluate mnist model
    def mnist_evaluate(self):
        """Train the model, then evaluate it on the test split.

        Returns
        -------
        The result of ``model.evaluate`` on the test data. With the compile
        settings used here the caller reads it as ``[loss, accuracy]``.
        """
        self.mnist_fit()

        evaluation = self.__model.evaluate(self.__x_test, self.__y_test,
                                           batch_size=self.batch_size, verbose=0)
        return evaluation

#### Runner function for the MNIST model

In [3]:
# function to run mnist class
def run_mnist(first_input=784, last_output=10,
              l1_out=512, l2_out=512,
              l1_drop=0.2, l2_drop=0.2,
              batch_size=100, epochs=10, validation_split=0.1):
    """Build an ``MNIST`` model with the given hyperparameters, train it and score it.

    All arguments are forwarded by keyword to ``MNIST``.

    Returns
    -------
    Whatever ``MNIST.mnist_evaluate`` returns for the freshly built model.
    """
    hyperparameters = dict(
        first_input=first_input,
        last_output=last_output,
        l1_out=l1_out,
        l2_out=l2_out,
        l1_drop=l1_drop,
        l2_drop=l2_drop,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=validation_split,
    )
    return MNIST(**hyperparameters).mnist_evaluate()

## Bayesian Optimization
#### bounds for hyper parameters

In [4]:
# search space for the hyper-parameters of the mnist model
# the entries should be listed with the continuous variables first and the
# discrete variables after them
bounds = [
    {'name': name, 'type': kind, 'domain': domain}
    for name, kind, domain in (
        ('validation_split', 'continuous', (0.0, 0.3)),
        ('l1_drop',          'continuous', (0.0, 0.3)),
        ('l2_drop',          'continuous', (0.0, 0.3)),
        ('l1_out',           'discrete',   (64, 128, 256, 512, 1024)),
        ('l2_out',           'discrete',   (64, 128, 256, 512, 1024)),
        ('batch_size',       'discrete',   (10, 100, 500)),
        ('epochs',           'discrete',   (5, 10, 20)),
    )
]

#### Bayesian Optimization

In [5]:
# function to optimize mnist model
def f(x):
    """Objective function: train one MNIST model and return its test loss.

    Parameters
    ----------
    x : array-like
        One candidate point, laid out in the same order as ``bounds``:
        ``[validation_split, l1_drop, l2_drop, l1_out, l2_out, batch_size,
        epochs]``. The optimizer passes it as a 2-D array with a single row;
        a plain 1-D vector is accepted as well.

    Returns
    -------
    The first element of the evaluation result (the test loss), which is the
    quantity being minimised.
    """
    print(x)

    # Work on the single row so that every entry is a true scalar. Calling
    # float()/int() on a 1-element array slice is deprecated in recent NumPy.
    row = np.atleast_2d(x)[0]

    def _rate(value):
        # The optimizer can propose values a hair below the lower bound of 0.0
        # (this notebook's own output contains -3.47e-18); clamp them so that a
        # negative rate never reaches the model.
        return max(0.0, float(value))

    evaluation = run_mnist(
        l1_drop=_rate(row[1]),
        l2_drop=_rate(row[2]),
        l1_out=int(row[3]),
        l2_out=int(row[4]),
        batch_size=int(row[5]),
        epochs=int(row[6]),
        validation_split=_rate(row[0]))
    print("LOSS:\t{0} \t ACCURACY:\t{1}".format(evaluation[0], evaluation[1]))
    print(evaluation)
    return evaluation[0]

#### Optimizer instance

In [6]:
# optimizer
# Builds the Bayesian optimizer with `f` as the objective and `bounds` as the
# search space. NOTE: the output recorded under this cell shows `f` already
# being evaluated here, so constructing the optimizer trains several models
# before run_optimization is ever called.
opt_mnist = GPyOpt.methods.BayesianOptimization(f=f, domain=bounds)

[[  8.28040134e-02   2.94981174e-01   7.51470156e-02   2.56000000e+02
    5.12000000e+02   5.00000000e+02   1.00000000e+01]]
Epoch 00009: early stopping
LOSS:	0.0633744144346565 	 ACCURACY:	0.9799000054597855
[0.063374414434656495, 0.97990000545978551]
[[  1.57578393e-02   2.54071426e-01   2.02050321e-02   6.40000000e+01
    5.12000000e+02   1.00000000e+02   1.00000000e+01]]
Epoch 00006: early stopping
LOSS:	0.08654434870812111 	 ACCURACY:	0.972300005555153
[0.08654434870812111, 0.97230000555515295]
[[  1.28377149e-01   1.96911207e-03   2.11710770e-01   1.02400000e+03
    5.12000000e+02   5.00000000e+02   5.00000000e+00]]
Epoch 00005: early stopping
LOSS:	0.057901502959430216 	 ACCURACY:	0.9818000048398972
[0.057901502959430216, 0.98180000483989716]
[[  1.05757625e-01   3.31146774e-02   2.07325052e-01   5.12000000e+02
    6.40000000e+01   5.00000000e+02   5.00000000e+00]]
LOSS:	0.06972178800497204 	 ACCURACY:	0.9781000047922135
[0.069721788004972043, 0.97810000479221348]
[[  1.71757378

#### Running optimization

In [7]:
# optimize mnist model
# max_iter caps the number of optimization iterations at 10; the recorded
# output shows `f` being called (one model trained and evaluated) per proposal.
opt_mnist.run_optimization(max_iter=10)

[[  0.00000000e+00   3.00000000e-01   0.00000000e+00   1.02400000e+03
    5.12000000e+02   5.00000000e+02   5.00000000e+00]]
LOSS:	0.06117972570937127 	 ACCURACY:	0.9800999999046326
[0.061179725709371267, 0.98009999990463259]
[[  3.00000000e-01   0.00000000e+00   3.00000000e-01   1.02400000e+03
    5.12000000e+02   5.00000000e+02   1.00000000e+01]]
Epoch 00007: early stopping
LOSS:	0.06730220378376543 	 ACCURACY:	0.9805999994277954
[0.067302203783765432, 0.98059999942779541]
[[  1.29882915e-01   2.45356433e-02   2.51330623e-01   5.12000000e+02
    1.28000000e+02   5.00000000e+02   5.00000000e+00]]
LOSS:	0.07530965844634921 	 ACCURACY:	0.9765999972820282
[0.075309658446349209, 0.97659999728202818]
[[  3.00000000e-01  -3.46944695e-18   3.00000000e-01   2.56000000e+02
    5.12000000e+02   5.00000000e+02   2.00000000e+01]]
Epoch 00007: early stopping
LOSS:	0.08680501421913504 	 ACCURACY:	0.9733999997377396
[0.086805014219135043, 0.97339999973773961]
[[   0.    0.    0.  256.  512.  500.   

#### The output

In [20]:
# print optimized mnist model
# one "<tab>name:<tab>value" line per hyper-parameter; x_opt holds the values
# in the same order as the entries of bounds
param_lines = "".join(
    "\t{0}:\t{1}\n".format(spec["name"], value)
    for spec, value in zip(bounds, opt_mnist.x_opt)
)
print("\nOptimized Parameters:\n" + param_lines)
print("optimized loss: {0}".format(opt_mnist.fx_opt))


Optimized Parameters:
	validation_split:	0.1283771492891259
	l1_drop:	0.0019691120669648177
	l2_drop:	0.21171077023226453
	l1_out:	1024.0
	l2_out:	512.0
	batch_size:	500.0
	epochs:	5.0

optimized loss: [ 0.0579015]


In [21]:
# raw optimum as an array; the printing cell above pairs its entries
# positionally with the names in `bounds`
opt_mnist.x_opt

array([  1.28377149e-01,   1.96911207e-03,   2.11710770e-01,
         1.02400000e+03,   5.12000000e+02,   5.00000000e+02,
         5.00000000e+00])