# TPE on Keras

### MNIST training on Keras with Tree-structured Parzen Estimator Approach(TPE)
* This notebook runs MNIST training on Keras using TPE to find the best hyper parameters.
* The MNIST model here is just a simple one with one input layer, one hidden layer and one output layer, without convolution.
* Hyperparameters of the model include the followings:
* - output shape of the first layer
* - dropout rate of the first layer
* - output shape of the second layer
* - dropout rate of the second layer
* - use batchnormalization or not
* - batch size
* - number of epochs
* - validation rate
* I used hyperopt to run TPE.

#### Import libraries

In [19]:
from hyperopt import hp, tpe, Trials, fmin
import numpy as np
import pandas as pds
import random
from keras.layers import Activation, Dropout, BatchNormalization, Dense
from keras.models import Sequential
from keras.datasets import mnist
from keras.metrics import categorical_crossentropy
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

#### Define MNIST model
* includes data loading function, training function, fit function and evaluation function 

In [20]:
# MNIST class
class MNIST():
    def __init__(self,
                 l1_out=512, 
                 l2_out=512, 
                 l1_drop=0.2, 
                 l2_drop=0.2, 
                 bn1=0,
                 bn2=0,
                 batch_size=100, 
                 epochs=10, 
                 validation_split=0.1):
        self.l1_out = l1_out
        self.l2_out = l2_out
        self.l1_drop = l1_drop
        self.l2_drop = l2_drop
        self.bn1 = bn1
        self.bn2 = bn2
        self.batch_size = batch_size
        self.epochs = epochs
        self.validation_split = validation_split
        self.__x_train, self.__x_test, self.__y_train, self.__y_test = self.mnist_data()
        self.__model = self.mnist_model()
        params = """
        validation_split:\t{0}
        l1_drop:\t{1}
        l2_drop:\t{2}
        l1_out:\t{3}
        l2_out:\t{4}
        bn1:\t{5}
        bn2:\t{6}
        batch_size:\t{7}
        epochs:\t{8}
        """.format(self.validation_split,
                   self.l1_drop, self.l2_drop,
                   self.l1_out, self.l2_out,
                   self.bn1, self.bn2,
                   self.batch_size, self.epochs)
        print(params)
        
    # load mnist data from keras dataset
    def mnist_data(self):
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape(60000, 784)
        X_test = X_test.reshape(10000, 784)

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255
        X_test /= 255

        Y_train = np_utils.to_categorical(y_train, 10)
        Y_test = np_utils.to_categorical(y_test, 10)
        return X_train, X_test, Y_train, Y_test
    
    # mnist model
    def mnist_model(self):
        model = Sequential()
        model.add(Dense(self.l1_out, input_shape=(784,)))
        if self.bn1 == 0:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(self.l1_drop))
        model.add(Dense(self.l2_out))
        if self.bn2 == 0:
            model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(self.l2_drop))
        model.add(Dense(10))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(),
                      metrics=['accuracy'])

        return model
    
    # fit mnist model
    def mnist_fit(self):
        early_stopping = EarlyStopping(patience=0, verbose=1)
        
        self.__model.fit(self.__x_train, self.__y_train,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       verbose=0,
                       validation_split=self.validation_split,
                       callbacks=[early_stopping])
    
    # evaluate mnist model
    def mnist_evaluate(self):
        self.mnist_fit()
        
        evaluation = self.__model.evaluate(self.__x_test, self.__y_test, batch_size=self.batch_size, verbose=0)
        return evaluation

#### Runner function for the MNIST model

In [21]:
# function to run mnist class
def run_mnist(args):
    _mnist = MNIST(**args)
    mnist_evaluation = _mnist.mnist_evaluate()
    print("loss:{0} \t\t accuracy:{1}".format(mnist_evaluation[0], mnist_evaluation[1]))
    return mnist_evaluation[0]

## TPE
#### hyper parameters

In [24]:
hyperopt_parameters = {
    'validation_split': hp.uniform('validation_split', 0.0, 0.3),
    'l1_drop': hp.uniform('l1_drop', 0.0, 0.3),
    'l2_drop': hp.uniform('l2_drop', 0.0, 0.3),
    'l1_out': hp.choice('l1_out', [64, 128, 256, 512, 1024]),
    'l2_out': hp.choice('l2_out', [64, 128, 256, 512, 1024]),
    'bn1': hp.choice('bn1', [0, 1]),
    'bn2': hp.choice('bn2', [0, 1]),
    'batch_size': hp.choice('batch_size', [10, 100, 500]),
    'epochs': hp.choice('epochs', [5, 10, 20]),
}

In [25]:
# number of evaluation
max_evals = 20
# trials instance
trials = Trials()

best = fmin(
    # function to minimize
    run_mnist,
    # list of hyperparameters
    hyperopt_parameters,
    # optimization logic
    algo=tpe.suggest,
    max_evals=max_evals,
    trials=trials,
    # output evaluations
    verbose=1
)


        validation_split:	0.16570745280773105
        l1_drop:	0.10096781255984878
        l2_drop:	0.15115381196276345
        l1_out:	128
        l2_out:	64
        bn1:	1
        bn2:	1
        batch_size:	500
        epochs:	20
        
Epoch 00014: early stopping
loss:0.07864332050085068 		 accuracy:0.975600004196167

        validation_split:	0.24972794128682058
        l1_drop:	0.27528537158976835
        l2_drop:	0.11792311540238463
        l1_out:	64
        l2_out:	1024
        bn1:	0
        bn2:	0
        batch_size:	500
        epochs:	20
        
Epoch 00006: early stopping
loss:0.10114168524742126 		 accuracy:0.9692999929189682

        validation_split:	0.19141501122352375
        l1_drop:	0.10894339915491573
        l2_drop:	0.21922173273437492
        l1_out:	128
        l2_out:	512
        bn1:	0
        bn2:	1
        batch_size:	100
        epochs:	10
        
Epoch 00004: early stopping
loss:0.07896663922845619 		 accuracy:0.9762000072002411

        validation_s

In [26]:
print(best)

{'bn1': 0, 'batch_size': 2, 'l1_drop': 0.05952755060396381, 'epochs': 2, 'l2_out': 3, 'l1_out': 4, 'l2_drop': 0.16720803638229204, 'bn2': 1, 'validation_split': 0.02744485567313418}
