## Kaggel: Digit Recognizer(MNIST)  by Hyperopt  
Kaggle Digit recognizer: https://www.kaggle.com/c/digit-recognizer  
GPyOpt: https://github.com/SheffieldML/GPyOpt  

### Score:
* max_evals= 10, score: -- ( -- mins: NVIDIA GTX1060)
* max_evals= 20, score: -- ( -- mins: NVIDIA GTX1060)
* max_evals=100, score: -- ( -- mins: NVIDIA GTX1060)

In [76]:
import warnings
warnings.filterwarnings('ignore')

import GPy
import GPyOpt

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Reshape, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import np_utils
import keras

from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

# fix random seed
import tensorflow as tf
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
seed = 123
rn.seed(seed)
np.random.seed(seed)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
from keras import backend as K
tf.set_random_seed(seed)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

## Data preparation: MNIST from Kaggle

In [77]:
train = pd.read_csv('../train.csv')
label = train.label
train = train.drop(['label'], axis=1)

X_train, X_test, Y_train, Y_test = train_test_split(train, label, test_size=0.2, shuffle=True, random_state=seed)
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
Y_train = np_utils.to_categorical(Y_train, 10)
Y_test = np_utils.to_categorical(Y_test, 10)

## Hyperparameters:

In [78]:
params = [
    {'name': 'Dropout_0',        'type': 'continuous',  'domain': (0.0, 1.0)},
    {'name': 'Dropout_1',        'type': 'continuous',  'domain': (0.0, 1.0)},
    {'name': 'Dropout_2',        'type': 'continuous',  'domain': (0.0, 1.0)},
    {'name': 'Dropout_3',        'type': 'continuous',  'domain': (0.0, 1.0)},
    #{'name': 'BatchNorm_0',      'type': 'discrete',    'domain': (0, 1)},
    #{'name': 'BatchNorm_1',      'type': 'discrete',    'domain': (0, 1)},
    #{'name': 'BatchNorm_2',      'type': 'discrete',    'domain': (0, 1)},
    #{'name': 'BatchNorm_3',      'type': 'discrete',    'domain': (0, 1)},
    {'name': 'Dense_0',          'type': 'discrete',    'domain': (256, 512)},
    {'name': 'Dense_1',          'type': 'discrete',    'domain': (128, 256)},
    {'name': 'validation_split', 'type': 'continuous',  'domain': (0.1, 0.3)},
    {'name': 'batch_size',       'type': 'discrete',    'domain': (8, 16, 32)},
]

In [79]:
# def param(p_name):
#     index = [p['name'] for p in params].index(p_name)
#     return params[index]

# param('Dropout_0')

## CNN Model:

In [80]:
cnt = 0
def cnn_model(x):
    
    def Param(p_name):
        p_index = [p['name'] for p in params].index(p_name)
        p_type = params[p_index]['type']
        if p_type == 'continuous':
            return float(x[:, p_index])
        else:
            return int(x[:, p_index])
    
    initializer = keras.initializers.glorot_uniform(seed=seed)
    
    model = Sequential() 
    model.add(Reshape((28,28,1), input_shape=(784,)))
        
    model.add(Conv2D(32, (5,5), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(Conv2D(32, (5,5), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(MaxPool2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_0'), seed=seed))
    
    model.add(Conv2D(64, (3,3), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(Conv2D(64, (3,3), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_1'), seed=seed))

    model.add(Flatten())
    model.add(Dense(Param('Dense_0'), activation="relu", kernel_initializer=initializer))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_2'), seed=seed))
    model.add(Dense(Param('Dense_1'), activation = "relu", kernel_initializer=initializer))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_3'), seed=seed))
    
    model.add(Dense(10, activation = "softmax", kernel_initializer=initializer))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=1, min_lr=1e-5,verbose=1, cooldown=1)
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')
    #checkpointer = ModelCheckpoint(filepath='checkpoint'+str(cnt)+'.hdf5', monitor='val_loss', verbose=1, save_best_only=True)

    hist = model.fit(X_train, Y_train,
                     batch_size=Param('batch_size'),
                     epochs=50,
                     verbose=1,
                     shuffle=True,
                     validation_split=Param('validation_split'),
                     callbacks=[reduce_lr, early_stopping])
    
    #loss, acc = model.evaluate(X_test, Y_test, batch_size=32 , verbose=0)
    val_loss = hist.history['val_loss'][-1]
    val_acc = hist.history['val_acc'][-1]
    
    global cnt
    print(cnt, ': Val_loss:',val_loss, ', Val_acc:', val_acc)
    cnt += 1
    #return {'loss': loss, 'acc': acc,  'model': model, 'hist': hist}
    #return val_loss, val_acc, model, hist
    return val_loss

## Bayesian Optimization:

In [81]:
opt = GPyOpt.methods.BayesianOptimization(f=cnn_model, 
                                          domain=params,
                                          acquisition_type='EI')

Train on 28266 samples, validate on 5334 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50

Epoch 00009: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50

Epoch 00013: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 00020: early stopping
0 : Val_loss: 0.026838116568562016 , Val_acc: 0.9936257967754031
Train on 25999 samples, validate on 7601 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 7/50
Epoch 8/50
Epoch 9/50

Epoch 00009: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 10/50
Epoch 11/50
Epoch 12/50

Epoch 00012: ReduceLROnPlateau reducing learning 

Epoch 11/50
Epoch 12/50

Epoch 00012: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 00029: early stopping
2 : Val_loss: 0.028198241371597994 , Val_acc: 0.9894445840663483
Train on 27325 samples, validate on 6275 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50

Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50

Epoch 00009: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 10/50
Epoch 11/50
Epoch 12/50

Epoch 00012: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 00016: early stopping
3 : Val_loss: 0.040059519495315984 , Val_acc: 0.9877290836653386
Train on 27344 samples, validate on 6256 samples
Epoch 1/50


In [82]:
opt.run_optimization(max_iter=10)

Train on 23520 samples, validate on 10080 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50

Epoch 00012: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50

Epoch 00016: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 00020: early stopping
5 : Val_loss: 0.028242620949712877 , Val_acc: 0.9919642857142857
Train on 30240 samples, validate on 3360 samples
Epoch 1/50
Epoch 2/50

Epoch 00002: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 3/50
Epoch 4/50

Epoch 00004: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 5/50
Epoch 6/50
Epoch 7/50

Epoch 00007: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 8/50
Epoch 9/50
Epoch 00009: early s

In [103]:
x_best = opt.x_opt
print([i for i in x_best])

y_best = opt.fx_opt
print(y_best)

[1.0, 0.0, 0.0, 0.0, 256.0, 128.0, 0.1, 16.0]
0.022346920996573143


In [122]:
for i in range(len(x_best)):
    params[i]['domain'] = x_best[i]
params

[{'domain': 1.0, 'name': 'Dropout_0', 'type': 'continuous'},
 {'domain': 0.0, 'name': 'Dropout_1', 'type': 'continuous'},
 {'domain': 0.0, 'name': 'Dropout_2', 'type': 'continuous'},
 {'domain': 0.0, 'name': 'Dropout_3', 'type': 'continuous'},
 {'domain': 256.0, 'name': 'Dense_0', 'type': 'discrete'},
 {'domain': 128.0, 'name': 'Dense_1', 'type': 'discrete'},
 {'domain': 0.1, 'name': 'validation_split', 'type': 'continuous'},
 {'domain': 16.0, 'name': 'batch_size', 'type': 'discrete'}]

In [121]:
x_best[0]

1.0

In [123]:
opt.plot_acquisition()