## Kaggle: Digit Recognizer (MNIST) with GPyOpt  
Kaggle Digit recognizer: https://www.kaggle.com/c/digit-recognizer  
GPyOpt: https://github.com/SheffieldML/GPyOpt  

### Score:
* With initial_design_numdata=20, max_iter=50: score 99.457 (515 mins on an NVIDIA GTX1060). The cells below use smaller values (initial_design_numdata=3, max_iter=5).

In [1]:
import warnings
warnings.filterwarnings('ignore')

import GPy
import GPyOpt

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Reshape, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import np_utils
import keras

from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

# Fix every random seed this notebook controls so that runs are repeatable.
import tensorflow as tf
import random as rn
import os
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here may only affect child processes -- confirm.
os.environ['PYTHONHASHSEED'] = '0'
# Single seed shared by Python, NumPy, TensorFlow, the train/test split and
# the Keras initializers/dropout layers used further down.
seed = 123
rn.seed(seed)
np.random.seed(seed)
# Restrict TensorFlow to one intra-op and one inter-op thread
# (presumably to avoid nondeterminism from parallel execution -- confirm).
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
from keras import backend as K
tf.set_random_seed(seed)
# Create the session on the default graph with the config above and make
# Keras use it.
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

Using TensorFlow backend.


## Data preparation: MNIST from Kaggle

In [2]:
# Load the Kaggle training CSV and separate the digit label from the pixels.
train = pd.read_csv('../train.csv')
label = train['label']
train = train.drop('label', axis=1)

# Hold out 20% of the rows; the shuffle is driven by the shared seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    train,
    label,
    test_size=0.2,
    shuffle=True,
    random_state=seed,
)

# Cast the pixel columns to float32 and divide by 255.
X_train, X_test = (
    frame.astype('float32') / 255 for frame in (X_train, X_test)
)

# One-hot encode the labels over the 10 digit classes.
Y_train, Y_test = (
    np_utils.to_categorical(targets, 10) for targets in (Y_train, Y_test)
)

## Hyperparameters:

In [3]:
# Search space handed to GPyOpt: one entry per tunable hyperparameter.
# The position of an entry in this list is the column index that
# cnn_model uses to read that hyperparameter from an array argument.
params = [
    # Dropout rates of the four Dropout layers.
    dict(name='Dropout_0', type='continuous', domain=(0.0, 0.5)),
    dict(name='Dropout_1', type='continuous', domain=(0.0, 0.5)),
    dict(name='Dropout_2', type='continuous', domain=(0.0, 0.5)),
    dict(name='Dropout_3', type='continuous', domain=(0.0, 0.5)),
    # Disabled: per-layer BatchNorm switches.
    # dict(name='BatchNorm_0', type='discrete', domain=(0, 1)),
    # dict(name='BatchNorm_1', type='discrete', domain=(0, 1)),
    # dict(name='BatchNorm_2', type='discrete', domain=(0, 1)),
    # dict(name='BatchNorm_3', type='discrete', domain=(0, 1)),
    # Unit counts of the two hidden Dense layers.
    dict(name='Dense_0', type='discrete', domain=(128, 256, 512)),
    dict(name='Dense_1', type='discrete', domain=(64, 128, 256)),
    # Fraction of the training data that model.fit holds out for validation.
    dict(name='validation_split', type='continuous', domain=(0.1, 0.3)),
    # Disabled: batch size.
    # dict(name='batch_size', type='discrete', domain=(32, 64)),
]

## CNN Model:

In [4]:
cnt = 0
loss_list = [99]
def cnn_model(x):
    """Build, train and score the CNN for one hyperparameter setting.

    Parameters
    ----------
    x : numpy.ndarray or list of dict
        Either a single hyperparameter vector (1-D, or 2-D with exactly one
        row) whose columns follow the order of the module-level ``params``
        list, or a list of ``params``-style dicts whose ``'domain'`` entry
        holds the concrete value to use for that hyperparameter.

    Returns
    -------
    tuple
        ``(loss, acc, model, hist)``: the validation loss and validation
        accuracy recorded for the last epoch that ran, the trained Keras
        model, and the ``History`` object returned by ``model.fit``.

    Raises
    ------
    ValueError
        If ``x`` is an array holding more than one hyperparameter vector,
        or if a requested hyperparameter name is not in ``params``.
    """
    global cnt

    # Resolve every hyperparameter value once, keyed by name.
    names = [p['name'] for p in params]
    if isinstance(x, np.ndarray):
        row = np.atleast_2d(x)
        if row.shape[0] != 1:
            raise ValueError(
                'cnn_model expects a single hyperparameter vector, got '
                + str(row.shape[0]))
        values = dict(zip(names, row[0]))
    else:
        # List of params-style dicts: take the values from the argument
        # itself instead of from the global search space.
        values = {p['name']: p['domain'] for p in x}

    def Param(p_name):
        # The declared type in the search space decides the Python type
        # handed to Keras (float for rates, int for unit counts).
        p_type = params[names.index(p_name)]['type']
        value = values[p_name]
        return float(value) if p_type == 'continuous' else int(value)

    initializer = keras.initializers.glorot_uniform(seed=seed)

    model = Sequential()
    # The input rows are flat 784-pixel vectors; reshape to 28x28x1 images.
    model.add(Reshape((28,28,1), input_shape=(784,)))

    # Convolution block 1: two 5x5 convolutions with 32 filters.
    model.add(Conv2D(32, (5,5), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(Conv2D(32, (5,5), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(MaxPool2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_0'), seed=seed))

    # Convolution block 2: two 3x3 convolutions with 64 filters.
    model.add(Conv2D(64, (3,3), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(Conv2D(64, (3,3), padding='same', activation='relu', kernel_initializer=initializer))
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_1'), seed=seed))

    # Classifier head: two tunable Dense layers, then a 10-way softmax.
    model.add(Flatten())
    model.add(Dense(Param('Dense_0'), activation="relu", kernel_initializer=initializer))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_2'), seed=seed))
    model.add(Dense(Param('Dense_1'), activation = "relu", kernel_initializer=initializer))
    model.add(BatchNormalization())
    model.add(Dropout(Param('Dropout_3'), seed=seed))

    model.add(Dense(10, activation = "softmax", kernel_initializer=initializer))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    # Halve the learning rate as soon as val_loss fails to improve, and stop
    # training after one epoch without improvement.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=0, min_lr=1e-5,verbose=1, cooldown=1)
    early_stopping = EarlyStopping(monitor='val_loss', patience=1, verbose=1, mode='auto')

    hist = model.fit(X_train, Y_train,
                     batch_size=32,
                     epochs=50,
                     verbose=1,
                     shuffle=True,
                     validation_split=Param('validation_split'),
                     callbacks=[reduce_lr, early_stopping])

    # The score comes from the validation history of the last epoch that
    # ran (not necessarily the best epoch); the held-out X_test/Y_test
    # split is not used here.
    loss = hist.history['val_loss'][-1]
    acc = hist.history['val_acc'][-1]

    print(cnt, ': Test_loss:', loss, ', Test_acc:', acc)
    print('Model: ', model, '\n\n')
    cnt += 1

    return loss, acc, model, hist

## Model fitting function:

In [5]:
def f(x):
    """Objective minimized by the Bayesian optimizer.

    Parameters
    ----------
    x : array-like
        One hyperparameter vector, or a 2-D batch with one vector per row.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(n_rows, 1)`` holding ``-log(val_acc)`` of
        the network trained for each row.
    """
    x = np.atleast_2d(x)
    fs = np.zeros((x.shape[0], 1))
    for i in range(x.shape[0]):
        # Train on row i only. Slicing (not indexing) keeps the argument a
        # 2-D single-row array, which is the layout cnn_model reads.
        _, acc, _, _ = cnn_model(x[i:i + 1])
        fs[i] = -np.log(acc)
    return fs

## Bayesian Optimization:

In [6]:
# Set up the optimizer over the `params` search space.  Construction already
# evaluates `f` on the initial design (three points here), so this cell
# trains three networks before it returns.
opt = GPyOpt.methods.BayesianOptimization(
    f=f,
    domain=params,
    initial_design_numdata=3,
    model_type='GP_MCMC',
    acquisition_type='EI_MCMC',
    exact_feval=True,
)

Train on 25280 samples, validate on 8320 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 00006: early stopping
0 : Test_loss: 0.05201436398112072 , Test_acc: 0.9846153846153847
Model:  <keras.engine.sequential.Sequential object at 0x7f8c23ce4898> 


Train on 29013 samples, validate on 4587 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 00004: early stopping
1 : Test_loss: 0.05294726197210881 , Test_acc: 0.9827774144320907
Model:  <keras.engine.sequential.Sequential object at 0x7f8bfc183710> 


Train on 29060 samples, validate on 4540 samples
Epoch 1/50
Epoch 2/50

Epoch 00002: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 00002: early stopping
2 : Test_loss: 0.1081506973468916 , Test_acc: 0.964977973568282
Model:  <keras.engine.sequential.Sequential object a

In [7]:
# Restart the run counter that cnn_model prints, so the optimization
# iterations are numbered from 0 again.
cnt = 0
# Run up to 5 Bayesian-optimization iterations; each one calls `f`, which
# trains a network.
opt.run_optimization(max_iter=5)

reconstraining parameters GP_regression.rbf
reconstraining parameters GP_regression.Gaussian_noise.variance


Train on 23520 samples, validate on 10080 samples
Epoch 1/50
Epoch 2/50

Epoch 00002: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 00002: early stopping
0 : Test_loss: 0.0912989710604528 , Test_acc: 0.9737103174603174
Model:  <keras.engine.sequential.Sequential object at 0x7f8a67257f98> 


Train on 23520 samples, validate on 10080 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 00004: early stopping
1 : Test_loss: 0.06432711085515275 , Test_acc: 0.9808531746031746
Model:  <keras.engine.sequential.Sequential object at 0x7f8a481109b0> 


Train on 25649 samples, validate on 7951 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 00004: early stopping
2 : Test_loss: 0.06295149293208689 , Test_acc: 0.9811344484970443
Model:  <keras.engine.sequential.Sequential object at 0x7f8a4147dfd0> 


## Results:

In [8]:
# Best hyperparameter vector found by the optimizer, and the objective
# value (-log of validation accuracy) reached there.
x_best = opt.x_opt
y_best = opt.fx_opt

print(list(x_best))
print(y_best)

[0.24926097013931897, 0.4878302952129035, 0.17440831641662402, 0.18874847856231725, 512.0, 256.0, 0.242406918078481]
0.013223946026999119


## Apply the best hyperparameters for the model:

In [None]:
# NOTE: `best_params` is an alias of `params`, not a copy -- the loop below
# overwrites each entry's search domain in place with the selected value.
best_params = params
p_type = [entry['type'] for entry in best_params]

for idx, best_value in enumerate(x_best):
    # Discrete hyperparameters (layer sizes) are stored as plain ints.
    is_discrete = p_type[idx] == 'discrete'
    best_params[idx]['domain'] = int(best_value) if is_discrete else best_value

best_params

## Train with the best hyperparameters:

In [None]:
# Retrain once with the selected values.  `best_params` is a list of dicts,
# so cnn_model takes its list branch and reads each value from 'domain'.
val_loss, val_acc, model, hist = cnn_model(best_params)

## Prediction for the submission:

In [None]:
# Load the Kaggle test set, remember its row index for the submission, and
# scale the pixels the same way as the training data.
test = pd.read_csv('../test.csv')
test_index = test.index
test = test.values.astype('float32') / 255.0

# Class probabilities per image; the predicted digit is the most probable
# class.
pred = model.predict(test, verbose=1)
result = np.argmax(pred, axis=1)

## Submission file output:

In [None]:
# ImageId is the test row index shifted by one; Label is the predicted digit.
submission = pd.DataFrame(
    {
        'ImageId': test_index + 1,
        'Label': result,
    }
)
submission.to_csv('gpyopt_submission.csv', index=False)

### approximate accuracy:

In [None]:
# comparison with the best score
prev_cnn = pd.read_csv('../cnn_submission.csv', index_col=0)
res = pd.read_csv('gpyopt_submission.csv', index_col=0)
diff_num = np.sum(prev_cnn.Label.values != res.Label.values)
acc = (len(res) - diff_num) / len(res) #* 0.998276
print('Approx. accuracy: {0:.5f}'.format(acc))

In [None]:
# Accuracy per epoch of the final training run.  The legend labels name the
# plotted series (accuracy, not loss).
plt.plot(hist.history['val_acc'], label='val_acc')
plt.plot(hist.history['acc'], label='acc')
plt.legend()