# Keras with scikit-learn grid search hyperparameter optimization example

In [1]:
# System imports
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os

# General data analytics imports
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV

# Deep learning imports
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.losses import categorical_crossentropy
from keras.wrappers.scikit_learn import KerasClassifier
from keras import backend as K
import tensorflow as tf

Using TensorFlow backend.


In [2]:
# Data configuration: image dimensions, number of target classes,
# and the tensor layout Keras should assume (channel axis last)
img_rows, img_cols = 28, 28
n_classes = 10
K.set_image_data_format('channels_last')

## Load the data

We're going to use MNIST for demonstration purposes.

In [3]:
# Fetch the MNIST train/test split
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Append a single channel axis, cast to float32 and divide by 255
input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape).astype(np.float32) / 255
x_test = x_test.reshape((x_test.shape[0],) + input_shape).astype(np.float32) / 255

# One-hot encode the integer class labels
y_train = keras.utils.to_categorical(y_train, num_classes=n_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=n_classes)

# Report the resulting array shapes
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

x_train shape: (60000, 28, 28, 1)
x_test shape: (10000, 28, 28, 1)
y_train shape: (60000, 10)
y_test shape: (10000, 10)


## Define the model

Wrap the model definition in a function to pass to the sklearn API.

In [4]:
def build_model(h1=4, h2=8, h3=32, dropout=0.5):
    """Build and compile a small convolutional classifier.

    Parameters
    ----------
    h1, h2 : int
        Number of filters in the first and second 3x3 convolution layers.
    h3 : int
        Number of units in the hidden dense layer.
    dropout : float
        Rate used by both Dropout layers.

    Returns
    -------
    A compiled ``Sequential`` model using the Adadelta optimizer,
    categorical cross-entropy loss and an accuracy metric. The input shape
    and the size of the softmax output are taken from the notebook-level
    ``input_shape`` and ``n_classes``.
    """
    layers = [
        # Convolutional feature extractor
        Conv2D(h1, (3, 3), activation='relu', input_shape=input_shape),
        Conv2D(h2, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(dropout),
        # Dense classification head
        Flatten(),
        Dense(h3, activation='relu'),
        Dropout(dropout),
        Dense(n_classes, activation='softmax'),
    ]
    model = Sequential(layers)
    model.compile(loss=categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model

## Traditional model training, without hyperparameter optimization

In [5]:
# Concurrency configuration: use the same thread count for parallelism
# inside a single op and across independent ops
n_threads = 3
thread_settings = {
    'intra_op_parallelism_threads': n_threads,
    'inter_op_parallelism_threads': n_threads,
}
config = tf.ConfigProto(**thread_settings)

# Hand the configured session to the Keras backend
session = tf.Session(config=config)
K.set_session(session)

In [6]:
# Training configuration: mini-batch size and number of passes over the data
batch_size, n_epochs = 128, 16

In [7]:
# Instantiate the network with its default hyperparameters and show its layers
model = build_model()
model.summary()

# Fit on the full training set, keeping the object returned by fit()
history = model.fit(x=x_train, y=y_train,
                    epochs=n_epochs, batch_size=batch_size)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 4)         40        
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 8)         296       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 8)         0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 8)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1152)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                36896     
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
__________

In [8]:
# Compute accuracy on the test set.
# The predicted class is the arg-max of the softmax output. This replaces
# Sequential.predict_classes, which is deprecated and no longer available in
# newer Keras/TensorFlow releases, and gives the same result for this model.
o_test = model.predict(x_test, verbose=0).argmax(axis=-1)
# y_test is one-hot encoded, so arg-max recovers the integer labels
accuracy = (o_test == y_test.argmax(axis=1)).mean()
print('Test set accuracy:', accuracy)

Test set accuracy: 0.9721


## Train the model with grid search

In [None]:
# Wrap model with sklearn wrapper; batch_size/epochs are forwarded to fit()
sk_model = KerasClassifier(build_fn=build_model,
                           batch_size=batch_size,
                           epochs=n_epochs, verbose=0)
# Define the k-fold grid-search wrapper.
# Every key matches a keyword argument of build_model.
param_grid = dict(
    h1=[8, 16, 32],
    h2=[16, 32],
    h3=[16, 32],
    dropout=[0., 0.25, 0.5])
# cv is pinned to 3 folds because the library default differs between
# scikit-learn releases. return_train_score is enabled explicitly because the
# results table below reads 'mean_train_score', which is only recorded when
# this flag is on.
grid_model = GridSearchCV(sk_model, param_grid, cv=3,
                          return_train_score=True, verbose=2)

In [None]:
# Fit every hyperparameter combination on each cross-validation fold
grid_model.fit(X=x_train, y=y_train)

Fitting 3 folds for each of 36 candidates, totalling 108 fits
[CV] dropout=0.0, h1=8, h2=16, h3=16 .................................
[CV] .................. dropout=0.0, h1=8, h2=16, h3=16, total= 3.4min
[CV] dropout=0.0, h1=8, h2=16, h3=16 .................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  3.5min remaining:    0.0s


[CV] .................. dropout=0.0, h1=8, h2=16, h3=16, total= 3.4min
[CV] dropout=0.0, h1=8, h2=16, h3=16 .................................
[CV] .................. dropout=0.0, h1=8, h2=16, h3=16, total= 3.3min
[CV] dropout=0.0, h1=8, h2=16, h3=32 .................................
[CV] .................. dropout=0.0, h1=8, h2=16, h3=32, total= 3.4min
[CV] dropout=0.0, h1=8, h2=16, h3=32 .................................
[CV] .................. dropout=0.0, h1=8, h2=16, h3=32, total= 3.4min
[CV] dropout=0.0, h1=8, h2=16, h3=32 .................................
[CV] .................. dropout=0.0, h1=8, h2=16, h3=32, total= 3.4min
[CV] dropout=0.0, h1=8, h2=32, h3=16 .................................
[CV] .................. dropout=0.0, h1=8, h2=32, h3=16, total= 5.1min
[CV] dropout=0.0, h1=8, h2=32, h3=16 .................................
[CV] .................. dropout=0.0, h1=8, h2=32, h3=16, total= 5.3min
[CV] dropout=0.0, h1=8, h2=32, h3=16 .................................
[CV] .

In [None]:
# Load the grid-search cross-validation results into a DataFrame
results = pd.DataFrame(data=grid_model.cv_results_)

In [None]:
# Columns of interest: the searched hyperparameters and the mean scores
df_keys = ['param_dropout',
           'param_h1', 'param_h2', 'param_h3',
           'mean_test_score', 'mean_train_score'
          ]
# 'mean_train_score' is only present when the search recorded training
# scores, so keep just the requested columns that exist instead of
# failing with a KeyError.
results[[key for key in df_keys if key in results.columns]]