# Model Cross-validation with Keras
Sean Wade

In [2]:
from __future__ import absolute_import, division, print_function
import numpy as np
import pickle
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import GridSearchCV

Using TensorFlow backend.


In [3]:
# Fetch MNIST, reshape each image set to (samples, 28, 28, 1), cast to
# float32 and divide by 255 in a single expression per split.
(x_train, y_train_num), (x_test, y_test_num) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype('float32') / 255

# Convert the numeric labels into 10-class categorical (one-hot) targets.
y_train = np_utils.to_categorical(y_train_num, 10)
y_test = np_utils.to_categorical(y_test_num, 10)

## Model Function

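To integrate with sklearn we must write a function that creates the model.  The function should take, as arguments, the parameters we want to vary during cross-validation.  In this notebook the only parameter searched over is the `optimizer`; other hyperparameters (for example the width, depth, or weight initialization of the network) could be exposed in the same way.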

In [4]:
def create_model(optimizer='adam'):
    """Build and compile the convolutional classifier used by the grid search.

    Args:
        optimizer: Optimizer handed unchanged to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # Layers are listed in the exact order they are stacked onto the model.
    layer_stack = (
        # Convolutional part; the first layer fixes the 28x28x1 input shape.
        Convolution2D(32, 3, 3, input_shape=(28, 28, 1)),
        Activation('relu'),
        Convolution2D(32, 3, 3),
        Activation('relu'),
        MaxPooling2D(pool_size=(2,2)),
        Dropout(0.25),
        # Dense classification head ending in a 10-way softmax.
        Flatten(),
        Dense(128),
        Activation('relu'),
        Dropout(0.5),
        Dense(10),
        Activation('softmax'),
    )
    for layer in layer_stack:
        model.add(layer)

    # Categorical cross-entropy loss, with accuracy reported as a metric.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return model

## Grid Parameters

Next we create a dictionary of parameters.  Each key is a parameter name, and its value is the list of settings we want to test.

In [5]:
# Optimizer names to try; the dictionary key matches the keyword
# argument of ``create_model`` that receives each value.
optimizer_list = ['adam', 'rmsprop', 'sgd']
param_grid = {'optimizer': optimizer_list}

## Grid Search

In [6]:
# Wrap the model-building function so scikit-learn can use it as an estimator.
model = KerasClassifier(
    build_fn=create_model,
    batch_size=128,
    nb_epoch=1,
    verbose=True,
)

# Evaluate every entry of param_grid with 2-fold cross-validation.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=2,
    verbose=True,
)
grid_result = grid.fit(x_train, y_train)

Fitting 2 folds for each of 3 candidates, totalling 6 fits
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed: 14.3min finished


In [7]:
# Headline: the best mean score and the parameters that produced it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Pull the per-candidate columns out of cv_results_ in one unpacking step.
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)

# One line per candidate: mean test score, its standard deviation, parameters.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.965650 using {'optimizer': 'rmsprop'}
0.962850 (0.003517) with: {'optimizer': 'adam'}
0.965650 (0.001150) with: {'optimizer': 'rmsprop'}
0.813983 (0.015483) with: {'optimizer': 'sgd'}


In [10]:
# Display the complete cv_results_ record kept by the fitted grid search.
grid_result.cv_results_

{'mean_fit_time': array([  76.45673764,   74.32631612,  101.2042805 ]),
 'mean_score_time': array([ 29.74063933,  28.50668097,  30.26880097]),
 'mean_test_score': array([ 0.96285   ,  0.96565   ,  0.81398333]),
 'mean_train_score': array([ 0.96735   ,  0.96983333,  0.81366667]),
 'param_optimizer': masked_array(data = ['adam' 'rmsprop' 'sgd'],
              mask = [False False False],
        fill_value = ?),
 'params': ({'optimizer': 'adam'},
  {'optimizer': 'rmsprop'},
  {'optimizer': 'sgd'}),
 'rank_test_score': array([2, 1, 3], dtype=int32),
 'split0_test_score': array([ 0.96636667,  0.9645    ,  0.82946667]),
 'split0_train_score': array([ 0.97043333,  0.96813333,  0.83646667]),
 'split1_test_score': array([ 0.95933333,  0.9668    ,  0.7985    ]),
 'split1_train_score': array([ 0.96426667,  0.97153333,  0.79086667]),
 'std_fit_time': array([ 2.40596855,  0.34335208,  6.52176058]),
 'std_score_time': array([ 3.66890848,  2.42151594,  3.217031  ]),
 'std_test_score': array([ 0.00351