# Model Cross-validation with Keras
Sean Wade

In [1]:
from __future__ import absolute_import, division, print_function
import numpy as np
import pickle
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Convolution2D, MaxPooling2D, Reshape
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import GridSearchCV
from sklearn.externals import joblib

Using TensorFlow backend.


In [2]:
# Fetch MNIST; the integer labels keep their own names so the one-hot
# encoded versions created below do not overwrite them.
(x_train, y_train_num), (x_test, y_test_num) = mnist.load_data()

# Add a trailing channel axis, convert to float32 and divide the pixels by 255.
x_train = x_train.reshape(len(x_train), 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(len(x_test), 28, 28, 1).astype('float32') / 255

# One-hot encode the labels into 10 classes.
y_train, y_test = (np_utils.to_categorical(labels, 10)
                   for labels in (y_train_num, y_test_num))

## Model Function

To integrate with sklearn we must write a function that creates the model.  The function should take, as arguments, the model-building parameters we want to test during cross-validation.  In this notebook the function takes the optimizer; the grid search below varies the optimizer together with the batch size.

In [3]:
def create_model(optimizer='adam'):
    """Build and compile the classifier network used by the grid search.

    Each 28x28x1 input is reshaped to a flat vector of 784 values, passed
    through a 128-unit dense layer with a ReLU activation, and then through
    a 10-unit dense layer with a softmax activation.

    Args:
        optimizer: Optimizer handed unchanged to ``compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = [
        Reshape((784,), input_shape=(28, 28, 1)),
        Dense(output_dim=128, init='he_normal', bias=True),
        Activation("relu"),
        Dense(output_dim=10, init='he_normal', bias=True),
        Activation("softmax"),
    ]

    net = Sequential()
    for layer in layers:
        net.add(layer)

    net.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

## Grid Parameters

Next we create a dictionary of parameters.  Each key is a parameter name, and its value is the list of settings we want to test.

In [4]:
# Candidate values for each hyperparameter explored by the search.
optimizer_list = ['adam', 'rmsprop', 'sgd']
batch_size_list = [32, 64, 128]

# Map each hyperparameter name to its list of candidate values.
param_grid = {
    'optimizer': optimizer_list,
    'batch_size': batch_size_list,
}

## Grid Search

In [5]:
import warnings

# Silence library warnings so the training log below stays readable.
warnings.filterwarnings('ignore')

# batch_size is deliberately not set on the estimator: it is one of the
# searched hyperparameters, so GridSearchCV supplies a single value from
# param_grid for every fit. Passing the whole candidate list here would hand
# Keras a list instead of an int if the estimator were ever fitted directly.
model = KerasClassifier(build_fn=create_model, nb_epoch=1, verbose=True)

# 2-fold cross-validation over every combination in param_grid.
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=True, cv=2)
grid_result = grid.fit(x_train, y_train)

Fitting 2 folds for each of 9 candidates, totalling 18 fits
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:  1.5min finished


In [6]:
# Report the best configuration, then the mean and standard deviation of the
# test score for every candidate in the grid.
cv_results = grid_result.cv_results_
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for row in zip(means, stds, params):
    # Each row is a (mean, std, params) tuple, matching the three format fields.
    print("%f (%f) with: %r" % row)

Best: 0.940650 using {'optimizer': 'adam', 'batch_size': 32}
0.940650 (0.001517) with: {'optimizer': 'adam', 'batch_size': 32}
0.932817 (0.001550) with: {'optimizer': 'rmsprop', 'batch_size': 32}
0.872433 (0.001267) with: {'optimizer': 'sgd', 'batch_size': 32}
0.929483 (0.001083) with: {'optimizer': 'adam', 'batch_size': 64}
0.933850 (0.001783) with: {'optimizer': 'rmsprop', 'batch_size': 64}
0.846550 (0.000883) with: {'optimizer': 'sgd', 'batch_size': 64}
0.923367 (0.002767) with: {'optimizer': 'adam', 'batch_size': 128}
0.916233 (0.000500) with: {'optimizer': 'rmsprop', 'batch_size': 128}
0.788350 (0.010250) with: {'optimizer': 'sgd', 'batch_size': 128}


In [7]:
# Write the cross-validation results dictionary to disk.
d = grid_result.cv_results_
joblib.dump(d, filename='out.p')

['out.p']

In [8]:
# Read back the file written by the previous cell (presumably a round-trip check).
a = joblib.load('out.p')

In [9]:
# Display the reloaded results dictionary.
a

{'mean_fit_time': array([ 4.56389248,  4.91426647,  4.55470991,  3.06347215,  2.70140946,
         2.49186158,  2.08880997,  1.96029449,  1.89702857]),
 'mean_score_time': array([ 1.16054356,  1.65659046,  1.4370805 ,  0.88751686,  1.01908946,
         0.84377599,  0.64867747,  0.76791549,  0.72550595]),
 'mean_test_score': array([ 0.94065   ,  0.93281667,  0.87243333,  0.92948333,  0.93385   ,
         0.84655   ,  0.92336667,  0.91623333,  0.78835   ]),
 'mean_train_score': array([ 0.94996667,  0.94198333,  0.87468333,  0.93656667,  0.94135   ,
         0.84821667,  0.92971667,  0.92363333,  0.79263333]),
 'param_batch_size': masked_array(data = [32 32 32 64 64 64 128 128 128],
              mask = [False False False False False False False False False],
        fill_value = ?),
 'param_optimizer': masked_array(data = ['adam' 'rmsprop' 'sgd' 'adam' 'rmsprop' 'sgd' 'adam' 'rmsprop' 'sgd'],
              mask = [False False False False False False False False False],
        fill_value

In [10]:
# NOTE(review): presumably recorded to identify the pickle implementation behind 'out.p';
# this attribute may not exist on every Python version — TODO confirm it is still needed.
pickle.__version__

'$Revision: 72223 $'