Grid search hyperparameter optimization

In [1]:
%tensorflow_version 1.x
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense, Input, Dropout
from keras import Sequential

print(tf.version.VERSION)

1.15.0


Using TensorFlow backend.


In [9]:
fm_dataset = tf.keras.datasets.fashion_mnist

(train_x, train_y), (test_x, test_y) = fm_dataset.load_data()

# Raw images are 28x28 pixels with values 0-255; rescale them to the [0, 1] range.
train_x, test_x = train_x/255.0, test_x/255.0

# Hold out 20% of the training images for validation; the fixed random_state
# keeps the split identical between runs.
train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=0.2, random_state=42)

print('Train data shape: ', train_x.shape, train_y.shape)
print('Validation data shape: ', valid_x.shape, valid_y.shape)
print('Test data shape:  ', test_x.shape, test_y.shape)

# Human-readable label for each integer class id (list index == class id).
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
class_count = len(class_names)
class_count

Train data shape:  (48000, 28, 28) (48000,)
Validation data shape:  (12000, 28, 28) (12000,)
Test data shape:   (10000, 28, 28) (10000,)


10

In [0]:
def create_model(dropout=0.1, init='uniform', dense_nparams=64, dense_layer_count=2):
  """Build and compile a fully connected classifier for 28x28 inputs.

  The network flattens the input, stacks `dense_layer_count` ReLU Dense layers
  with a single Dropout layer placed in the middle of the stack, and ends with
  a softmax layer that has one unit per entry of the module-level `class_count`.

  Args:
    dropout: Rate handed to the Dropout layer.
    init: Kernel initializer identifier applied to every Dense layer.
    dense_nparams: Number of units in each hidden Dense layer.
    dense_layer_count: Total number of hidden Dense layers. Odd values put the
      extra layer after the Dropout layer.

  Returns:
    The compiled Sequential model (Adam optimizer, sparse categorical
    cross-entropy loss, accuracy metric).
  """
  # Split the hidden layers around the dropout layer without losing one when
  # the requested count is odd.
  total_layers = int(dense_layer_count)
  layers_before_dropout = total_layers // 2
  layers_after_dropout = total_layers - layers_before_dropout

  model = keras.Sequential()

  model.add(keras.layers.Flatten(input_shape=(28,28)))

  for _ in range(layers_before_dropout):
    # `init` must be forwarded here, otherwise tuning it has no effect.
    model.add(keras.layers.Dense(dense_nparams, activation=tf.nn.relu,
                                 kernel_initializer=init))

  model.add(keras.layers.Dropout(dropout))

  for _ in range(layers_after_dropout):
    model.add(keras.layers.Dense(dense_nparams, activation=tf.nn.relu,
                                 kernel_initializer=init))

  model.add(keras.layers.Dense(class_count, activation=tf.nn.softmax,
                               kernel_initializer=init))

  # The output layer already applies softmax, hence from_logits=False.
  model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                optimizer='adam',
                metrics=['accuracy'])

  return model

In [0]:
# Baseline: the model built from create_model's default hyperparameters,
# trained for 10 epochs and monitored on the held-out validation split.
baseModel = create_model()
baseModelHistory = baseModel.fit(
    train_x,
    train_y,
    epochs=10,
    validation_data=(valid_x, valid_y),
)

In [0]:
show_history(baseModelHistory)
test_loss, test_acc = baseModel.evaluate(test_x, test_y)
print('Test accuracy: ', test_acc)

In [0]:
def show_history(history):
    plt.figure()
    for key in history.history.keys():
        plt.plot(history.epoch, history.history[key], label=key)
    plt.legend()
    plt.tight_layout()

In [0]:
# Wrap the model factory so scikit-learn can treat it as an estimator; the
# wrapper calls create_model with the hyperparameters chosen for each fit.
keras_classifier = KerasClassifier(
    build_fn=create_model,
    verbose=1,
)

In [0]:
# Single-step pipeline; the step name "kc" is the prefix used by the
# "kc__<param>" keys of the parameter grid.
pipeline_steps = [("kc", keras_classifier)]
estimator = Pipeline(pipeline_steps)

In [0]:
# Search space. Each "kc__<name>" key targets the create_model argument <name>
# through the pipeline step called "kc".
# The number of epochs is not searched; 'kc__epochs': [10, 20, 30] would add it.
param_grid = {
    'kc__dense_nparams': [32, 64, 128, 256, 512],
    'kc__init': ['uniform', 'zeros', 'normal'],
    'kc__dense_layer_count': [2, 4, 8, 16],
    'kc__dropout': [0.5, 0.4, 0.3, 0.2, 0.1, 0],
}

# Inputs for the search: the training split and the number of CV folds.
kfold_splits = 5
X, y = train_x, train_y

In [0]:
# Exhaustive search over param_grid, run sequentially in a single job.
# cv is a plain fold count here; an explicit splitter such as
# StratifiedKFold(n_splits=kfold_splits, shuffle=True) could be passed instead.
grid = GridSearchCV(
    estimator,
    param_grid,
    cv=kfold_splits,
    n_jobs=1,
    return_train_score=True,
    verbose=1,
)

In [48]:
# Run the whole search: one fit per (candidate, fold) pair.
grid_result = grid.fit(X=X, y=y)

Fitting 5 folds for each of 360 candidates, totalling 1800 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples
Train on 38400 samples

KeyboardInterrupt: ignored

In [0]:

# Report the winning configuration, then the mean and standard deviation of the
# cross-validated test score for every candidate.
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)

best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % best_summary)

for candidate_row in zip(means, stds, params):
    print("%f (%f) with: %r" % candidate_row)