# How to Grid Search Hyperparameters for Deep Learning Models

## 14.4 How to Tune Batch Size and Number of Epochs

In [1]:
# Use scikit-learn to grid search the batch size and epochs
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
    """Build and compile the baseline binary classifier.

    The network is a ``Sequential`` stack of a 12-unit ReLU ``Dense`` layer
    declared with ``input_shape=(8,)`` followed by a single sigmoid output
    unit. It is compiled with binary cross-entropy loss, the ``'adam'``
    optimizer and an accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    net = Sequential()
    # hidden layer
    hidden_layer = Dense(12, input_shape=(8,), activation='relu')
    net.add(hidden_layer)
    # output layer
    output_layer = Dense(1, activation='sigmoid')
    net.add(output_layer)
    # compile settings gathered in one place
    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    net.compile(**compile_options)
    return net
# Grid search over batch size and number of epochs.
#
# Fix the random seed for reproducibility. The search below uses n_jobs=-1,
# so fits may run in parallel workers where the global TensorFlow seed set
# here is not guaranteed to apply; the same seed is therefore also handed to
# KerasClassifier through random_state so every fit is seeded where it runs.
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables: columns 0-7 and column 8
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, random_state=seed, verbose=0)
# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results: best combination first, then one line per combination
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.709635 using {'batch_size': 10, 'epochs': 100}
0.635417 (0.034401) with: {'batch_size': 10, 'epochs': 10}
0.649740 (0.068133) with: {'batch_size': 10, 'epochs': 50}
0.709635 (0.013279) with: {'batch_size': 10, 'epochs': 100}
0.604167 (0.031304) with: {'batch_size': 20, 'epochs': 10}
0.680990 (0.018688) with: {'batch_size': 20, 'epochs': 50}
0.707031 (0.013902) with: {'batch_size': 20, 'epochs': 100}
0.503906 (0.052698) with: {'batch_size': 40, 'epochs': 10}
0.638021 (0.032106) with: {'batch_size': 40, 'epochs': 50}
0.665365 (0.028940) with: {'batch_size': 40, 'epochs': 100}
0.548177 (0.028940) with: {'batch_size': 60, 'epochs': 10}
0.619792 (0.031948) with: {'batch_size': 60, 'epochs': 50}
0.682292 (0.014382) with: {'batch_size': 60, 'epochs': 100}
0.595052 (0.080075) with: {'batch_size': 80, 'epochs': 10}
0.635417 (0.051658) with: {'batch_size': 80, 'epochs': 50}
0.634115 (0.045814) with: {'batch_size': 80, 'epochs': 100}
0.449219 (0.058550) with: {'batch_size': 100, 'epochs':

## 14.5 How to Tune the Training Optimization Algorithm

In [2]:
# Use scikit-learn to grid search the optimization algorithms
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
    """Build the two-layer classifier and return it uncompiled.

    The network is a ``Sequential`` stack of a 12-unit ReLU ``Dense`` layer
    declared with ``input_shape=(8,)`` followed by a single sigmoid output
    unit. No ``compile`` call is made here.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    net = Sequential()
    # hidden layer
    hidden_layer = Dense(12, input_shape=(8,), activation='relu')
    net.add(hidden_layer)
    # output layer
    output_layer = Dense(1, activation='sigmoid')
    net.add(output_layer)
    # deliberately not compiled
    return net
# Grid search over the training optimization algorithm.
#
# Fix the random seed for reproducibility. The search below uses n_jobs=-1,
# so fits may run in parallel workers where the global TensorFlow seed set
# here is not guaranteed to apply; the same seed is therefore also handed to
# KerasClassifier through random_state so every fit is seeded where it runs.
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables: columns 0-7 and column 8
X = dataset[:,0:8]
Y = dataset[:,8]
# create model; create_model returns an uncompiled network, so the loss is
# given to the wrapper and the optimizer is the parameter being searched
model = KerasClassifier(model=create_model, loss="binary_crossentropy",
                        epochs=100, batch_size=10, random_state=seed, verbose=0)

# define the grid search parameters
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results: best optimizer first, then one line per optimizer
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.695312 using {'optimizer': 'Adam'}
0.677083 (0.022628) with: {'optimizer': 'SGD'}
0.686198 (0.028940) with: {'optimizer': 'RMSprop'}
0.540365 (0.067004) with: {'optimizer': 'Adagrad'}
0.565104 (0.120624) with: {'optimizer': 'Adadelta'}
0.695312 (0.008438) with: {'optimizer': 'Adam'}
0.627604 (0.079438) with: {'optimizer': 'Adamax'}
0.695312 (0.003189) with: {'optimizer': 'Nadam'}


## 14.6 How to Tune Learning Rate and Momentum

In [3]:
# Use scikit-learn to grid search the learning rate and momentum
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
    """Build the two-layer classifier and return it uncompiled.

    The network is a ``Sequential`` stack of a 12-unit ReLU ``Dense`` layer
    declared with ``input_shape=(8,)`` followed by a single sigmoid output
    unit. No ``compile`` call is made here.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    network = Sequential()
    # add the layers in order: hidden, then output
    first = Dense(12, input_shape=(8,), activation='relu')
    network.add(first)
    last = Dense(1, activation='sigmoid')
    network.add(last)
    return network

# Grid search over the SGD learning rate and momentum.
#
# Fix the random seed for reproducibility. The search below uses n_jobs=-1,
# so fits may run in parallel workers where the global TensorFlow seed set
# here is not guaranteed to apply; the same seed is therefore also handed to
# KerasClassifier through random_state so every fit is seeded where it runs.
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables: columns 0-7 and column 8
X = dataset[:,0:8]
Y = dataset[:,8]
# create model; create_model returns an uncompiled network, so loss and
# optimizer are given to the wrapper
model = KerasClassifier(model=create_model, loss="binary_crossentropy",
                        optimizer="SGD", epochs=100, batch_size=10,
                        random_state=seed, verbose=0)

# define the grid search parameters; the optimizer__ prefix addresses the
# optimizer's own arguments
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = dict(optimizer__learning_rate=learn_rate, optimizer__momentum=momentum)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results: best combination first, then one line per combination
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.677083 using {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.0}
0.661458 (0.016053) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.0}
0.635417 (0.043420) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.2}
0.669271 (0.009207) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.4}
0.657552 (0.041626) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.6}
0.653646 (0.044804) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.8}
0.656250 (0.020915) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.9}
0.677083 (0.006639) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.0}
0.653646 (0.015733) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.2}
0.649740 (0.006639) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.4}
0.647135 (0.004872) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.6}
0.651042 (0.001841) 

## 14.7 How to Tune Network Weight Initialization

In [4]:
# Use scikit-learn to grid search the weight initialization
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(init_mode='uniform'):
    """Build and compile the classifier with a chosen weight initializer.

    Args:
        init_mode: Value passed as ``kernel_initializer`` to both ``Dense``
            layers. Defaults to ``'uniform'``.

    Returns:
        The ``Sequential`` model (12-unit ReLU hidden layer declared with
        ``input_shape=(8,)``, single sigmoid output), compiled with binary
        cross-entropy loss, the ``'adam'`` optimizer and an accuracy metric.
    """
    net = Sequential()
    # hidden layer
    hidden_layer = Dense(
        12,
        input_shape=(8,),
        kernel_initializer=init_mode,
        activation='relu',
    )
    net.add(hidden_layer)
    # output layer shares the same initializer
    output_layer = Dense(1, kernel_initializer=init_mode, activation='sigmoid')
    net.add(output_layer)
    # compile settings gathered in one place
    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    net.compile(**compile_options)
    return net
# Grid search over the network weight initialization.
#
# Fix the random seed for reproducibility. The search below uses n_jobs=-1,
# so fits may run in parallel workers where the global TensorFlow seed set
# here is not guaranteed to apply; the same seed is therefore also handed to
# KerasClassifier through random_state so every fit is seeded where it runs.
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables: columns 0-7 and column 8
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=100, batch_size=10,
                        random_state=seed, verbose=0)
# define the grid search parameters; the model__ prefix routes the value to
# create_model's init_mode argument
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal',
             'glorot_uniform', 'he_normal', 'he_uniform']

param_grid = dict(model__init_mode=init_mode)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results: best initializer first, then one line per initializer
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.712240 using {'model__init_mode': 'uniform'}
0.712240 (0.030978) with: {'model__init_mode': 'uniform'}
0.687500 (0.038670) with: {'model__init_mode': 'lecun_uniform'}
0.712240 (0.027126) with: {'model__init_mode': 'normal'}
0.651042 (0.001841) with: {'model__init_mode': 'zero'}
0.670573 (0.048824) with: {'model__init_mode': 'glorot_normal'}
0.690104 (0.032106) with: {'model__init_mode': 'glorot_uniform'}
0.695312 (0.011500) with: {'model__init_mode': 'he_normal'}
0.682292 (0.019225) with: {'model__init_mode': 'he_uniform'}


## 14.8 How to Tune the Neuron Activation Function

In [5]:
# Use scikit-learn to grid search the activation function
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(activation='relu'):
    """Build and compile the classifier with a chosen hidden activation.

    Args:
        activation: Activation passed to the 12-unit hidden ``Dense`` layer.
            Defaults to ``'relu'``. The output layer always uses sigmoid.

    Returns:
        The ``Sequential`` model (both layers use ``'uniform'`` kernel
        initialization), compiled with binary cross-entropy loss, the
        ``'adam'`` optimizer and an accuracy metric.
    """
    net = Sequential()
    # hidden layer with the activation under test
    hidden_layer = Dense(
        12,
        input_shape=(8,),
        kernel_initializer='uniform',
        activation=activation,
    )
    net.add(hidden_layer)
    # output layer
    output_layer = Dense(1, kernel_initializer='uniform', activation='sigmoid')
    net.add(output_layer)
    # compile settings gathered in one place
    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    net.compile(**compile_options)
    return net
# Grid search over the hidden-layer activation function.
#
# Fix the random seed for reproducibility. The search below uses n_jobs=-1,
# so fits may run in parallel workers where the global TensorFlow seed set
# here is not guaranteed to apply; the same seed is therefore also handed to
# KerasClassifier through random_state so every fit is seeded where it runs.
seed = 7
tf.random.set_seed(seed)

# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables: columns 0-7 and column 8
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=100, batch_size=10,
                        random_state=seed, verbose=0)
# define the grid search parameters; the model__ prefix routes the value to
# create_model's activation argument
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid',
              'hard_sigmoid', 'linear']
param_grid = dict(model__activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results: best activation first, then one line per activation
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.727865 using {'model__activation': 'softplus'}
0.647135 (0.007366) with: {'model__activation': 'softmax'}
0.727865 (0.037377) with: {'model__activation': 'softplus'}
0.678385 (0.013279) with: {'model__activation': 'softsign'}
0.713542 (0.017566) with: {'model__activation': 'relu'}
0.665365 (0.018688) with: {'model__activation': 'tanh'}
0.683594 (0.013902) with: {'model__activation': 'sigmoid'}
0.696615 (0.045592) with: {'model__activation': 'hard_sigmoid'}
0.710938 (0.015947) with: {'model__activation': 'linear'}


## 14.9 How to Tune Dropout Regularization

In [6]:
# Use scikit-learn to grid search the dropout rate
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.constraints import MaxNorm
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(dropout_rate, weight_constraint):
    """Build and compile the classifier with dropout and a max-norm constraint.

    Args:
        dropout_rate: Value passed to the ``Dropout`` layer placed between
            the hidden and output layers.
        weight_constraint: Value passed to ``MaxNorm``, which is used as the
            hidden layer's ``kernel_constraint``.

    Returns:
        The ``Sequential`` model (12-unit linear hidden layer declared with
        ``input_shape=(8,)``, dropout, single sigmoid output), compiled with
        binary cross-entropy loss, the ``'adam'`` optimizer and an accuracy
        metric.
    """
    net = Sequential()
    # hidden layer, kernel constrained by max-norm
    norm_cap = MaxNorm(weight_constraint)
    hidden_layer = Dense(
        12,
        input_shape=(8,),
        kernel_initializer='uniform',
        activation='linear',
        kernel_constraint=norm_cap,
    )
    net.add(hidden_layer)
    # dropout between hidden and output layers
    net.add(Dropout(dropout_rate))
    # output layer
    output_layer = Dense(1, kernel_initializer='uniform', activation='sigmoid')
    net.add(output_layer)
    # compile settings gathered in one place
    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    net.compile(**compile_options)
    return net
# Grid search over the dropout rate and max-norm weight constraint.
#
# Fix the random seed for reproducibility. The search below uses n_jobs=-1,
# so fits may run in parallel workers where the global TensorFlow seed set
# here is not guaranteed to apply; the same seed is therefore also handed to
# KerasClassifier through random_state so every fit is seeded where it runs.
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# quick sanity check of what was loaded
print(dataset.dtype, dataset.shape)
# split into input (X) and output (Y) variables: columns 0-7 and column 8
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=100, batch_size=10,
                        random_state=seed, verbose=0)
# define the grid search parameters; the model__ prefix routes the values to
# create_model's dropout_rate and weight_constraint arguments
weight_constraint = [1.0, 2.0, 3.0, 4.0, 5.0]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(model__dropout_rate=dropout_rate,
                  model__weight_constraint=weight_constraint)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results: best combination first, then one line per combination
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

float64 (768, 9)
Best: 0.723958 using {'model__dropout_rate': 0.0, 'model__weight_constraint': 1.0}
0.723958 (0.015733) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 1.0}
0.708333 (0.006639) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 2.0}
0.700521 (0.006639) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 3.0}
0.708333 (0.004872) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 4.0}
0.713542 (0.010253) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 5.0}
0.712240 (0.016367) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 1.0}
0.720052 (0.017566) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 2.0}
0.713542 (0.014382) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 3.0}
0.710938 (0.022999) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 4.0}
0.691406 (0.005524) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 5.0}
0.699219 (0.003189) 

## 14.10 How to Tune the Number of Neurons in the Hidden Layer

In [7]:
# Use scikit-learn to grid search the number of neurons
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.constraints import MaxNorm
# Function to create model, required for KerasClassifier
def create_model(neurons):
    """Build and compile the classifier with a chosen hidden-layer width.

    Args:
        neurons: Number of units passed to the hidden ``Dense`` layer.

    Returns:
        The ``Sequential`` model (linear hidden layer declared with
        ``input_shape=(8,)`` and a ``MaxNorm(4)`` kernel constraint,
        ``Dropout(0.2)``, single sigmoid output), compiled with binary
        cross-entropy loss, the ``'adam'`` optimizer and an accuracy metric.
    """
    net = Sequential()
    # hidden layer of the requested width, kernel constrained by max-norm
    norm_cap = MaxNorm(4)
    hidden_layer = Dense(
        neurons,
        input_shape=(8,),
        kernel_initializer='uniform',
        activation='linear',
        kernel_constraint=norm_cap,
    )
    net.add(hidden_layer)
    # fixed dropout between hidden and output layers
    net.add(Dropout(0.2))
    # output layer
    output_layer = Dense(1, kernel_initializer='uniform', activation='sigmoid')
    net.add(output_layer)
    # compile settings gathered in one place
    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    net.compile(**compile_options)
    return net
# Grid search over the number of neurons in the hidden layer.
#
# Fix the random seed for reproducibility. The search below uses n_jobs=-1,
# so fits may run in parallel workers where the global TensorFlow seed set
# here is not guaranteed to apply; the same seed is therefore also handed to
# KerasClassifier through random_state so every fit is seeded where it runs.
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables: columns 0-7 and column 8
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=100, batch_size=10,
                        random_state=seed, verbose=0)
# define the grid search parameters; the model__ prefix routes the value to
# create_model's neurons argument
neurons = [1, 5, 10, 15, 20, 25, 30]
param_grid = dict(model__neurons=neurons)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results: best width first, then one line per width
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.726562 using {'model__neurons': 10}
0.701823 (0.016367) with: {'model__neurons': 1}
0.694010 (0.003683) with: {'model__neurons': 5}
0.726562 (0.019401) with: {'model__neurons': 10}
0.700521 (0.004872) with: {'model__neurons': 15}
0.699219 (0.019918) with: {'model__neurons': 20}
0.697917 (0.022402) with: {'model__neurons': 25}
0.692708 (0.025976) with: {'model__neurons': 30}
