# How to Grid Search Hyperparameters for Deep Learning Models in Python with Keras
https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

# How to Use Keras Models in scikit-learn

In [None]:
Keras models can be used in scikit-learn by wrapping them with the KerasClassifier or KerasRegressor class from the module SciKeras. You may need to run the command pip install scikeras first to install the module.

To use these wrappers, you must define a function that creates and returns your Keras sequential model, then pass this function to the model argument when constructing the KerasClassifier class.

For example:

def create_model():
	...
	return model
 
model = KerasClassifier(model=create_model)
The constructor for the KerasClassifier class can take default arguments that are passed on to the calls to model.fit(), such as the number of epochs and the batch size.

For example:

def create_model():
	...
	return model
 
model = KerasClassifier(model=create_model, epochs=10)
The constructor for the KerasClassifier class can also take new arguments that can be passed to your custom create_model() function. These new arguments must also be defined in the signature of your create_model() function with default parameters.

For example:

def create_model(dropout_rate=0.0):
	...
	return model
 
model = KerasClassifier(model=create_model, dropout_rate=0.2)
You can learn more about these from the SciKeras documentation.



# How to Use Grid Search in scikit-learn

# Problem Description

# Note on Parallelizing Grid Search

# How to Tune Batch Size and Number of Epochs

In [11]:

# Use scikit-learn to grid search the batch size and epochs
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, verbose=0)
# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100, 200]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.716146 using {'batch_size': 20, 'epochs': 200}
0.678385 (0.028764) with: {'batch_size': 10, 'epochs': 10}
0.651042 (0.009744) with: {'batch_size': 10, 'epochs': 50}
0.678385 (0.010253) with: {'batch_size': 10, 'epochs': 100}
0.695312 (0.022326) with: {'batch_size': 10, 'epochs': 200}
0.610677 (0.045143) with: {'batch_size': 20, 'epochs': 10}
0.653646 (0.032264) with: {'batch_size': 20, 'epochs': 50}
0.666667 (0.012890) with: {'batch_size': 20, 'epochs': 100}
0.716146 (0.027866) with: {'batch_size': 20, 'epochs': 200}
0.609375 (0.026107) with: {'batch_size': 40, 'epochs': 10}
0.660156 (0.028348) with: {'batch_size': 40, 'epochs': 50}
0.657552 (0.021236) with: {'batch_size': 40, 'epochs': 100}
0.696615 (0.015073) with: {'batch_size': 40, 'epochs': 200}
0.535156 (0.012758) with: {'batch_size': 60, 'epochs': 10}
0.651042 (0.034104) with: {'batch_size': 60, 'epochs': 50}
0.651042 (0.020752) with: {'batch_size': 60, 'epochs': 100}
0.678385 (0.013279) with: {'batch_size': 60, 'epochs'

# You can see that the batch size of 20 and 200 epochs achieved the best result of about 70% accuracy.



# How to Tune the Training Optimization Algorithm

In [10]:
# Use scikit-learn to grid search the batch size and epochs
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	# return model without compile
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, loss="binary_crossentropy", epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.710938 using {'optimizer': 'Adam'}
0.680990 (0.026557) with: {'optimizer': 'SGD'}
0.703125 (0.016877) with: {'optimizer': 'RMSprop'}
0.585938 (0.046329) with: {'optimizer': 'Adagrad'}
0.449219 (0.132736) with: {'optimizer': 'Adadelta'}
0.710938 (0.017758) with: {'optimizer': 'Adam'}
0.673177 (0.045814) with: {'optimizer': 'Adamax'}
0.701823 (0.021236) with: {'optimizer': 'Nadam'}


# The KerasClassifier wrapper will not compile your model again if the model is already compiled. Hence the other way to run GridSearchCV is to set the optimizer as an argument to the create_model() function, which returns an appropriately compiled model like the following: 

(i.e., Instead of passing optimizer =  'Adam' into KerasClassifier constructor class, we set the optimizer as an argument to the create_model() function)

In [9]:
# Use scikit-learn to grid search the batch size and epochs
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(optimizer='Adam'):
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(model__optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.714844 using {'model__optimizer': 'Adam'}
0.669271 (0.027126) with: {'model__optimizer': 'SGD'}
0.669271 (0.041504) with: {'model__optimizer': 'RMSprop'}
0.548177 (0.061873) with: {'model__optimizer': 'Adagrad'}
0.548177 (0.141826) with: {'model__optimizer': 'Adadelta'}
0.714844 (0.012758) with: {'model__optimizer': 'Adam'}
0.662760 (0.007366) with: {'model__optimizer': 'Adamax'}
0.705729 (0.004872) with: {'model__optimizer': 'Nadam'}


# How to Tune Learning Rate and Momentum

In [12]:
# Use scikit-learn to grid search the learning rate and momentum
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, loss="binary_crossentropy", optimizer="SGD", epochs=100, batch_size=10, verbose=0)
# define the grid search parameters
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = dict(optimizer__learning_rate=learn_rate, optimizer__momentum=momentum)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.678385 using {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.0}
0.678385 (0.017566) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.0}
0.652344 (0.039192) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.2}
0.675781 (0.016877) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.4}
0.662760 (0.039879) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.6}
0.652344 (0.011500) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.8}
0.627604 (0.019488) with: {'optimizer__learning_rate': 0.001, 'optimizer__momentum': 0.9}
0.611979 (0.051658) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.0}
0.645833 (0.016053) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.2}
0.665365 (0.011201) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.4}
0.638021 (0.017566) with: {'optimizer__learning_rate': 0.01, 'optimizer__momentum': 0.6}
0.651042 (0.001841)

In [19]:
# Use scikit-learn to grid search the learning rate and momentum
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, loss="binary_crossentropy", optimizer="Adam", epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
beta_1 = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
beta_2 = [0.997, 0.998, 0.999, 1.0]
epsilon= [1e-07, 1e-08, 1e-09]
param_grid = dict(optimizer__learning_rate=learn_rate, optimizer__beta_1=beta_1, optimizer__beta_2=beta_2, optimizer__epsilon=epsilon)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.765625 using {'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.703125 (0.030758) with: {'optimizer__beta_1': 0.0, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-07, 'optimizer__learning_rate': 0.001}
0.720052 (0.021710) with: {'optimizer__beta_1': 0.0, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-07, 'optimizer__learning_rate': 0.01}
0.649740 (0.003683) with: {'optimizer__beta_1': 0.0, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-07, 'optimizer__learning_rate': 0.1}
0.651042 (0.001841) with: {'optimizer__beta_1': 0.0, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-07, 'optimizer__learning_rate': 0.2}
0.651042 (0.001841) with: {'optimizer__beta_1': 0.0, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-07, 'optimizer__learning_rate': 0.3}
0.697917 (0.023939) with: {'optimizer__beta_1': 0.0, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.001

# You can see that the model with Adam optimizer that had a learning_rate of 0.01, beta1 of of 0.8, beta2 of 0.997, and epsilon of 1e-08 achieved the best result of about 76% accuracy.

# How to Tune Network Weight Initialization

In [33]:

# Use scikit-learn to grid search the weight initialization
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(init_mode='uniform'):
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), kernel_initializer=init_mode, activation='relu'))
	model.add(Dense(1, kernel_initializer=init_mode, activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=100, batch_size=10, verbose=0)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
param_grid = dict(model__init_mode=init_mode)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.716146 using {'model__init_mode': 'glorot_normal'}
0.712240 (0.033197) with: {'model__init_mode': 'uniform'}
0.700521 (0.014382) with: {'model__init_mode': 'lecun_uniform'}
0.709635 (0.018688) with: {'model__init_mode': 'normal'}
0.651042 (0.001841) with: {'model__init_mode': 'zero'}
0.716146 (0.029635) with: {'model__init_mode': 'glorot_normal'}
0.666667 (0.009744) with: {'model__init_mode': 'glorot_uniform'}
0.716146 (0.021236) with: {'model__init_mode': 'he_normal'}
0.714844 (0.011500) with: {'model__init_mode': 'he_uniform'}


# Below I try to tune Network Weight Initialization with the optimization algorithm Adam, above fine tuned optimization hyperparameters of learning_rate = 0.01, beta1 = 0.8, beta2 = 0.997, and epsilon = 1e-08, and model hyperparameters of epochs=200, batch_size=20.

In [30]:

# Use scikit-learn to grid search the weight initialization
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(init_mode='uniform'):
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), kernel_initializer=init_mode, activation='relu'))
	model.add(Dense(1, kernel_initializer=init_mode, activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
learning_rate = [0.01]
beta_1 = [0.8]
beta_2 = [0.997]
epsilon= [1e-08]
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
param_grid = dict(model__init_mode=init_mode,optimizer__learning_rate=learning_rate, optimizer__beta_1=beta_1, optimizer__beta_2=beta_2, optimizer__epsilon=epsilon)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.735677 using {'model__init_mode': 'normal', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.713542 (0.032734) with: {'model__init_mode': 'uniform', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.686198 (0.034104) with: {'model__init_mode': 'lecun_uniform', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.735677 (0.031948) with: {'model__init_mode': 'normal', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.651042 (0.001841) with: {'model__init_mode': 'zero', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.726562 (0.022097) with: {'model__init_mode': 'glorot_normal', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer_

# How to Tune the Neuron Activation Function

In [38]:

# Use scikit-learn to grid search the activation function
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(activation='relu'):
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), kernel_initializer='uniform', activation=activation))
	model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = dict(model__activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.747396 using {'model__activation': 'softplus'}
0.664062 (0.019401) with: {'model__activation': 'softmax'}
0.747396 (0.034401) with: {'model__activation': 'softplus'}
0.701823 (0.031466) with: {'model__activation': 'softsign'}
0.718750 (0.024080) with: {'model__activation': 'relu'}
0.707031 (0.011500) with: {'model__activation': 'tanh'}
0.683594 (0.005524) with: {'model__activation': 'sigmoid'}
0.703125 (0.019401) with: {'model__activation': 'hard_sigmoid'}
0.731771 (0.024150) with: {'model__activation': 'linear'}


# Below I try to tune the Neuron Activation Function with the optimization algorithm Adam, fine tuned optimization hyperparameters of learning_rate = 0.01, beta1 = 0.8, beta2 = 0.997, and epsilon = 1e-08, and model hyper-parameters of epochs=200, batch_size=20, and kernal_initializer = normal.

In [43]:
# Use scikit-learn to grid search the activation function
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(activation='relu'):
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), kernel_initializer='normal', activation=activation))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
learning_rate = [0.01]
beta_1 = [0.8]
beta_2 = [0.997]
epsilon= [1e-08]
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = dict(model__activation=activation,optimizer__learning_rate=learning_rate, optimizer__beta_1=beta_1, optimizer__beta_2=beta_2, optimizer__epsilon=epsilon)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.738281 using {'model__activation': 'relu', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.671875 (0.017758) with: {'model__activation': 'softmax', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.735677 (0.026557) with: {'model__activation': 'softplus', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.695312 (0.016573) with: {'model__activation': 'softsign', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.738281 (0.036782) with: {'model__activation': 'relu', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.688802 (0.029635) with: {'model__activation': 'tanh', 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon

# How to Tune Dropout Regularization

In [22]:

# Use scikit-learn to grid search the dropout rate
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.constraints import MaxNorm
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(dropout_rate, weight_constraint):
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), kernel_initializer='uniform', activation='linear', kernel_constraint=MaxNorm(weight_constraint)))
	model.add(Dropout(dropout_rate))
	model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
print(dataset.dtype, dataset.shape)
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=100, batch_size=10, verbose=0)
# define the grid search parameters
weight_constraint = [1.0, 2.0, 3.0, 4.0, 5.0]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(model__dropout_rate=dropout_rate, model__weight_constraint=weight_constraint)
#param_grid = dict(model__dropout_rate=dropout_rate)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

float64 (768, 9)
Best: 0.723958 using {'model__dropout_rate': 0.5, 'model__weight_constraint': 5.0}
0.714844 (0.000000) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 1.0}
0.710938 (0.006379) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 2.0}
0.694010 (0.006639) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 3.0}
0.718750 (0.019401) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 4.0}
0.696615 (0.009207) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 5.0}
0.712240 (0.011201) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 1.0}
0.700521 (0.009744) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 2.0}
0.707031 (0.011500) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 3.0}
0.701823 (0.006639) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 4.0}
0.709635 (0.015073) with: {'model__dropout_rate': 0.1, 'model__weight_constraint': 5.0}
0.714844 (0.011500) 

# Below I try to tune the Dropout Regularization with the optimization algorithm Adam, fine tuned optimization hyperparameters of learning_rate = 0.01, beta1 = 0.8, beta2 = 0.997, and epsilon = 1e-08, and model hyper-parameters of epochs=200, batch_size=20, kernal_initializer = normal, and activation = relu.

In [44]:

# Use scikit-learn to grid search the dropout rate
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.constraints import MaxNorm
from scikeras.wrappers import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(dropout_rate, weight_constraint):
	# create model
	model = Sequential()
	model.add(Dense(12, input_shape=(8,), kernel_initializer='normal', activation='relu', kernel_constraint=MaxNorm(weight_constraint)))
	model.add(Dropout(dropout_rate))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
print(dataset.dtype, dataset.shape)
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
learning_rate = [0.01]
beta_1 = [0.8]
beta_2 = [0.997]
epsilon= [1e-08]
weight_constraint = [1.0, 2.0, 3.0, 4.0, 5.0]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(model__dropout_rate=dropout_rate, model__weight_constraint=weight_constraint, optimizer__learning_rate=learning_rate, optimizer__beta_1=beta_1, optimizer__beta_2=beta_2, optimizer__epsilon=epsilon)
#param_grid = dict(model__dropout_rate=dropout_rate, model__weight_constraint=weight_constraint)
#param_grid = dict(model__dropout_rate=dropout_rate)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

float64 (768, 9)
Best: 0.731771 using {'model__dropout_rate': 0.0, 'model__weight_constraint': 2.0, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.722656 (0.019401) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 1.0, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.731771 (0.029635) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 2.0, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.714844 (0.029232) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 3.0, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.723958 (0.025976) with: {'model__dropout_rate': 0.0, 'model__weight_constraint': 4.0, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-0

# How to Tune the Number of Neurons in the Hidden Layer

In [24]:

# Use scikit-learn to grid search the number of neurons
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.constraints import MaxNorm
# Function to create model, required for KerasClassifier
def create_model(neurons):
	# create model
	model = Sequential()
	model.add(Dense(neurons, input_shape=(8,), kernel_initializer='uniform', activation='linear', kernel_constraint=MaxNorm(4)))
	model.add(Dropout(0.2))
	model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=100, batch_size=10, verbose=0)
# define the grid search parameters
neurons = [1, 5, 10, 15, 20, 25, 30]
param_grid = dict(model__neurons=neurons)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.723958 using {'model__neurons': 25}
0.701823 (0.009207) with: {'model__neurons': 1}
0.707031 (0.011500) with: {'model__neurons': 5}
0.699219 (0.014616) with: {'model__neurons': 10}
0.707031 (0.014616) with: {'model__neurons': 15}
0.708333 (0.009744) with: {'model__neurons': 20}
0.723958 (0.015733) with: {'model__neurons': 25}
0.700521 (0.011201) with: {'model__neurons': 30}


# Below I try to tune the Number of Neurons in the Hidden Layer with the optimization algorithm Adam, fine tuned optimization hyperparameters of learning_rate = 0.01, beta1 = 0.8, beta2 = 0.997, and epsilon = 1e-08, and model hyper-parameters of epochs=200, batch_size=20, kernal_initializer = normal, and activation = relu.

In [45]:

# Use scikit-learn to grid search the number of neurons
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.constraints import MaxNorm
# Function to create model, required for KerasClassifier
def create_model(neurons):
	# create model
	model = Sequential()
	model.add(Dense(neurons, input_shape=(8,), kernel_initializer='normal', activation='relu', kernel_constraint=MaxNorm(2)))
	model.add(Dropout(0.0))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = KerasClassifier(model=create_model, epochs=200, batch_size=20, verbose=0)
# define the grid search parameters
learning_rate = [0.01]
beta_1 = [0.8]
beta_2 = [0.997]
epsilon= [1e-08]
neurons = [1, 5, 10, 15, 20, 25, 30]
param_grid = dict(model__neurons=neurons, optimizer__learning_rate=learning_rate, optimizer__beta_1=beta_1, optimizer__beta_2=beta_2, optimizer__epsilon=epsilon)
#param_grid = dict(model__neurons=neurons)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.740885 using {'model__neurons': 15, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.682292 (0.036966) with: {'model__neurons': 1, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.720052 (0.036966) with: {'model__neurons': 5, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.723958 (0.025976) with: {'model__neurons': 10, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.740885 (0.018414) with: {'model__neurons': 15, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.720052 (0.025780) with: {'model__neurons': 20, 'optimizer__beta_1': 0.8, 'optimizer__beta_2': 0.997, 'optimizer__epsilon': 1e-08, 'optimizer__learning_rate': 0.01}
0.721354 (0

# Tips for Hyperparameter Optimization

# https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/