# Neural Network Hyperparameter Tuning 

In [8]:
# Importing the necessary packages
import pandas as pd
import numpy as np
import keras
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import classification_report, accuracy_score
from keras.models import Sequential
from keras.layers import Dense,Dropout
from tensorflow.keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
import warnings
warnings.filterwarnings("ignore")

In [2]:

# Read the comma-delimited file into a NumPy array.
# NOTE(review): the path is absolute (filesystem root) - confirm it matches where the CSV actually lives.
DATA_PATH = "/pima-indians-diabetes.csv"
dataset = np.loadtxt(DATA_PATH, delimiter=",")

In [3]:
# Columns 0-7 are the model inputs (X); column 8 is the target (y)
X, y = dataset[:, :8], dataset[:, 8]

In [4]:
# Fit a StandardScaler on X and use it to transform that same X.
# NOTE(review): the scaler is fitted on every row before the cross-validation
# cells below split X_standardized into folds - confirm this is acceptable.
a = StandardScaler().fit(X)
X_standardized = a.transform(X)

In [5]:
# Summary statistics of the standardized features, shown as the cell output
standardized_summary = pd.DataFrame(X_standardized).describe()
standardized_summary

Unnamed: 0,0,1,2,3,4,5,6,7
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,-7.748432e-17,3.614007e-18,-1.3272440000000001e-17,7.762888000000001e-17,-5.4932910000000006e-17,2.972738e-15,1.924387e-15,2.19298e-16
std,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652
min,-1.141852,-3.783654,-3.572597,-1.288212,-0.6928906,-4.060474,-1.189553,-1.041549
25%,-0.8448851,-0.6852363,-0.3673367,-1.288212,-0.6928906,-0.5955785,-0.6889685,-0.7862862
50%,-0.2509521,-0.1218877,0.1496408,0.1545332,-0.4280622,0.0009419788,-0.3001282,-0.3608474
75%,0.6399473,0.6057709,0.5632228,0.7190857,0.4120079,0.5847705,0.4662269,0.6602056
max,3.906578,2.444478,2.734528,4.921866,6.652839,4.455807,5.883565,4.063716


#### Tuning of Hyperparameters: Batch Size and Epochs

In [6]:
# create model
def create_model():
    """Build and compile the baseline binary classifier.

    Layer sizes are 8 (input) -> 12 -> 8 -> 1. Both hidden layers use the
    'relu' activation, the output layer uses 'sigmoid', and every layer uses
    the 'uniform' kernel initializer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam
        optimizer with learning rate 0.01, accuracy metric).
    """
    model = Sequential()  # layers-8,12,8,1
    # 1st hidden layer; input_dim=8 also declares the 8 input features
    model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
    # 2nd hidden layer
    model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
    # Output layer
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    adam = Adam(learning_rate=0.01)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

In [7]:
# Wrap the Keras builder so GridSearchCV can use it as an estimator
model = KerasClassifier(build_fn=create_model, verbose=0)

# Candidate values for the two hyperparameters tuned in this section
batch_size = [10, 20, 40]
epochs = [10, 50, 100]
param_grid = {"batch_size": batch_size, "epochs": epochs}

# Search every combination in the grid with a default KFold splitter;
# verbose=10 controls how much progress output GridSearchCV prints
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START batch_size=10, epochs=10....................................
[CV 1/5; 1/9] END .....batch_size=10, epochs=10;, score=0.727 total time=   2.2s
[CV 2/5; 1/9] START batch_size=10, epochs=10....................................
[CV 2/5; 1/9] END .....batch_size=10, epochs=10;, score=0.695 total time=   1.6s
[CV 3/5; 1/9] START batch_size=10, epochs=10....................................
[CV 3/5; 1/9] END .....batch_size=10, epochs=10;, score=0.740 total time=   2.1s
[CV 4/5; 1/9] START batch_size=10, epochs=10....................................
[CV 4/5; 1/9] END .....batch_size=10, epochs=10;, score=0.830 total time=   1.8s
[CV 5/5; 1/9] START batch_size=10, epochs=10....................................
[CV 5/5; 1/9] END .....batch_size=10, epochs=10;, score=0.752 total time=   2.1s
[CV 1/5; 2/9] START batch_size=10, epochs=50....................................
[CV 1/5; 2/9] END .....batch_size=10, epochs=50;,

In [9]:
# Report the best score and its parameters first ...
best_line = 'Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_)
print(best_line)

# ... then one line per candidate: mean test score, std of test score, parameters
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Best : 0.7670316576957703, using {'batch_size': 40, 'epochs': 10}
0.7488074183464051,0.044863989153786935 with: {'batch_size': 10, 'epochs': 10}
0.7552839398384095,0.0402080531692326 with: {'batch_size': 10, 'epochs': 50}
0.7357100486755371,0.0203954307276346 with: {'batch_size': 10, 'epochs': 100}
0.7670146942138671,0.042652964991045685 with: {'batch_size': 20, 'epochs': 10}
0.7617944121360779,0.03933897179107619 with: {'batch_size': 20, 'epochs': 50}
0.7436041116714478,0.05273053289931881 with: {'batch_size': 20, 'epochs': 100}
0.7670316576957703,0.045223619696716726 with: {'batch_size': 40, 'epochs': 10}
0.7657159805297852,0.03828702797656299 with: {'batch_size': 40, 'epochs': 50}
0.7448688626289368,0.04135356718586148 with: {'batch_size': 40, 'epochs': 100}


#### Tuning of Hyperparameters: Learning Rate and Dropout Rate

In [10]:
# Defining the model

def create_model(learning_rate,dropout_rate):
    """Build and compile a classifier with tunable learning rate and dropout.

    Layer sizes are 8 (input) -> 8 -> 4 -> 1. Both hidden layers use the
    'relu' activation and the 'normal' kernel initializer; each is followed
    by a Dropout layer. The output layer uses 'sigmoid'.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        dropout_rate: Rate passed to both Dropout layers.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy
        metric).
    """
    model = Sequential()  # layers-8,8,4,1
    # Only the first layer declares the input size (8 features)
    model.add(Dense(8, input_dim=8, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(dropout_rate))
    # No input_dim here: a non-first layer in a Sequential model takes its
    # input from the preceding layer
    model.add(Dense(4, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    adam = Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

# Wrap the builder; batch size and epoch count are held fixed for this search
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=40, epochs=10)

# Candidate values for the two hyperparameters tuned in this section
learning_rate = [0.001, 0.01, 0.1]
dropout_rate = [0.0, 0.1, 0.2]
param_grids = {"learning_rate": learning_rate, "dropout_rate": dropout_rate}

# Search every combination in the grid with a default KFold splitter
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grids,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 1/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.747 total time=   1.0s
[CV 2/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 2/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.714 total time=   1.1s
[CV 3/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 3/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.773 total time=   1.4s
[CV 4/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 4/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.850 total time=   1.0s
[CV 5/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 5/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.752 total time=   1.1s
[CV 1/5; 2/9] START dropout_rate=0.0, learning_rate=0.01........................
[CV 1/5; 

In [11]:
# Report the best score and its parameters first ...
best_line = 'Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_)
print(best_line)

# ... then one line per candidate: mean test score, std of test score, parameters
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Best : 0.7722010016441345, using {'dropout_rate': 0.0, 'learning_rate': 0.01}
0.7670146942138671,0.04537069124453173 with: {'dropout_rate': 0.0, 'learning_rate': 0.001}
0.7722010016441345,0.033042078313148045 with: {'dropout_rate': 0.0, 'learning_rate': 0.01}
0.7513963222503662,0.049615322962416775 with: {'dropout_rate': 0.0, 'learning_rate': 0.1}
0.7241235852241517,0.06999153657229158 with: {'dropout_rate': 0.1, 'learning_rate': 0.001}
0.7617859244346619,0.03515667535738698 with: {'dropout_rate': 0.1, 'learning_rate': 0.01}
0.748739492893219,0.056459390262239374 with: {'dropout_rate': 0.1, 'learning_rate': 0.1}
0.7264833211898803,0.053314547447349474 with: {'dropout_rate': 0.2, 'learning_rate': 0.001}
0.7683218836784362,0.03872201725447312 with: {'dropout_rate': 0.2, 'learning_rate': 0.01}
0.7293268799781799,0.06819463525590194 with: {'dropout_rate': 0.2, 'learning_rate': 0.1}


#### Tuning of Hyperparameters: Activation Function and Kernel Initializer

In [12]:
# Defining the model

def create_model(activation_function,init):
    """Build and compile a classifier with tunable activation and initializer.

    Layer sizes are 8 (input) -> 8 -> 4 -> 1. Each hidden layer is followed
    by Dropout(0.1). The output layer uses 'sigmoid'.

    Args:
        activation_function: Activation used by both hidden layers.
        init: Kernel initializer used by both hidden layers.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam
        optimizer with learning rate 0.001, accuracy metric).
    """
    # NOTE(review): the learning-rate/dropout search in this notebook reported
    # dropout 0.0 and learning rate 0.01 as best, while this builder fixes
    # 0.1 and 0.001 - confirm that is intended.
    model = Sequential()
    # Only the first layer declares the input size (8 features)
    model.add(Dense(8, input_dim=8, kernel_initializer=init, activation=activation_function))
    model.add(Dropout(0.1))
    # No input_dim here: a non-first layer in a Sequential model takes its
    # input from the preceding layer
    model.add(Dense(4, kernel_initializer=init, activation=activation_function))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    adam = Adam(learning_rate=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

# Wrap the builder; batch size and epoch count are held fixed for this search
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=40, epochs=10)

# Candidate values for the two hyperparameters tuned in this section
activation_function = ['softmax', 'relu', 'tanh', 'linear']
init = ['uniform', 'normal', 'zero']
param_grids = {"activation_function": activation_function, "init": init}

# Search every combination in the grid with a default KFold splitter
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grids,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)



Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 1/5; 1/12] END activation_function=softmax, init=uniform;, score=0.649 total time=   1.1s
[CV 2/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 2/5; 1/12] END activation_function=softmax, init=uniform;, score=0.416 total time=   1.2s
[CV 3/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 3/5; 1/12] END activation_function=softmax, init=uniform;, score=0.630 total time=   1.2s
[CV 4/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 4/5; 1/12] END activation_function=softmax, init=uniform;, score=0.745 total time=   1.2s
[CV 5/5; 1/12] START activation_function=softmax, init=uniform..................
[CV 5/5; 1/12] END activation_function=softmax, init=uniform;, score=0.647 total time=   1.2s
[CV 1/5; 2/12] START activation_function=softmax, init=normal...

In [13]:
# Report the best score and its parameters first ...
best_line = 'Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_)
print(best_line)

# ... then one line per candidate: mean test score, std of test score, parameters
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Best : 0.763101601600647, using {'activation_function': 'tanh', 'init': 'uniform'}
0.6173924148082733,0.10871275688841411 with: {'activation_function': 'softmax', 'init': 'uniform'}
0.519353199005127,0.15882346593393804 with: {'activation_function': 'softmax', 'init': 'normal'}
0.5531194269657135,0.15092304368625437 with: {'activation_function': 'softmax', 'init': 'zero'}
0.7344877362251282,0.04969249716502077 with: {'activation_function': 'relu', 'init': 'uniform'}
0.7604617595672607,0.04086773814471942 with: {'activation_function': 'relu', 'init': 'normal'}
0.6511586427688598,0.05244526932680711 with: {'activation_function': 'relu', 'init': 'zero'}
0.763101601600647,0.035058527814160245 with: {'activation_function': 'tanh', 'init': 'uniform'}
0.7605126857757568,0.043128645218943204 with: {'activation_function': 'tanh', 'init': 'normal'}
0.6511586427688598,0.05244526932680711 with: {'activation_function': 'tanh', 'init': 'zero'}
0.7553348779678345,0.0502737517276643 with: {'activation

#### Tuning of Hyperparameters: Number of Neurons in the Hidden Layers

In [14]:
# Defining the model

def create_model(neuron1,neuron2):
    """Build and compile a classifier with tunable hidden-layer widths.

    Layer sizes are 8 (input) -> neuron1 -> neuron2 -> 1. Both hidden layers
    use the 'tanh' activation and the 'uniform' kernel initializer; each is
    followed by Dropout(0.1). The output layer uses 'sigmoid'.

    Args:
        neuron1: Number of units in the first hidden layer.
        neuron2: Number of units in the second hidden layer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam
        optimizer with learning rate 0.001, accuracy metric).
    """
    model = Sequential()
    # Only the first layer declares the input size (8 features)
    model.add(Dense(neuron1, input_dim=8, kernel_initializer='uniform', activation='tanh'))
    model.add(Dropout(0.1))
    # No input_dim here: a non-first layer in a Sequential model takes its
    # input from the preceding layer
    model.add(Dense(neuron2, kernel_initializer='uniform', activation='tanh'))
    model.add(Dropout(0.1))
    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    adam = Adam(learning_rate=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

# Wrap the builder; batch size and epoch count are held fixed for this search
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=40, epochs=10)

# Candidate widths for the first and second hidden layers
neuron1 = [4, 8, 16]
neuron2 = [2, 4, 8]
param_grids = {"neuron1": neuron1, "neuron2": neuron2}

# Search every combination in the grid with a default KFold splitter
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grids,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 1/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.740 total time=   1.1s
[CV 2/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 2/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.675 total time=   1.2s
[CV 3/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 3/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.747 total time=   1.1s
[CV 4/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 4/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.791 total time=   1.2s
[CV 5/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 5/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.758 total time=   1.1s
[CV 1/5; 2/9] START neuron1=4, neuron2=4........................................
[CV 1/5; 2/9] END .........neuron1=4, neuron2=4;,

In [15]:
# Report the best score and its parameters first ...
best_line = 'Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_)
print(best_line)

# ... then one line per candidate: mean test score, std of test score, parameters
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Best : 0.763101601600647, using {'neuron1': 16, 'neuron2': 4}
0.7422714710235596,0.0377322437890995 with: {'neuron1': 4, 'neuron2': 2}
0.74879891872406,0.04151706842603851 with: {'neuron1': 4, 'neuron2': 4}
0.7513878226280213,0.04334523613512694 with: {'neuron1': 4, 'neuron2': 8}
0.7500891208648681,0.04317246135490419 with: {'neuron1': 8, 'neuron2': 2}
0.7605126857757568,0.041151043042602545 with: {'neuron1': 8, 'neuron2': 4}
0.7592055082321167,0.03786917795836905 with: {'neuron1': 8, 'neuron2': 8}
0.7592139959335327,0.041461152717342166 with: {'neuron1': 16, 'neuron2': 2}
0.763101601600647,0.03741623714544651 with: {'neuron1': 16, 'neuron2': 4}
0.7630846261978149,0.035177049936164294 with: {'neuron1': 16, 'neuron2': 8}


#### Training model with optimum values of Hyperparameters

In [16]:


# Defining the model

def create_model():
    """Build and compile the final classifier with fixed hyperparameters.

    Layer sizes are 8 (input) -> 16 -> 8 -> 1. Both hidden layers use the
    'tanh' activation and the 'uniform' kernel initializer; each is followed
    by a Dropout layer whose rate is 0. The output layer uses 'sigmoid'.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam
        optimizer with learning rate 0.01, accuracy metric).
    """
    # NOTE(review): the neuron search in this notebook reported neuron2=4 as
    # best, while the second hidden layer here has 8 units - confirm intended.
    model = Sequential()
    # Only the first layer declares the input size (8 features)
    model.add(Dense(16, input_dim=8, kernel_initializer='uniform', activation='tanh'))
    model.add(Dropout(0))
    # No input_dim here: a non-first layer in a Sequential model takes its
    # input from the preceding layer
    model.add(Dense(8, kernel_initializer='uniform', activation='tanh'))
    model.add(Dropout(0))
    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    adam = Adam(learning_rate=0.01)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

# Wrap the final builder with the chosen batch size and number of epochs
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=20, epochs=100)

# Train on the whole standardized dataset
model.fit(X_standardized, y)

# Predict on the same rows the model was fitted on, so the accuracy printed
# below is measured on the training data
y_predict = model.predict(X_standardized)
training_accuracy = accuracy_score(y, y_predict)
print(training_accuracy)

0.87109375


# Hyperparameters all at once


The hyperparameter optimization above was carried out two hyperparameters at a time, so the best overall combination may have been missed. Performance may be further improved by searching over all of the hyperparameters at once, as in the code snippet below.
#### This process is computationally expensive: the grid below has 8,748 candidates, i.e. 43,740 fits with 5-fold cross-validation.

In [None]:
def create_model(learning_rate,dropout_rate,activation_function,init,neuron1,neuron2):
    """Build and compile a classifier with every tuned hyperparameter exposed.

    Layer sizes are 8 (input) -> neuron1 -> neuron2 -> 1. Each hidden layer
    is followed by a Dropout layer. The output layer uses 'sigmoid'.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        dropout_rate: Rate passed to both Dropout layers.
        activation_function: Activation used by both hidden layers.
        init: Kernel initializer used by both hidden layers.
        neuron1: Number of units in the first hidden layer.
        neuron2: Number of units in the second hidden layer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy
        metric).
    """
    model = Sequential()
    # Only the first layer declares the input size (8 features)
    model.add(Dense(neuron1, input_dim=8, kernel_initializer=init, activation=activation_function))
    model.add(Dropout(dropout_rate))
    # No input_dim here: a non-first layer in a Sequential model takes its
    # input from the preceding layer
    model.add(Dense(neuron2, kernel_initializer=init, activation=activation_function))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    adam = Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

# Wrap the builder; every hyperparameter, including batch size and epochs,
# is supplied by the grid below
model = KerasClassifier(build_fn=create_model, verbose=0)

# Candidate values for each hyperparameter
batch_size = [10, 20, 40]
epochs = [10, 50, 100]
learning_rate = [0.001, 0.01, 0.1]
dropout_rate = [0.0, 0.1, 0.2]
activation_function = ['softmax', 'relu', 'tanh', 'linear']
init = ['uniform', 'normal', 'zero']
neuron1 = [4, 8, 16]
neuron2 = [2, 4, 8]

# Collect the candidates into the mapping GridSearchCV expects
param_grids = {
    "batch_size": batch_size,
    "epochs": epochs,
    "learning_rate": learning_rate,
    "dropout_rate": dropout_rate,
    "activation_function": activation_function,
    "init": init,
    "neuron1": neuron1,
    "neuron2": neuron2,
}

# Search every combination in the grid with a default KFold splitter
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grids,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)

# Report the best score and its parameters first ...
best_line = 'Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_)
print(best_line)

# ... then one line per candidate: mean test score, std of test score, parameters
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Fitting 5 folds for each of 8748 candidates, totalling 43740 fits
[CV 1/5; 1/8748] START activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=10, init=uniform, learning_rate=0.001, neuron1=4, neuron2=2
[CV 1/5; 1/8748] END activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=10, init=uniform, learning_rate=0.001, neuron1=4, neuron2=2;, score=0.649 total time=   2.1s
[CV 2/5; 1/8748] START activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=10, init=uniform, learning_rate=0.001, neuron1=4, neuron2=2
[CV 2/5; 1/8748] END activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=10, init=uniform, learning_rate=0.001, neuron1=4, neuron2=2;, score=0.584 total time=   2.1s
[CV 3/5; 1/8748] START activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=10, init=uniform, learning_rate=0.001, neuron1=4, neuron2=2
[CV 3/5; 1/8748] END activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=10, init=uniform, lear