# Hyperparameter tuning for a Keras model with both Dense and Conv layers

### Load libraries

In [None]:
!pip install scikit-optimize
#imports we know we'll need
import skopt
from skopt import gbrt_minimize, gp_minimize
from skopt.utils import use_named_args
from skopt.space import Real, Categorical, Integer
from tensorflow.python.keras import backend as K
from tensorflow.python.framework import ops
from tensorflow.keras.datasets import cifar10
from keras.utils import np_utils
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

import pandas as pd
import random
import numpy as np

Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[?25l[K     |███▎                            | 10 kB 27.4 MB/s eta 0:00:01[K     |██████▌                         | 20 kB 32.4 MB/s eta 0:00:01[K     |█████████▉                      | 30 kB 33.4 MB/s eta 0:00:01[K     |█████████████                   | 40 kB 16.0 MB/s eta 0:00:01[K     |████████████████▍               | 51 kB 13.8 MB/s eta 0:00:01[K     |███████████████████▋            | 61 kB 16.1 MB/s eta 0:00:01[K     |██████████████████████▉         | 71 kB 15.9 MB/s eta 0:00:01[K     |██████████████████████████▏     | 81 kB 15.3 MB/s eta 0:00:01[K     |█████████████████████████████▍  | 92 kB 16.9 MB/s eta 0:00:01[K     |████████████████████████████████| 100 kB 8.0 MB/s 
[?25hCollecting pyaml>=16.9
  Downloading pyaml-21.10.1-py2.py3-none-any.whl (24 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-21.10.1 scikit-optimize-0.9.0


### Load data

In [None]:
# Load the CIFAR-10 train/test split through the Keras dataset helper
# (the archive is downloaded on first use).
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


Scale the pixel values to the range 0 to 1

In [None]:
# Scale pixel values from [0, 255] down to [0, 1].
# - Casting to float32 first avoids the float64 arrays that `uint8 / 255`
#   would create, halving the memory held by the image tensors.
# - The dtype guard makes the cell safe to re-run: once the arrays are
#   floating point they have already been scaled and are left untouched.
if X_train.dtype == np.uint8:
    X_train = X_train.astype("float32") / 255.0
    X_test = X_test.astype("float32") / 255.0
print(X_train.min(), X_train.max())

0.0 1.0


Convert the labels to one-hot vectors for use with the softmax output layer

In [None]:
# Inspect the label array's shape before it is one-hot encoded.
y_train.shape

(50000, 1)

In [None]:
# Per-sample shape of the image tensor (everything after the batch axis).
input_shape = X_train.shape[1:]
print(input_shape)

(32, 32, 3)


In [None]:
# One-hot encode the integer class labels into 10 columns so they can be used
# with the softmax output layer and the categorical cross-entropy loss.
# - tf.keras.utils.to_categorical keeps this cell on the same tensorflow.keras
#   API as the rest of the notebook.
# - The shape guard makes the cell safe to re-run: the raw labels have a
#   trailing axis of size 1, so anything else has already been encoded.
if y_train.shape[-1] == 1:
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)

## Checking result against a baseline


In [None]:
# Hand-picked baseline: two 3x3 conv layers, one max-pool, two small dense
# layers and a 10-way softmax, trained with plain SGD.
# Note: the layer named 'input_layer' is the first Dense layer after Flatten,
# not the network's actual input.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform',
           padding='same', input_shape=(32, 32, 3)),
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform',
           padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(16, activation='relu', name='input_layer'),
    Dense(16, activation='relu', name="hidden_layer"),
    Dense(10, activation='softmax', name="output_layer"),
])
model.compile(
    optimizer=SGD(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 16, 32)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 8192)              0         
                                                                 
 input_layer (Dense)         (None, 16)                131088    
                                                                 
 hidden_layer (Dense)        (None, 16)                272       
                                                        

In [None]:
# Early stopping watches the *training* loss and stops after 10 epochs without improvement.
early_S = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
# Train the baseline for up to 30 epochs; 15% of the training data is held out for validation.
blackbox = model.fit(X_train, y_train, batch_size=128, epochs = 30, validation_split=.15, callbacks = [early_S])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# evaluate() returns [loss, accuracy] because the model was compiled with
# a single metric; only the test accuracy is reported here.
test_loss, accuracy = model.evaluate(X_test, y_test)
print(accuracy)

0.3564999997615814


### Hyperparameter space

In [None]:
# Search space for the Bayesian optimiser. Each dimension's `name` must match
# the keyword argument of the objective function that receives it.
dim_learning_rate = Real(1e-4, 1e-1, prior='log-uniform', name='learning_rate')
dim_num_conv_layers = Integer(1, 3, name='num_conv_layers')
dim_num_dense_layers = Integer(1, 5, name='num_dense_layers')
dim_num_dense_nodes = Integer(1, 28, name='num_dense_nodes')
dim_activation = Categorical(['relu', 'sigmoid'], name='activation')
dim_batch_size = Integer(1, 128, name='batch_size')

# The order of this list fixes the order of every parameter vector
# exchanged with the optimiser (x0, x_iters, result.x).
dimensions = [
    dim_learning_rate,
    dim_num_conv_layers,
    dim_num_dense_layers,
    dim_num_dense_nodes,
    dim_activation,
    dim_batch_size,
]

# Starting point for the search, listed in the same order as `dimensions`.
default_parameters = [1e-3, 1, 1, 13, 'relu', 64]

### Create Model

In [None]:
def create_model(learning_rate, num_conv_layers, num_dense_layers,
                 num_dense_nodes, activation):
    """Build and compile a CNN classifier for 32x32x3 inputs and 10 classes.

    Args:
        learning_rate: Learning rate passed to the SGD optimizer.
        num_conv_layers: Number of convolutional blocks. Each block is two
            3x3 ReLU conv layers followed by 2x2 max pooling; the filter count
            is 32 for the first block, 64 for the second and 128 from the
            third block onwards.
        num_dense_layers: Number of dense layers placed after Flatten.
        num_dense_nodes: Number of units in each of those dense layers.
        activation: Activation used by the dense layers (the conv layers
            always use ReLU).

    Returns:
        A compiled Sequential model using categorical cross-entropy loss and
        the accuracy metric.
    """
    conv_options = dict(activation='relu',
                        kernel_initializer='he_uniform',
                        padding='same')

    net = Sequential()
    net.add(Input(shape=(32, 32, 3)))

    # Convolutional blocks. Layers are named explicitly, which helps avoid
    # TensorFlow errors deep in the stack trace.
    for block_no in range(1, num_conv_layers + 1):
        filters = min(32 * 2 ** (block_no - 1), 128)
        for conv_no in (1, 2):
            net.add(Conv2D(filters, (3, 3),
                           name='layer_conv_{0}_{1}'.format(conv_no, block_no),
                           **conv_options))
        net.add(MaxPooling2D((2, 2)))

    net.add(Flatten())

    # Fully connected layers.
    for dense_no in range(1, num_dense_layers + 1):
        net.add(Dense(num_dense_nodes,
                      activation=activation,
                      name='layer_dense_{0}'.format(dense_no)))

    # Classification layer.
    net.add(Dense(10, activation='softmax'))

    net.compile(optimizer=SGD(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net

### Objective function

In [None]:
@use_named_args(dimensions=dimensions)
def fitness(learning_rate, num_conv_layers, num_dense_layers,
            num_dense_nodes, activation, batch_size):
    """Objective for the Bayesian search: train one candidate and score it.

    The decorator unpacks the optimiser's parameter list into these keyword
    arguments using the names declared in `dimensions`.

    Returns:
        The negated validation accuracy of the last training epoch. It is
        negated because the optimiser minimises the objective.
    """
    candidate = create_model(learning_rate, num_conv_layers,
                             num_dense_layers, num_dense_nodes, activation)

    stopper = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
    history = candidate.fit(X_train, y_train,
                            batch_size=batch_size,
                            epochs=25,
                            validation_split=0.15,
                            callbacks=[stopper]).history

    # Score the candidate by the validation accuracy of its final epoch.
    accuracy = history['val_accuracy'][-1]

    print()
    print("Accuracy: {0:.2%}".format(accuracy))
    print()

    # Drop the model and reset Keras/TensorFlow state; otherwise every new
    # candidate would keep being added to the same TensorFlow graph.
    del candidate
    K.clear_session()
    ops.reset_default_graph()

    return -accuracy

### Run the optimizer

In [None]:
# Run 50 objective evaluations with Gaussian-process Bayesian optimisation,
# starting from `default_parameters`. The objective returns negated accuracy,
# so the minimum found corresponds to the best validation accuracy.
gp_result = gp_minimize(
    func=fitness,
    dimensions=dimensions,
    x0=default_parameters,
    n_calls=50,
    noise=0.01,
    kappa=5,
    n_jobs=-1,
    verbose=True,
)

Iteration No: 1 started. Evaluating function at provided point.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Accuracy: 50.28%

Iteration No: 1 ended. Evaluation done at provided point.
Time taken: 85.3309
Function value obtained: -0.5028
Current minimum: -0.5028
Iteration No: 2 started. Evaluating function at random point.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25

Accuracy: 65.19%

Iteration No: 2 ended. Evaluation done at random point.
Time taken: 143.9982
Function value obtained: -0.6519
Current 

### The best results

In [None]:
# Best parameter vector found, in the same order as `dimensions`.
gp_result.x

[0.04130837784848988, 3, 4, 26, 'relu', 33]

In [None]:
# gp_result.fun is the minimum objective value, i.e. negated accuracy;
# flip the sign and convert to a percentage for display.
best_accuracy_pct = round(gp_result.fun * -100, 2)
print("best accuracy was " + str(best_accuracy_pct) + "%.")

best accuracy was 75.75%.


### Model results with best tuned parameters

In [None]:
# Rebuild the network from the best hyperparameters found by the Bayesian
# search. gp_result.x is ordered like `dimensions`: learning_rate,
# num_conv_layers, num_dense_layers, num_dense_nodes, activation, batch_size.
gp_model = create_model(learning_rate=gp_result.x[0],
                        num_conv_layers=gp_result.x[1],
                        num_dense_layers=gp_result.x[2],
                        num_dense_nodes=gp_result.x[3],
                        activation=gp_result.x[4])

# Unlike the search objective, monitor the validation loss here and restore
# the best weights seen when training stops.
early_S = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Named blackbox because it represents the structure.
# Fix: the original call was missing the comma after validation_split,
# which made the whole cell a SyntaxError.
blackbox = gp_model.fit(x=X_train,
                        y=y_train,
                        epochs=25,
                        batch_size=gp_result.x[5],
                        validation_split=0.15,
                        callbacks=[early_S]
                        )

# Validation accuracy recorded for the last epoch that ran.
accuracy = blackbox.history['val_accuracy'][-1]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Display the last-epoch validation accuracy assigned by the most recently run training cell.
accuracy

0.7577333450317383

In [None]:
# Score the retrained model on the held-out test set; returns [loss, accuracy].
gp_model.evaluate(X_test, y_test)



[1.1113765239715576, 0.7519999742507935]

In [None]:
# Objective value (negated validation accuracy) of every evaluation, in call order.
gp_result.func_vals

In [None]:
# Parameter vector tried at every evaluation, in call order.
gp_result.x_iters

### Save the results

In [None]:
# One row per optimiser evaluation: the hyperparameters tried plus the
# resulting validation accuracy in percent (sign flipped back from the
# negated objective value).
hyperparameter_columns = ["learning_rate", "num_conv_layers", "num_dense_layers",
                          "num_dense_nodes", "activation", "batch_size"]
df_temp = pd.DataFrame(gp_result.x_iters, columns=hyperparameter_columns)
df_temp["accuracy"] = gp_result.func_vals * -100

In [None]:
# Display the table of Bayesian-search trials.
df_temp

In [None]:
# Persist the Bayesian-search trials to a CSV file in the current working directory.
df_temp.to_csv("DF_cifar10_bayesian.csv")

## Hyperparameter tuning using random search

In [None]:
# Candidate values for random search. The values are consumed positionally,
# so the key order must stay: learning rate, conv layers, dense layers,
# dense nodes, activation, batch size.
#
# Fix: range() excludes its upper bound, so the original grid could never draw
# 3 conv layers, 5 dense layers, 28 nodes or a batch size of 128, although the
# Bayesian search space (inclusive Integer bounds) can. The integer ranges now
# cover the same inclusive intervals.
#
# NOTE(review): the learning-rate interval (0.005 to 0.2, log-spaced) still
# differs from the Bayesian space (1e-4 to 1e-1) and is left as written;
# confirm whether that difference is intentional.
param_grid = {
              'dim_learning_rate' : list(np.logspace(np.log(0.005), np.log(0.2), base = np.exp(1), num = 1000)),
              'dim_num_conv_layers' : list(range(1, 3 + 1)),
              'dim_num_dense_layers' : list(range(1, 5 + 1)),
              'dim_num_dense_nodes' : list(range(1, 28 + 1)),
              'dim_activation' : ['relu', 'sigmoid'],
              'dim_batch_size' : list(range(1, 128 + 1))
}

In [None]:
# Preview: with a fixed seed, draw five random parameter sets from the grid
# (one value per entry, in the grid's key order) and print them.
random.seed(50)

for draw in range(5):
    params = [random.sample(candidates, 1)[0] for candidates in param_grid.values()]
    print(params)
    

[0.03275177220475209, 2, 3, 21, 'relu', 89]
[0.029863672437724486, 2, 1, 18, 'sigmoid', 29]
[0.06437322298735856, 1, 2, 12, 'relu', 126]
[0.01854686888813489, 2, 2, 7, 'relu', 125]
[0.13128416279545488, 2, 4, 21, 'relu', 118]


### Objective function for random search

In [None]:
def fitness(learning_rate, num_conv_layers, num_dense_layers,
            num_dense_nodes, activation, batch_size):
    """Objective for random search: train one candidate and report its score.

    This definition reuses the name `fitness`, so once this cell has run it
    replaces the decorated objective defined earlier for the Bayesian search.

    Returns:
        A list holding the six hyperparameters followed by the validation
        accuracy of the last training epoch:
        [learning_rate, num_conv_layers, num_dense_layers, num_dense_nodes,
         activation, batch_size, accuracy].
    """
    candidate = create_model(learning_rate, num_conv_layers,
                             num_dense_layers, num_dense_nodes, activation)

    stopper = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
    history = candidate.fit(X_train, y_train,
                            batch_size=batch_size,
                            epochs=25,
                            validation_split=0.15,
                            callbacks=[stopper]).history

    # Score the candidate by the validation accuracy of its final epoch.
    accuracy = history['val_accuracy'][-1]

    print()
    print("Accuracy: {0:.2%}".format(accuracy))
    print()

    # Drop the model and reset Keras/TensorFlow state; otherwise every new
    # candidate would keep being added to the same TensorFlow graph.
    del candidate
    K.clear_session()
    ops.reset_default_graph()

    trial_record = [learning_rate, num_conv_layers, num_dense_layers,
                    num_dense_nodes, activation, batch_size, accuracy]
    return trial_record

### Run random search

In [None]:
# Random search: 50 independent draws from param_grid, each trained and scored
# by fitness(). Every trial record is kept, and the record with the highest
# validation accuracy (index 6) is tracked in best_results.
random.seed(50)

results = []
best_results = [0, 0, 0, 0, 0, 0, 0]

for trial in range(1, 51):

    print("Iteration: ", trial)

    # One random value per hyperparameter, in the grid's key order.
    params = [random.sample(candidates, 1)[0] for candidates in param_grid.values()]

    result = fitness(*params)
    results.append(result)
    if result[6] > best_results[6]:
        best_results = result

### Results using random search

In [None]:
# Display every random-search trial record (hyperparameters followed by accuracy).
results

In [None]:
# Tabulate the random-search trials; the column order mirrors the list
# returned by fitness().
random_search_columns = ['learning_rate', 'num_conv_layers', 'num_dense_layers',
                         'num_dense_nodes', 'activation', 'batch_size', 'accuracy']
df_random = pd.DataFrame(results, columns=random_search_columns)

In [None]:
# Preview the first rows of the random-search results table.
df_random.head()

Unnamed: 0,learning_rate,num_conv_layers,num_dense_layers,num_dense_nodes,activation,batch_size,accuracy
0,0.032752,2,3,21,relu,89,0.6684
1,0.029864,2,1,18,sigmoid,29,0.7044
2,0.064373,1,2,12,relu,126,0.586
3,0.018547,2,2,7,relu,125,0.644667
4,0.131284,2,4,21,relu,118,0.670133


In [None]:
# Persist the random-search trials to a CSV file in the current working directory.
df_random.to_csv("df_Cifar10_random.csv")

In [None]:
# Rebuild the best architecture found by the Bayesian search and print it.
# Fix: create_model takes exactly five hyperparameters, and gp_result.x holds
# six entries (indices 0-5), so the original extra argument gp_result.x[6]
# raised an IndexError. The sixth entry, gp_result.x[5], is the batch size and
# belongs to fit(), not to create_model().
gp_model = create_model(*gp_result.x[:5])
gp_model.summary()

In [None]:
# NOTE(review): `model` is whichever module-level model was assigned last; on a
# top-to-bottom run that is the baseline network, not gp_model. Confirm this is intended.
model.summary()

### Retrain the best model architecture for random search

In [None]:
# NOTE(review): the section heading refers to random search, but this cell trains
# gp_model with the Bayesian-search batch size (gp_result.x[5]). Confirm which model is meant.
# Train for 20 epochs with 15% of the training data held out for validation, no early stopping.
gp_model.fit(X_train, y_train, batch_size=gp_result.x[5], epochs =20, validation_split=0.15)
# Loss and accuracy on the full training set (including the validation slice).
gp_model.evaluate(X_train,y_train)

In [None]:
# Loss and accuracy of the retrained model on the held-out test set.
gp_model.evaluate(X_test,y_test)



[0.0829724669456482, 0.9836000204086304]

In [None]:
# Best random-search trial: six hyperparameters followed by its validation accuracy.
best_results

[0.029863672437724486, 2, 1, 18, 'sigmoid', 29, 0.7044000029563904]

In [None]:
# Rebuild the best random-search configuration. The first five entries of
# best_results are the architecture and optimizer settings; entry 5 is the
# batch size used for training.
model = create_model(*best_results[:5])

# Stop when the validation loss has not improved for 10 epochs and roll back
# to the best weights seen.
early_S = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Named blackbox because it represents the structure.
blackbox = model.fit(
    X_train,
    y_train,
    batch_size=best_results[5],
    epochs=25,
    validation_split=0.15,
    callbacks=[early_S],
)

# Validation accuracy recorded for the last epoch that ran.
accuracy = blackbox.history['val_accuracy'][-1]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25


In [None]:
# Loss and accuracy of the retrained random-search model on the held-out test set.
model.evaluate(X_test, y_test)



[0.8549267649650574, 0.7141000032424927]