<!--NAVIGATION-->
<a href="https://colab.research.google.com/github/ruslanmv/Neural-Networks-from-Scratch/blob/master/3_How_to_tune_parameters_in_neural_networks_colab.ipynb"> <img align="left" src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab" title="Open in Google Colaboratory"></a>
<a href="https://github.com/ruslanmv/Neural-Networks-from-Scratch/blob/master/3_How_to_tune_parameters_in_neural_networks_colab.ipynb"> <img align="left" src="https://img.shields.io/badge/Github-Download-blue.svg" alt="Download" title="Download Notebook"></a>

# Tuning parameters in Neural Networks

Hello, today we are going to tune some hyperparameters in Neural Networks by using Keras.

Previously we created a new neural network in TFLearn for the MMORPG-AI problem,
and then rewrote it in Keras.

## Tune Batch Size and Number of Epochs

In [1]:
# Use scikit-learn to grid search the batch size and epochs
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

In [2]:
#Importing library
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
import numpy as np
from tensorflow.keras.layers import BatchNormalization

In [3]:
#Helper functions about mmorpg-ai project
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import io
from IPython.display import clear_output, Image, display
import PIL.Image
from matplotlib import pyplot as plt
import logging, sys
# Suppress logging calls of every severity (all levels up to sys.maxsize)
logging.disable(sys.maxsize)
import os

def df_to_numpy_image(df_image_clean, index):
    """Collect the cells of the row labelled ``index`` into one NumPy array.

    The row is selected with ``.loc[[index]]`` and transposed, so its cells
    are visited in column order. Every cell must expose ``.tolist()``
    (presumably each one is a NumPy array holding part of the image --
    TODO confirm against the pickled data).

    Returns:
        ``np.array`` built from the list form of every visited cell.
    """
    # After the transpose there is one entry per column of the frame
    column_major = df_image_clean.loc[[index]].T.to_numpy()
    cells = []
    for per_column in column_major.tolist():
        for cell in per_column:
            # Convert each cell to plain lists so np.array can stack them
            cells.append(cell.tolist())
    return np.array(cells)
def df_to_numpy_input(df_input, index):
    """Return the row labelled ``index`` of ``df_input`` as a flat 1-D array.

    The row is selected with ``.loc[[index]]``, converted to nested Python
    lists, rebuilt as a NumPy array and flattened with ``ravel``.
    """
    selected_rows = df_input.loc[[index]]
    nested_values = selected_rows.values.tolist()
    return np.array(nested_values).ravel()
#round a float up to next even number
import math
def roundeven(f):
    """Round ``f`` up to the nearest even integer (even inputs are unchanged)."""
    half_steps = math.ceil(f / 2.)
    return half_steps * 2


In [4]:
# --- Input frame geometry -------------------------------------------------
width = 480
height = 270
ncolors = 3

# --- Scaling factor -------------------------------------------------------
# Every reference layer size below is multiplied by this factor
Norma = 29/1000

# --- Layer hyper-parameters (numeric suffix = operation index) -------------
# 1: convolution
filters1 = roundeven(96*Norma)
kernel1 = 11
stride1 = 4
# 2: max-pooling
kernel2 = 3
stride2 = 2
# 3: convolution
filters3 = roundeven(256*Norma)
kernel3 = 5
# 4: max-pooling
kernel4 = 3
stride4 = 2
# 5: convolution
filters5 = roundeven(384*Norma)
kernel5 = 3
# 6: convolution
filters6 = roundeven(384*Norma)
kernel6 = 3
# 7: convolution
filters7 = roundeven(256*Norma)
kernel7 = 3
# 8: max-pooling
kernel8 = 3
stride8 = 2
# 9 and 10: unit counts of the first two fully connected layers
activation9 = roundeven(4096*Norma)
activation10 = roundeven(4096*Norma)
# 11: number of output units
outputs11 = int(1000*Norma)

# --- Regularisation / optimisation ----------------------------------------
dropout13 = 0.5
dropout15 = 0.5
learning_rate17 = 0.001

# Seed NumPy's global random generator
np.random.seed(1000)

In [5]:
# Function to create model, required for KerasClassifier
def create_model():
    """Build and compile the MmoNet convolutional network.

    The architecture is five convolutional layers (each followed by batch
    normalisation and ReLU, with max-pooling after the 1st, 2nd and 5th),
    three fully connected hidden layers with dropout, and a softmax output
    of ``outputs11`` units. Layer sizes are read from the module-level
    configuration values.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    #Instantiation
    MmoNet = Sequential()
    #1st Convolutional Layer (the only layer that declares the input shape);
    # the kernel size comes from the configured kernel1 instead of a literal
    MmoNet.add(Conv2D(filters=filters1, input_shape=(height, width, ncolors), kernel_size=(kernel1,kernel1), strides=(stride1,stride1), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel2,kernel2), strides=(stride2,stride2), padding='same'))
    #2nd Convolutional Layer
    MmoNet.add(Conv2D(filters=filters3, kernel_size=(kernel3, kernel3), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel4,kernel4), strides=(stride4,stride4), padding='same'))
    #3rd Convolutional Layer
    MmoNet.add(Conv2D(filters=filters5, kernel_size=(kernel5,kernel5), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #4th Convolutional Layer
    MmoNet.add(Conv2D(filters=filters6, kernel_size=( kernel6, kernel6), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #5th Convolutional Layer
    MmoNet.add(Conv2D(filters=filters7, kernel_size=(kernel7,kernel7),  padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel8,kernel8), strides=(stride8,stride8), padding='same'))
    #Passing it to a Fully Connected layer
    MmoNet.add(Flatten())
    # 1st Fully Connected Layer (no input_shape: it is not the first layer,
    # so its input size is inferred from the Flatten output)
    MmoNet.add(Dense(activation9))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    # Add Dropout to prevent overfitting
    MmoNet.add(Dropout(dropout13))
    #2nd Fully Connected Layer
    MmoNet.add(Dense(activation10))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #Add Dropout
    MmoNet.add(Dropout(dropout15))
    #3rd Fully Connected Layer
    MmoNet.add(Dense(1000))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #Add Dropout
    MmoNet.add(Dropout(dropout15))
    #Output Layer
    MmoNet.add(Dense(outputs11))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('softmax'))
    # Compile model
    # NOTE(review): softmax makes the outputs sum to 1 while binary_crossentropy
    # scores every output independently -- confirm this pairing matches how the
    # labels are encoded.
    MmoNet.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return MmoNet

In [5]:
# fix random seed for reproducibility
# NOTE(review): only NumPy's global generator is seeded here; no other
# source of randomness is seeded in this cell.
seed = 7
numpy.random.seed(seed)

We load the datasets we are going to work with.

In [6]:
# Fetch a fresh copy of the project repository (it contains the data/ folder)
import os
# GitHub account and repository name used to build the clone URL
user = "ruslanmv"
repo = "Neural-Networks-from-Scratch"
# remove local directory if it already exists
# (the check is relative to the current working directory)
if os.path.isdir(repo):
    !rm -rf {repo}
# {user} and {repo} are interpolated by IPython before the shell command runs
!git clone https://github.com/{user}/{repo}.git

Cloning into 'Neural-Networks-from-Scratch'...
remote: Enumerating objects: 144, done.[K
remote: Counting objects: 100% (60/60), done.[K
remote: Compressing objects: 100% (47/47), done.[K
remote: Total 144 (delta 37), reused 27 (delta 13), pack-reused 84[K
Receiving objects: 100% (144/144), 317.29 MiB | 33.24 MiB/s, done.
Resolving deltas: 100% (62/62), done.
Checking out files: 100% (45/45), done.


In [7]:
import os
# Work from inside the cloned repository so relative 'data/...' paths resolve
os.chdir('Neural-Networks-from-Scratch')

In [8]:
!ls /content/Neural-Networks-from-Scratch/data


dfx-0.pkl  dfx-1.pkl  dfx-2.pkl  dfy-0.pkl  dfy-1.pkl  dfy-2.pkl


In [10]:
# Load the first data shard (paths are relative to the current working directory)
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load pickle files that come from a trusted source.
#We load the images of the gameplay
x_training_data=pd.read_pickle('data/dfx-0.pkl')  
#We load the inputs of the gameplay
y_training_data=pd.read_pickle('data/dfy-0.pkl')

In [11]:
# Hold out 20% of the samples for validation; random_state pins the split
X_train, X_valid, y_train, y_valid = train_test_split(x_training_data, y_training_data, test_size=0.2, random_state=6)
# Train Image part ( 4 Dimensional)
X_image = np.array([df_to_numpy_image(X_train,i) for i in X_train.index])
# Reshape to (samples, height, width, ncolors): the same axis order as the
# input_shape declared by create_model -- keep the two in sync
X=X_image.reshape(-1,height,width,ncolors)
#Train Input part ( 1 Dimensional )
Y = [df_to_numpy_input(y_train,i) for i in y_train.index]
# Test Image part ( 4 Dimensional)
test_image = np.array([df_to_numpy_image(X_valid,i) for i in X_valid.index])
test_x=test_image.reshape(-1,height,width,ncolors)
## Test Input part( 1 Dimensional )
test_y = [df_to_numpy_input(y_valid,i) for i in y_valid.index]
  

In [12]:
# create model
# Wrap the create_model builder in KerasClassifier so it can be handed to
# GridSearchCV as an estimator; verbose=0 is passed through to the wrapper
model = KerasClassifier(build_fn=create_model, verbose=0)

  


In [13]:
# define the grid search parameters
# Candidate values for the two hyper-parameters searched in this section
batch_size = [10, 20, 40, 80, 100]
epochs = [10, 50, 100, 200]

In [14]:
# define test the grid search parameters
#batch_size = [10,  40 ]
#epochs = [10,  100]

In [15]:
# Search every (batch_size, epochs) combination with 3-fold cross-validation;
# n_jobs=-1 asks scikit-learn to use all available processors
param_grid = {"batch_size": batch_size, "epochs": epochs}
grid = GridSearchCV(model, param_grid, n_jobs=-1, cv=3)

In [16]:
# Run the grid search on the training images (X_image) and their input vectors (Y)
grid_result = grid.fit(X_image, Y)



In [17]:
# summarize results: the best combination first, then one line per combination
results = grid_result.cv_results_
means, stds, params = (results[key] for key in ('mean_test_score', 'std_test_score', 'params'))
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for mean, stdev, param in zip(means, stds, params):
    # mean score, its standard deviation across folds, and the parameters used
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.784336 using {'batch_size': 80, 'epochs': 100}
0.045304 (0.044657) with: {'batch_size': 10, 'epochs': 10}
0.484622 (0.133055) with: {'batch_size': 10, 'epochs': 50}
0.477461 (0.349394) with: {'batch_size': 10, 'epochs': 100}
0.099502 (0.071406) with: {'batch_size': 10, 'epochs': 200}
0.442786 (0.343031) with: {'batch_size': 20, 'epochs': 10}
0.681441 (0.418576) with: {'batch_size': 20, 'epochs': 50}
0.462687 (0.384793) with: {'batch_size': 20, 'epochs': 100}
0.035203 (0.031159) with: {'batch_size': 20, 'epochs': 200}
0.039801 (0.056287) with: {'batch_size': 40, 'epochs': 10}
0.758480 (0.216292) with: {'batch_size': 40, 'epochs': 50}
0.731193 (0.358937) with: {'batch_size': 40, 'epochs': 100}
0.382708 (0.386830) with: {'batch_size': 40, 'epochs': 200}
0.000000 (0.000000) with: {'batch_size': 80, 'epochs': 10}
0.512739 (0.384310) with: {'batch_size': 80, 'epochs': 50}
0.784336 (0.135371) with: {'batch_size': 80, 'epochs': 100}
0.646842 (0.447046) with: {'batch_size': 80, 'epochs'

# Tune the Training Optimization Algorithm

In [9]:
# Use scikit-learn to grid search the training optimization algorithm
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model(optimizer='adam'):
    """Build and compile the MmoNet convolutional network.

    The architecture is five convolutional layers (each followed by batch
    normalisation and ReLU, with max-pooling after the 1st, 2nd and 5th),
    three fully connected hidden layers with dropout, and a softmax output
    of ``outputs11`` units. Layer sizes are read from the module-level
    configuration values.

    Args:
        optimizer: Optimizer passed straight to ``compile`` (the grid
            search supplies optimizer names as strings).

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    # create model
    MmoNet = Sequential()
    #1st Convolutional Layer (the only layer that declares the input shape);
    # the kernel size comes from the configured kernel1 instead of a literal
    MmoNet.add(Conv2D(filters=filters1, input_shape=(height, width, ncolors), kernel_size=(kernel1,kernel1), strides=(stride1,stride1), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel2,kernel2), strides=(stride2,stride2), padding='same'))
    #2nd Convolutional Layer
    MmoNet.add(Conv2D(filters=filters3, kernel_size=(kernel3, kernel3), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel4,kernel4), strides=(stride4,stride4), padding='same'))
    #3rd Convolutional Layer
    MmoNet.add(Conv2D(filters=filters5, kernel_size=(kernel5,kernel5), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #4th Convolutional Layer
    MmoNet.add(Conv2D(filters=filters6, kernel_size=( kernel6, kernel6), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #5th Convolutional Layer
    MmoNet.add(Conv2D(filters=filters7, kernel_size=(kernel7,kernel7),  padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel8,kernel8), strides=(stride8,stride8), padding='same'))
    #Passing it to a Fully Connected layer
    MmoNet.add(Flatten())
    # 1st Fully Connected Layer (no input_shape: it is not the first layer,
    # so its input size is inferred from the Flatten output)
    MmoNet.add(Dense(activation9))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    # Add Dropout to prevent overfitting
    MmoNet.add(Dropout(dropout13))
    #2nd Fully Connected Layer
    MmoNet.add(Dense(activation10))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #Add Dropout
    MmoNet.add(Dropout(dropout15))
    #3rd Fully Connected Layer
    MmoNet.add(Dense(1000))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #Add Dropout
    MmoNet.add(Dropout(dropout15))
    #Output Layer
    MmoNet.add(Dense(outputs11))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('softmax'))
    # Compile model
    # NOTE(review): softmax makes the outputs sum to 1 while binary_crossentropy
    # scores every output independently -- confirm this pairing matches how the
    # labels are encoded.
    MmoNet.compile(loss='binary_crossentropy',  optimizer=optimizer, metrics=['accuracy'])
    return MmoNet
# fix random seed for reproducibility
# NOTE(review): only NumPy's global generator is seeded here.
seed = 7
numpy.random.seed(seed)
# load dataset
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load pickle files that come from a trusted source.
#We load the images of the gameplay
x_training_data=pd.read_pickle('data/dfx-0.pkl')
#We load the inputs of the gameplay
y_training_data=pd.read_pickle('data/dfy-0.pkl')
# Hold out 20% of the samples for validation; random_state pins the split
X_train, X_valid, y_train, y_valid = train_test_split(x_training_data, y_training_data, test_size=0.2, random_state=6)
# Train Image part ( 4 Dimensional)
X_image = np.array([df_to_numpy_image(X_train,i) for i in X_train.index])
# Reshape to (samples, height, width, ncolors): the same axis order as the
# input_shape declared by create_model -- keep the two in sync
X=X_image.reshape(-1,height,width,ncolors)
#Train Input part ( 1 Dimensional )
Y = [df_to_numpy_input(y_train,i) for i in y_train.index]
# Test Image part ( 4 Dimensional)
test_image = np.array([df_to_numpy_image(X_valid,i) for i in X_valid.index])
test_x=test_image.reshape(-1,height,width,ncolors)
## Test Input part( 1 Dimensional )
test_y = [df_to_numpy_input(y_valid,i) for i in y_valid.index]
  

In [10]:
# create model with fixed epochs=100 and batch_size=80
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=80, verbose=0)
# define the grid search parameters: one candidate per optimizer name,
# forwarded to create_model through its `optimizer` argument
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = {"optimizer": optimizer}
# 3-fold cross-validation; n_jobs=-1 asks scikit-learn to use all processors
grid = GridSearchCV(model, param_grid, n_jobs=-1, cv=3)

  


In [11]:
# Run the optimizer grid search on the training images (X_image) and their input vectors (Y)
grid_result = grid.fit(X_image, Y)

In [12]:
# summarize results: the best combination first, then one line per combination
results = grid_result.cv_results_
means, stds, params = (results[key] for key in ('mean_test_score', 'std_test_score', 'params'))
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for mean, stdev, param in zip(means, stds, params):
    # mean score, its standard deviation across folds, and the parameters used
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.694784 using {'optimizer': 'Nadam'}
0.074702 (0.095133) with: {'optimizer': 'SGD'}
0.373134 (0.445933) with: {'optimizer': 'RMSprop'}
0.000000 (0.000000) with: {'optimizer': 'Adagrad'}
0.050505 (0.071425) with: {'optimizer': 'Adadelta'}
0.060606 (0.085710) with: {'optimizer': 'Adam'}
0.000000 (0.000000) with: {'optimizer': 'Adamax'}
0.694784 (0.233556) with: {'optimizer': 'Nadam'}


# Tune Learning Rate and Momentum

In [9]:
# Use scikit-learn to grid search the learning rate and momentum
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.optimizers import SGD


# Function to create model, required for KerasClassifier
def create_model(learn_rate=0.01, momentum=0):
    """Build the MmoNet convolutional network and compile it with SGD.

    The architecture is five convolutional layers (each followed by batch
    normalisation and ReLU, with max-pooling after the 1st, 2nd and 5th),
    three fully connected hidden layers with dropout, and a softmax output
    of ``outputs11`` units. Layer sizes are read from the module-level
    configuration values.

    Args:
        learn_rate: Learning rate given to the SGD optimizer.
        momentum: Momentum given to the SGD optimizer.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    # create model
    MmoNet = Sequential()
    #1st Convolutional Layer (the only layer that declares the input shape);
    # the kernel size comes from the configured kernel1 instead of a literal
    MmoNet.add(Conv2D(filters=filters1, input_shape=(height, width, ncolors), kernel_size=(kernel1,kernel1), strides=(stride1,stride1), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel2,kernel2), strides=(stride2,stride2), padding='same'))
    #2nd Convolutional Layer
    MmoNet.add(Conv2D(filters=filters3, kernel_size=(kernel3, kernel3), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel4,kernel4), strides=(stride4,stride4), padding='same'))
    #3rd Convolutional Layer
    MmoNet.add(Conv2D(filters=filters5, kernel_size=(kernel5,kernel5), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #4th Convolutional Layer
    MmoNet.add(Conv2D(filters=filters6, kernel_size=( kernel6, kernel6), padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #5th Convolutional Layer
    MmoNet.add(Conv2D(filters=filters7, kernel_size=(kernel7,kernel7),  padding='same'))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    MmoNet.add(MaxPooling2D(pool_size=(kernel8,kernel8), strides=(stride8,stride8), padding='same'))
    #Passing it to a Fully Connected layer
    MmoNet.add(Flatten())
    # 1st Fully Connected Layer (no input_shape: it is not the first layer,
    # so its input size is inferred from the Flatten output)
    MmoNet.add(Dense(activation9))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    # Add Dropout to prevent overfitting
    MmoNet.add(Dropout(dropout13))
    #2nd Fully Connected Layer
    MmoNet.add(Dense(activation10))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #Add Dropout
    MmoNet.add(Dropout(dropout15))
    #3rd Fully Connected Layer
    MmoNet.add(Dense(1000))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('relu'))
    #Add Dropout
    MmoNet.add(Dropout(dropout15))
    #Output Layer
    MmoNet.add(Dense(outputs11))
    MmoNet.add(BatchNormalization())
    MmoNet.add(Activation('softmax'))

    # Compile model
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and triggers a warning (or an error on recent Keras releases)
    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)
    # NOTE(review): softmax makes the outputs sum to 1 while binary_crossentropy
    # scores every output independently -- confirm this pairing matches how the
    # labels are encoded.
    MmoNet.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return MmoNet


In [10]:
# fix random seed for reproducibility
# NOTE(review): only NumPy's global generator is seeded here.
seed = 7
numpy.random.seed(seed)
# load dataset
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load pickle files that come from a trusted source.
#We load the images of the gameplay
x_training_data=pd.read_pickle('data/dfx-0.pkl')
#We load the inputs of the gameplay
y_training_data=pd.read_pickle('data/dfy-0.pkl')
# Hold out 20% of the samples for validation; random_state pins the split
X_train, X_valid, y_train, y_valid = train_test_split(x_training_data, y_training_data, test_size=0.2, random_state=6)
# Train Image part ( 4 Dimensional)
X_image = np.array([df_to_numpy_image(X_train,i) for i in X_train.index])
# Reshape to (samples, height, width, ncolors): the same axis order as the
# input_shape declared by create_model -- keep the two in sync
X=X_image.reshape(-1,height,width,ncolors)
#Train Input part ( 1 Dimensional )
Y = [df_to_numpy_input(y_train,i) for i in y_train.index]
# Test Image part ( 4 Dimensional)
test_image = np.array([df_to_numpy_image(X_valid,i) for i in X_valid.index])
test_x=test_image.reshape(-1,height,width,ncolors)
## Test Input part( 1 Dimensional )
test_y = [df_to_numpy_input(y_valid,i) for i in y_valid.index]



In [13]:
# create model with fixed epochs=100 and batch_size=80
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=80, verbose=0)

# define the grid search parameters
# The fuller grid below is kept for reference only: Too much memory for Colab
#learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
#momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]

# Reduced grid actually searched
learn_rate = [0.001, 0.01, 0.1]
momentum = [0.0]

# Keys match the keyword arguments of create_model
param_grid = {"learn_rate": learn_rate, "momentum": momentum}
# 3-fold cross-validation; n_jobs=-1 asks scikit-learn to use all processors
grid = GridSearchCV(model, param_grid, n_jobs=-1, cv=3)


  


In [14]:
# Run the learning-rate/momentum grid search on the training images (X_image) and their input vectors (Y)
grid_result = grid.fit(X_image, Y)


  super(SGD, self).__init__(name, **kwargs)


In [15]:
# summarize results: the best combination first, then one line per combination
results = grid_result.cv_results_
means, stds, params = (results[key] for key in ('mean_test_score', 'std_test_score', 'params'))
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for mean, stdev, param in zip(means, stds, params):
    # mean score, its standard deviation across folds, and the parameters used
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.432685 using {'learn_rate': 0.1, 'momentum': 0.0}
0.000000 (0.000000) with: {'learn_rate': 0.001, 'momentum': 0.0}
0.000000 (0.000000) with: {'learn_rate': 0.01, 'momentum': 0.0}
0.432685 (0.398801) with: {'learn_rate': 0.1, 'momentum': 0.0}


References:
<a href="https://pyimagesearch.com/2018/12/31/keras-conv2d-and-convolutional-layers">1</a> <a href="https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/">2</a>
