Bayesian Optimization for finding the optimal hidden-layer sizes (hyperparameters) of a neural network with a predefined number of layers and a fixed activation function

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

# TensorFlow and tf.keras (keras provides the fashion_mnist loader used below)
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

# Record the TensorFlow version this notebook was executed with
print(tf.__version__)

1.15.0


In [0]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plot

from sklearn.model_selection import train_test_split


import sklearn.gaussian_process as gp
from sklearn.model_selection import cross_val_score


Importing the Fashion-MNIST Dataset

In [3]:
# Load the Fashion-MNIST dataset that ships with tf.keras; load_data()
# returns a (train, test) pair, each of which is an (images, labels) pair.
fashion_mnist = keras.datasets.fashion_mnist

train_split, test_split = fashion_mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [0]:
# Display names for the ten clothing categories
# (presumably listed in label-id order 0-9 — confirm against the dataset docs).
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

Data Preprocessing: Scaling the training and test feature vectors to the [0, 1] range by dividing the pixel values by 255.

Reshaping the 3-D image arrays into 2-D matrices (one 784-element row per image) so that they can be passed to the neural network

In [0]:
# Scale pixel intensities by 1/255.
# NOTE: train_images / test_images are rebound to their scaled versions, so
# running this cell a second time without reloading the data scales them again.
train_images, test_images = train_images / 255.0, test_images / 255.0

# Targets are the label arrays, unchanged.
y_train = train_labels
y_test = test_labels

# Features: one flat 784-element row per image, as expected by the classifier.
X_train = train_images.reshape(train_images.shape[0], 784)
X_test = test_images.reshape(test_images.shape[0], 784)

In [14]:
from sklearn.neural_network import MLPClassifier


# Baseline network: three hidden layers of 5 units each, trained with SGD.
NNclassifier = MLPClassifier(
    hidden_layer_sizes=(5, 5, 5),
    activation='relu',
    solver='sgd',
    max_iter=100,
    random_state=1,
)

# Train on the full training set.
NNclassifier.fit(X_train, y_train)

# Accuracy on the data the model was trained on, then on the held-out test set.
train_accuracy = NNclassifier.score(X_train, y_train)
print("In-sample score = ", train_accuracy)
test_accuracy = NNclassifier.score(X_test, y_test)
print("Out-sample score = ", test_accuracy)

# Per-fold accuracy from 3-fold cross-validation on the training set.
cv_accuracy = cross_val_score(NNclassifier, X=X_train, y=y_train, scoring='accuracy', cv=3)
print("cross val score = ", cv_accuracy)



In-sample score =  0.8248166666666666
Out-sample score =  0.8066




cross val score =  [0.80745 0.80345 0.8098 ]




Test accuracy for a neural network with 3 hidden layers of sizes (5, 5, 5) = 80.66%

In [7]:
from google.colab import files

# Open Colab's upload dialog and keep the content of the first uploaded file
# (the captured output shows gp.py being uploaded here).
uploaded = files.upload()
src = [*uploaded.values()][0]

Saving gp.py to gp.py


In [0]:
open('gp.py','wb').write(src)
import gp

%load gp.py
%run gp.py

In [0]:
## Define loss function 

def sample_loss_NN(params):
  """Score one hidden-layer configuration by cross-validated accuracy.

  Despite the name, the returned value is an accuracy (higher is better),
  not a loss.

  Args:
    params: indexable of three numbers; entries 0, 1 and 2 are the sizes of
      the first, second and third hidden layer. Non-integer values are
      truncated toward zero.

  Returns:
    Mean accuracy over 3-fold cross-validation on (X_train, y_train).
  """
  # Built-in int() performs the same truncation as the removed np.int alias.
  h1 = int(params[0])
  h2 = int(params[1])
  h3 = int(params[2])

  model = MLPClassifier(hidden_layer_sizes=(h1,h2,h3), max_iter=100,activation = 'relu',solver='sgd',random_state=1)
  # No separate model.fit() here: cross_val_score clones the estimator and
  # fits a fresh copy per fold, so a prior fit only costs time.
  return cross_val_score(model,X=X_train,y=y_train,scoring='accuracy', cv=3).mean()

Perform Bayesian Optimization within the defined bounds

In [11]:


# Search box for the optimiser: one [lower, upper] row per hidden layer.
bounds = np.array([
    [2, 100],  # hidden layer 1
    [2, 150],  # hidden layer 2
    [2, 150],  # hidden layer 3
])
print(bounds)

# xp collects the sampled layer-size vectors and yp their scores
# (the next cell indexes xp by the argmax of yp).
xp, yp = bayesian_optimisation(
    n_iters=15,
    sample_loss=sample_loss_NN,
    bounds=bounds,
    n_pre_samples=10,
)

[[  2 100]
 [  2 150]
 [  2 150]]




The optimal hyperparameters (hidden layer sizes) are:

In [12]:
print(xp)
print(yp)

# Pick the sampled point with the highest score and round it to whole units.
# NOTE(review): sample_loss_NN truncates the sizes with np.int, whereas this
# rounds to nearest, so the sizes printed here can differ by one from the
# sizes that were actually evaluated.
best_index = np.argmax(np.array(yp))
xp_hat = np.round(xp[best_index, :])

print(xp_hat)

[[ 19.27285713  92.28713396  28.61297043]
 [ 62.04435249  29.79398188 127.04938309]
 [ 44.19778407 121.72867543  28.24037948]
 [ 90.60686509  33.89703895  94.89812467]
 [ 24.41978461  61.05088208 142.16286669]
 [ 38.59274435 147.91037982  75.1295951 ]
 [ 41.40372383  10.3248189   16.50166835]
 [ 95.84406672  23.15774114 116.1329798 ]
 [  4.82408787  97.63441989  16.49801777]
 [ 64.22743664 100.76620744 146.06652291]
 [ 55.98135269  84.99682877  53.90189165]
 [ 46.75351229 150.         150.        ]
 [100.          34.89074152   2.00000001]
 [ 34.26920965   5.58813717  61.0064906 ]
 [ 89.00498064 145.30583923  86.31568801]
 [ 43.85783518  87.07606563  32.6975562 ]
 [ 95.90042598 134.54705273 146.4392322 ]
 [ 37.35264664 144.40914187  81.97904042]
 [ 29.93472794  87.96822961  70.11655816]
 [ 96.07315744   6.22547389 147.00751228]
 [  4.40831938   2.04917901 145.83791528]
 [ 69.65813501  57.56802667 132.58158986]
 [ 87.46731626   3.65853156  61.36154968]
 [ 96.02167912  65.23459088 148.92

In [13]:
#Initializing the MLPClassifier
NNclassifier_best = MLPClassifier(hidden_layer_sizes=(96,135,146), max_iter=100,activation = 'relu',solver='sgd',random_state=1)

#Fitting the training data to the network
NNclassifier_best.fit(X_train, y_train)

print("In-sample score = ",NNclassifier_best.score(X_train,y_train))
print("Out-sample score = ",NNclassifier_best.score(X_test,y_test))



In-sample score =  0.9116
Out-sample score =  0.8766


Test Accuracy - 87.66%

Training Accuracy - 91.16%

Optimal Hyperparameters - Hidden Layer Sizes = (96, 135, 146)