In [None]:
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.datasets import mnist
from keras.layers import LeakyReLU


(a)  Download the MNIST data using the built-in functions in Keras

In [None]:
# Load MNIST with the Keras built-in loader; later cells read the training
# and test splits out of `data`.
data = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


(b) The data set is split into trainingData and testData; validationData can be split off from trainingData while training the model.

In [None]:
# Unpack the (images, labels) pairs of the training split and the test split.
(trainingDataImages, trainingDataLabels), (testDataImages, testDataLabels) = data

In [None]:
class createModel:
    """Fully connected Keras classifier for 28x28 inputs and 10 classes.

    The network is a ``keras.Sequential`` stack: a ``Flatten`` layer with
    ``input_shape=(28, 28)``, ``num_hidden_layers`` ``Dense`` hidden layers,
    and a 10-unit softmax ``Dense`` output layer.

    Attributes set by the methods:
        model: the underlying ``keras.Sequential`` instance.
        weights: result of ``model.get_weights()``, refreshed after
            construction and after every ``train_model`` call.
        training_history: return value of ``model.fit`` (set by
            ``train_model``).
        test_loss, test_accuracy: metrics from the last ``test_model`` call.
    """

    def __init__(self, num_hidden_layers, node_list, activation):
        """Build the (uncompiled) network.

        Args:
            num_hidden_layers: number of hidden ``Dense`` layers to add.
            node_list: indexable collection of layer widths; entry ``i`` is
                the number of units of hidden layer ``i``. Entries beyond
                ``num_hidden_layers`` are ignored.
            activation: activation passed to every hidden ``Dense`` layer
                (the notebook passes a string name or a ``LeakyReLU``
                instance).

        Raises:
            IndexError: if ``node_list`` has fewer entries than
                ``num_hidden_layers``.
        """
        # Fail before any layer is created, with a message that names the
        # mismatch, instead of hitting a bare IndexError halfway through
        # building the model.
        if num_hidden_layers > len(node_list):
            raise IndexError(
                "node_list has {} entries but num_hidden_layers is {}".format(
                    len(node_list), num_hidden_layers
                )
            )

        self.num_hidden_layers = num_hidden_layers
        self.activation_function = activation
        self.node_list = node_list

        self.model = keras.Sequential()
        self.model.add(keras.layers.Flatten(input_shape=(28, 28)))

        for i in range(self.num_hidden_layers):
            self.model.add(
                keras.layers.Dense(
                    self.node_list[i], activation=self.activation_function
                )
            )

        # Output layer: one unit per class, softmax over the 10 classes.
        self.model.add(keras.layers.Dense(10, activation="softmax"))

        # Snapshot of the freshly initialised weights.
        self.weights = self.model.get_weights()

    def train_model(self, x_train, y_train, epochs, learning_rate,
                    validation_split=1/12):
        """Compile the model with SGD and fit it on the training data.

        Args:
            x_train: training inputs passed to ``model.fit``.
            y_train: training labels; the loss is
                ``sparse_categorical_crossentropy``.
            epochs: number of epochs to train for.
            learning_rate: learning rate handed to ``keras.optimizers.SGD``.
            validation_split: fraction of the training data held out for
                validation. The default of 1/12 splits the 60000 MNIST
                training samples into 55000 for training and 5000 for
                validation.

        The fit history is stored in ``self.training_history`` and the
        trained weights in ``self.weights``.
        """
        opt = keras.optimizers.SGD(learning_rate)

        self.model.compile(
            optimizer=opt,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        self.training_history = self.model.fit(
            x_train,
            y_train,
            epochs=epochs,
            shuffle=True,
            validation_split=validation_split,
        )

        self.weights = self.model.get_weights()

    def test_model(self, x_test, y_test):
        """Evaluate the model on held-out data and print loss and accuracy.

        The model is only compiled inside ``train_model``, so call that
        first. The results are stored in ``self.test_loss`` and
        ``self.test_accuracy``.
        """
        (self.test_loss, self.test_accuracy) = self.model.evaluate(x_test, y_test)

        print("test_loss:", self.test_loss, "test_accuracy:", self.test_accuracy)
    

(c)

i . 
Model trained on unnormalized data, with one hidden layer (32 nodes), using the stochastic gradient descent optimizer with learning rate 0.1

In [None]:
# One 32-node sigmoid hidden layer, trained on the images as loaded.
model1 = createModel(num_hidden_layers=1, node_list=[32], activation="sigmoid")

model1.model.summary()
model1.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.1,
)
print("\nTesting Model :")
model1.test_model(x_test=testDataImages, y_test=testDataLabels)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 32)                25120     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 0.9166439739227294 test_accuracy: 0.7044


ii . 
Model trained on normalized data, with one hidden layer (32 nodes), using the stochastic gradient descent optimizer with learning rate 0.1

In [None]:
# Rescale the inputs by dividing every pixel value by 255.
trainingDataImagesNorm = trainingDataImages / 255
testDataImagesNorm = testDataImages / 255

# Same architecture as the first model, now fed the rescaled images.
model2 = createModel(num_hidden_layers=1, node_list=[32], activation="sigmoid")

model2.model.summary()
model2.train_model(
    x_train=trainingDataImagesNorm,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.1,
)
print("\nTesting Model :")
model2.test_model(x_test=testDataImagesNorm, y_test=testDataLabels)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                25120     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 0.1586121327459812 test_accuracy: 0.9529


iii. The best-performing model among i and ii is the one trained on normalized data. Now we train models model3a and model3b with 2 and 3 hidden layers respectively.

In [None]:
# Rebind the generic names to the normalized arrays; the cells that follow
# keep using `trainingDataImages` / `testDataImages`.
trainingDataImages, testDataImages = trainingDataImagesNorm, testDataImagesNorm

# Two hidden layers of 64 nodes each.
model3a = createModel(num_hidden_layers=2, node_list=[64, 64], activation="sigmoid")

model3a.model.summary()
model3a.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.1,
)
print("\nTesting Model :")
model3a.test_model(x_test=testDataImages, y_test=testDataLabels)
print()

# Three hidden layers of 64 nodes each.
model3b = createModel(num_hidden_layers=3, node_list=[64, 64, 64], activation="sigmoid")

model3b.model.summary()
model3b.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.1,
)
print("\nTesting Model :")
model3b.test_model(x_test=testDataImages, y_test=testDataLabels)


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                50240     
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_6 (Dense)              (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 0.12738648625165225 test_accuracy: 0.9616

Model: "sequentia

iv.  The model with 2 hidden layers is the best-performing model in iii. Now we train this model with learning rates of 0.001 and 0.0001.

In [None]:
# Learning rate 0.001.
model4a = createModel(num_hidden_layers=2, node_list=[64, 64], activation="sigmoid")

model4a.model.summary()
model4a.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.001,
)
print("\nTesting Model :")
model4a.test_model(x_test=testDataImages, y_test=testDataLabels)
print()

# Learning rate 0.0001.
model4b = createModel(num_hidden_layers=2, node_list=[64, 64], activation="sigmoid")

model4b.model.summary()
model4b.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.0001,
)
print("\nTesting Model :")
model4b.test_model(x_test=testDataImages, y_test=testDataLabels)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 64)                50240     
_________________________________________________________________
dense_12 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_13 (Dense)             (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 2.122042381286621 test_accuracy: 0.5315

Model: "sequential_

Observations: The accuracy is very low because of the low learning rate.

v. The best-performing model in iv is the one with learning rate 0.001. Now we train that model with 128 and 64 nodes in the hidden layers, in both orders (128 then 64, and 64 then 128).

In [None]:
# Hidden layers of 128 then 64 nodes.
model5a = createModel(num_hidden_layers=2, node_list=[128, 64], activation="sigmoid")

model5a.model.summary()
model5a.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.001,
)
print("\nTesting Model :")
model5a.test_model(x_test=testDataImages, y_test=testDataLabels)
print()

# Hidden layers of 64 then 128 nodes.
model5b = createModel(num_hidden_layers=2, node_list=[64, 128], activation="sigmoid")

model5b.model.summary()
model5b.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.001,
)
print("\nTesting Model :")
model5b.test_model(x_test=testDataImages, y_test=testDataLabels)

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_6 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_18 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_19 (Dense)             (None, 10)                650       
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 2.0218931818008423 test_accuracy: 0.5637

Model: "sequenti

vi. The best-performing model in v is the one whose hidden layers have 128 and 64 nodes. Now we train models with the tanh, ReLU and leaky ReLU activation functions.

In [None]:
# tanh hidden activations.
model6a = createModel(num_hidden_layers=2, node_list=[128, 64], activation="tanh")

model6a.model.summary()
model6a.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.001,
)
print("\nTesting Model :")
model6a.test_model(x_test=testDataImages, y_test=testDataLabels)
print()

# ReLU hidden activations.
model6b = createModel(num_hidden_layers=2, node_list=[128, 64], activation="relu")

model6b.model.summary()
model6b.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.001,
)
print("\nTesting Model :")
model6b.test_model(x_test=testDataImages, y_test=testDataLabels)
print()

# Leaky ReLU hidden activations (alpha=0.1), passed as a layer instance.
model6c = createModel(
    num_hidden_layers=2,
    node_list=[128, 64],
    activation=LeakyReLU(alpha=0.1),
)

model6c.model.summary()
model6c.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.001,
)
print("\nTesting Model :")
model6c.test_model(x_test=testDataImages, y_test=testDataLabels)

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_8 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_23 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_24 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_25 (Dense)             (None, 10)                650       
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 0.34992577372789385 test_accuracy: 0.9064

Model: "sequent

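vii. Among the learning rates compared above, lr = 0.1 performed best. Note that the cell below retrains the 128/64-node leaky ReLU model with a learning rate of 0.01, not 0.1.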

In [None]:
# 128/64-node leaky ReLU network trained with learning rate 0.01.
model7 = createModel(
    num_hidden_layers=2,
    node_list=[128, 64],
    activation=LeakyReLU(alpha=0.1),
)

model7.model.summary()
model7.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.01,
)
print("\nTesting Model :")
model7.test_model(x_test=testDataImages, y_test=testDataLabels)

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_11 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_32 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_33 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_34 (Dense)             (None, 10)                650       
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 0.13192787636220454 test_accuracy: 0.9604


viii. Among all the models trained above, the chosen model has 2 hidden layers with 128 and 64 nodes and the leaky ReLU activation function for the hidden layers; the cell below trains it with a learning rate of 0.01 (not 0.1). I choose the best model based on the lowest loss and the highest accuracy.

In [None]:
# Final model: same configuration as model7 (128/64 nodes, leaky ReLU, lr 0.01).
model8 = createModel(
    num_hidden_layers=2,
    node_list=[128, 64],
    activation=LeakyReLU(alpha=0.1),
)

model8.model.summary()
model8.train_model(
    x_train=trainingDataImages,
    y_train=trainingDataLabels,
    epochs=10,
    learning_rate=0.01,
)
print("\nTesting Model :")
model8.test_model(x_test=testDataImages, y_test=testDataLabels)

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_12 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_35 (Dense)             (None, 128)               100480    
_________________________________________________________________
dense_36 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_37 (Dense)             (None, 10)                650       
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Train on 55000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Testing Model :
test_loss: 0.1384312619239092 test_accuracy: 0.9591
