# IST597:- Multi-Layer Perceptron

## Load the libraries

In [194]:
import os
import numpy as np
import time
import tensorflow as tf
np.random.seed(5510)
tf.random.set_seed(5510)

In [195]:
tf.config.list_physical_devices('GPU')

[]

#Get number of Gpu's and id's in the system or else you can also use Nvidia-smi in command prompt.

## Generate random data

In [196]:
size_input = 784
size_hidden =[128, 64]
size_output = 10
number_of_train_examples = 60000
number_of_test_examples = 10000
from tensorflow.keras.datasets import mnist
# load dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()


In [197]:
y_test

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

In [198]:
y_train = tf.keras.utils.to_categorical(y_train)

In [199]:
y_test = tf.keras.utils.to_categorical(y_test)

In [200]:
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [201]:
X_train = X_train.reshape(60000, 28*28)
X_test = X_test.reshape(10000, 28*28)

In [None]:
X_train.shape

(60000, 784)

In [None]:
X_test.shape

(10000, 784)

In [202]:
X_train = X_train / 255
X_test = X_test / 255

In [None]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
X_train.shape

(60000, 784)

In [None]:
y_train.shape

(60000, 10)

In [None]:
X_train.size

47040000

In [None]:
y_train.size

600000

In [None]:
y_test.shape

(10000, 10)

In [None]:
X_test.size

7840000

In [None]:
y_test.size

100000

In [None]:
#X_train = np.random.randn(number_of_train_examples , size_input)
#y_train = np.random.randn(number_of_train_examples)
#X_test = np.random.randn(number_of_test_examples, size_input)
#y_test = np.random.randn(number_of_test_examples)

In [203]:
# Split dataset into batches
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(16)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(4)

## Build MLP using Eager Execution

In [None]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.device =\
    size_input, size_hidden, size_output, device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    output = tf.nn.softmax(output)
    return output

## Train Model

In [None]:
# Set number of epochs
NUM_EPOCHS = 10

In [None]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7146720703125
Number of Epoch = 1 - Accuracy:= 10.915017700195312
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.6942576822916666
Number of Epoch = 2 - Accuracy:= 13.48499247233073
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.678141015625
Number of Epoch = 3 - Accuracy:= 15.509998575846353
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.6642876953125
Number of Epoch = 4 - Accuracy:= 17.191678873697917
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.6521606119791666
Number of Epoch = 5 - Accuracy:= 18.74502970377604
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.6392409505208333
Number of Epoch = 6 - Accuracy:= 20.266695149739583
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.6273642578125
Number of Epoch = 7 - Accuracy:= 21.786692301432293
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.6180056640625
Number of Epoch = 8 - Accuracy:= 22.96668904622396
Number of Epoch = 9 - Categorical Cross-Entropy:= 0

In [None]:
print(loss_total_gpu)

tf.Tensor([[36068.285]], shape=(1, 1), dtype=float32)


In [None]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0012
24.89 %


In [None]:
print(y_test[:5])
print(preds[:5])

print(y_true[:5])
print(y_pred[:5])

[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
tf.Tensor(
[[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 1.0000000e+00 0.0000000e+00 0.0000000e+00]
 [0.0000000e+00 8.4785869e-28 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 4.0407927e-33 1.0000000e+00 0.0000000e+00 0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 1.0000000e+00 0.0000000e+00 0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  4.4565869e-18 1.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]
 [0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00
  0.0000000e+00 1.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]], shape=(5, 10), dtype=float32)
[7 2 1 0 4]
[7 7 7 6 6]


Using Dropout Layer to avoid overfitting


In [None]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.device =\
    size_input, size_hidden, size_output, device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

    self.dropout_layer = keras.layers.Dropout(rate=0.5)
def call(self, input, training=None):
  X_tf = tf.cast(X, dtype=tf.float32)
  X_tf = self.dropout_layer(X_tf)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer
  what = tf.matmul(X_tf, self.W1) + self.b1
  hhat = tf.nn.relu(what)
  hhat = self.dropout_layer(hhat, training = training)
    #Compute the hidden
  what2 = tf.matmul(hhat, self.W2) + self.b2
  hhat2 = tf.nn.relu(what2)
  hhat2 = self.dropout_layer(hhat2, training = training)
    # Compute output
  output = tf.matmul(hhat2, self.W3) + self.b3
  output = tf.nn.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
  return output

In [None]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

In [None]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

L2 Regularization


In [None]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [None]:
# Set number of epochs
NUM_EPOCHS = 10

In [None]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7026961588541667
Number of Epoch = 1 - Accuracy:= 12.421675618489584
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.6622991536458334
Number of Epoch = 2 - Accuracy:= 17.348350016276044
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.6262250651041666
Number of Epoch = 3 - Accuracy:= 21.835028076171874
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.6029328125
Number of Epoch = 4 - Accuracy:= 24.751688639322918
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.5882063802083334
Number of Epoch = 5 - Accuracy:= 26.61334228515625
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.5769110677083333
Number of Epoch = 6 - Accuracy:= 28.058341471354165
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.5672538411458333
Number of Epoch = 7 - Accuracy:= 29.258294677734376
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.5578458984375
Number of Epoch = 8 - Accuracy:= 30.42334798177083
Number of Epoch = 9 - Categorical Cross-Ent

## One Step Inference

In [None]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0011
33.15 %


Hyperparameter Optimization; Since I am getting better accuracy with the L2 Regularization, therefore I am going to perform hyperparamete tuning.

In [None]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.leaky_relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.leaky_relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [None]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.694659765625
Number of Epoch = 1 - Accuracy:= 13.3783447265625
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.6614013671875
Number of Epoch = 2 - Accuracy:= 17.473345947265624
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.6340060546875
Number of Epoch = 3 - Accuracy:= 20.808378092447917
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.6094662760416667
Number of Epoch = 4 - Accuracy:= 23.876688639322914
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.5864924479166667
Number of Epoch = 5 - Accuracy:= 26.75334269205729
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.5666244791666667
Number of Epoch = 6 - Accuracy:= 29.216674804687496
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.5495546223958333
Number of Epoch = 7 - Accuracy:= 31.321638997395834
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.53460498046875
Number of Epoch = 8 - Accuracy:= 33.161659749348956
Number of Epoch = 9 - Categorical Cross-Entropy

2. Changing the Learning Rate

In [None]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.05)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.leaky_relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.leaky_relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [None]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.5683600260416667
Number of Epoch = 1 - Accuracy:= 29.41667073567708
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.44155390625
Number of Epoch = 2 - Accuracy:= 45.15502115885416
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.40423111979166665
Number of Epoch = 3 - Accuracy:= 49.7633056640625
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.33054612630208335
Number of Epoch = 4 - Accuracy:= 58.80325927734374
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.30035400390625
Number of Epoch = 5 - Accuracy:= 62.60998942057292
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.29451624348958333
Number of Epoch = 6 - Accuracy:= 63.325008138020834
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.28211653645833334
Number of Epoch = 7 - Accuracy:= 64.85165201822916
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.27899697265625
Number of Epoch = 8 - Accuracy:= 65.21839599609375
Number of Epoch = 9 - Categorical Cross-Entro

In [None]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0004
76.02 %


Changing Activation function to Relu instead Leaky_Relu and changing the learnin rate to 0.6


In [None]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [None]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.6267244791666666
Number of Epoch = 1 - Accuracy:= 22.15669962565104
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.39377301432291667
Number of Epoch = 2 - Accuracy:= 51.01829427083333
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.3199173828125
Number of Epoch = 3 - Accuracy:= 60.20001220703125
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.29841015625
Number of Epoch = 4 - Accuracy:= 62.88002115885417
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.29119697265625
Number of Epoch = 5 - Accuracy:= 63.78494873046875
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.27932350260416666
Number of Epoch = 6 - Accuracy:= 65.24838053385416
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.273338671875
Number of Epoch = 7 - Accuracy:= 65.96993001302084
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.221247802734375
Number of Epoch = 8 - Accuracy:= 72.24005533854167
Number of Epoch = 9 - Categorical Cross-Entropy:= 0.1

4. Lets increase the epochs.

In [None]:
NUM_EPOCHS = 15

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.72158359375
Number of Epoch = 1 - Accuracy:= 10.460022989908854
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.7217572265625
Number of Epoch = 2 - Accuracy:= 10.441695149739584
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.7217569010416667
Number of Epoch = 3 - Accuracy:= 10.441706339518229
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.72175703125
Number of Epoch = 4 - Accuracy:= 10.441696166992188
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.7217568359375
Number of Epoch = 5 - Accuracy:= 10.441683959960937
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7217577473958333
Number of Epoch = 6 - Accuracy:= 10.441691080729166
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.50200693359375
Number of Epoch = 7 - Accuracy:= 34.16334228515625
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.017688210042317708
Number of Epoch = 8 - Accuracy:= 90.0225341796875
Number of Epoch = 9 - Categorical Cross-Entropy:= 0

In [None]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0000
96.57 %


5. Let's try L1 regularization with No. of epochs as 12 and learning rate as 0.05

In [None]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L1= (tf.reduce_sum(self.W1)+ tf.reduce_sum(self.W2)+tf.reduce_sum(self.W3)) 
      current_loss = self.loss(predicted, y_train) + 0.01 * L1 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [None]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.17444921875
Number of Epoch = 1 - Accuracy:= 11.131665039062499
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.11697017415364583
Number of Epoch = 2 - Accuracy:= 9.246722412109374
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.1165115234375
Number of Epoch = 3 - Accuracy:= 9.358384195963541
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.11631156412760417
Number of Epoch = 4 - Accuracy:= 9.385040283203125
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.116121875
Number of Epoch = 5 - Accuracy:= 9.343365478515626
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.11597390950520833
Number of Epoch = 6 - Accuracy:= 9.546696980794271
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.11585550944010417
Number of Epoch = 7 - Accuracy:= 9.706705729166666
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.11573255208333333
Number of Epoch = 8 - Accuracy:= 9.57003173828125
Number of Epoch = 9 - Categorical Cross-Entropy:= 

In [None]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0002
11.35 %


6. Taking L2 regularization, learning rate = 0.6 and epochs as 12
 

In [None]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [None]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.664136328125
Number of Epoch = 1 - Accuracy:= 17.55501708984375
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.6180444661458333
Number of Epoch = 2 - Accuracy:= 23.27833455403646
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.5873873046875
Number of Epoch = 3 - Accuracy:= 27.088319905598958
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.5721716796875
Number of Epoch = 4 - Accuracy:= 28.949945068359373
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.5018830403645833
Number of Epoch = 5 - Accuracy:= 37.648356119791664
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.4923470703125
Number of Epoch = 6 - Accuracy:= 38.839978027343754
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.42974576822916666
Number of Epoch = 7 - Accuracy:= 46.21836344401042
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.10949156901041666
Number of Epoch = 8 - Accuracy:= 79.539501953125
Number of Epoch = 9 - Categorical Cross-Entropy:=

7.Changing the optimizer to adam.

In [204]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [205]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.13606853841145833
Number of Epoch = 1 - Accuracy:= 39.12495524088542
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.122042919921875
Number of Epoch = 2 - Accuracy:= 24.961732991536458
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.12182252604166667
Number of Epoch = 3 - Accuracy:= 30.61329549153646
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.13426326497395832
Number of Epoch = 4 - Accuracy:= 29.980037434895834
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.13995704752604166
Number of Epoch = 5 - Accuracy:= 30.783345540364582
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.1355442138671875
Number of Epoch = 6 - Accuracy:= 31.97164916992187
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.1412935546875
Number of Epoch = 7 - Accuracy:= 32.98833414713542
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.153947021484375
Number of Epoch = 8 - Accuracy:= 27.666609700520834
Number of Epoch = 9 - Categorical Cro

8. Changing the batch size and changing the optimizer back to SGD


In [207]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [208]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(10)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 10 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 1.3462825520833333
Number of Epoch = 1 - Accuracy:= 16.45828145345052
Number of Epoch = 2 - Categorical Cross-Entropy:= 1.3095045572916666
Number of Epoch = 2 - Accuracy:= 18.748126220703128
Number of Epoch = 3 - Categorical Cross-Entropy:= 1.2975274739583333
Number of Epoch = 3 - Accuracy:= 19.491448974609373
Number of Epoch = 4 - Categorical Cross-Entropy:= 1.2945876302083332
Number of Epoch = 4 - Accuracy:= 19.67147216796875
Number of Epoch = 5 - Categorical Cross-Entropy:= 1.30822421875
Number of Epoch = 5 - Accuracy:= 18.796429443359376
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.3752591796875
Number of Epoch = 6 - Accuracy:= 69.2760009765625
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.018576680501302085
Number of Epoch = 7 - Accuracy:= 94.44149576822916
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.012700848388671875
Number of Epoch = 8 - Accuracy:= 96.12504069010417
Number of Epoch = 9 - Categorical Cross-En

Changing the batch size to 30

In [209]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [210]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(30)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 30 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.4811564453125
Number of Epoch = 1 - Accuracy:= 10.441703796386719
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.4811638671875
Number of Epoch = 2 - Accuracy:= 10.44170913696289
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.48116412760416666
Number of Epoch = 3 - Accuracy:= 10.441705322265625
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.48116373697916665
Number of Epoch = 4 - Accuracy:= 10.441706848144532
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.4811639973958333
Number of Epoch = 5 - Accuracy:= 10.44171371459961
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.4811639973958333
Number of Epoch = 6 - Accuracy:= 10.441694641113282
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.48116393229166665
Number of Epoch = 7 - Accuracy:= 10.44170150756836
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.48116396484375
Number of Epoch = 8 - Accuracy:= 10.441700744628907
Number of Epoch = 9 - Categorical Cross-

In [211]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [212]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(30)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 30 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.40784563802083335
Number of Epoch = 1 - Accuracy:= 24.006585693359376
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.29243151041666665
Number of Epoch = 2 - Accuracy:= 45.49493408203125
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.28340244140625
Number of Epoch = 3 - Accuracy:= 47.168328857421876
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.2780332356770833
Number of Epoch = 4 - Accuracy:= 48.18335876464844
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.2749978515625
Number of Epoch = 5 - Accuracy:= 48.7417236328125
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.27373271484375
Number of Epoch = 6 - Accuracy:= 48.974954223632814
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.27136612955729167
Number of Epoch = 7 - Accuracy:= 49.40827331542968
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.23519226888020833
Number of Epoch = 8 - Accuracy:= 55.95648193359375
Number of Epoch = 9 - Categorical Cross-E

In [None]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")