# IST597:- Multi-Layer Perceptron

## Load the libraries

In [13]:
import os
import numpy as np
import time
import tensorflow as tf
np.random.seed(5510)
tf.random.set_seed(5510)

In [14]:
tf.config.list_physical_devices('GPU')

[]

#Get number of Gpu's and id's in the system or else you can also use Nvidia-smi in command prompt.

## Generate random data

In [4]:
size_input = 784
size_hidden =[128, 64]
size_output = 10
number_of_train_examples = 60000
number_of_test_examples = 10000
from tensorflow.keras.datasets import fashion_mnist
# load dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()


In [5]:
y_test

array([9, 2, 1, ..., 8, 1, 5], dtype=uint8)

In [6]:
y_train = tf.keras.utils.to_categorical(y_train)

In [7]:
y_test = tf.keras.utils.to_categorical(y_test)

In [88]:
y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

In [16]:
X_train = X_train.reshape(60000, 28*28)
X_test = X_test.reshape(10000, 28*28)

In [90]:
X_train.shape

(60000, 784)

In [91]:
X_test.shape

(10000, 784)

In [17]:
X_train = X_train / 255
X_test = X_test / 255

In [93]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [94]:
X_train.shape

(60000, 784)

In [95]:
y_train.shape

(60000, 10)

In [96]:
X_train.size

47040000

In [97]:
y_train.size

600000

In [98]:
y_test.shape

(10000, 10)

In [99]:
X_test.size

7840000

In [100]:
y_test.size

100000

In [101]:
#X_train = np.random.randn(number_of_train_examples , size_input)
#y_train = np.random.randn(number_of_train_examples)
#X_test = np.random.randn(number_of_test_examples, size_input)
#y_test = np.random.randn(number_of_test_examples)

In [19]:
# Split dataset into batches
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(16)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(4)

## Build MLP using Eager Execution

In [20]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.device =\
    size_input, size_hidden, size_output, device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    output = tf.nn.softmax(output)
    return output

## Train Model

In [104]:
# Set number of epochs
NUM_EPOCHS = 10

In [105]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, a =np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, s=(np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7399565104166667
Number of Epoch = 1 - Accuracy:= 8.040072631835937
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.7281026041666666
Number of Epoch = 2 - Accuracy:= 9.590027872721354
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.7266733723958333
Number of Epoch = 3 - Accuracy:= 9.783367919921874
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.726030859375
Number of Epoch = 4 - Accuracy:= 9.868362426757812
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.7256635416666667
Number of Epoch = 5 - Accuracy:= 9.915020751953126
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7254348307291667
Number of Epoch = 6 - Accuracy:= 9.95167744954427
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.7252866536458333
Number of Epoch = 7 - Accuracy:= 9.971687825520833
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.7251377604166667
Number of Epoch = 8 - Accuracy:= 9.993363444010416
Number of Epoch = 9 - Categorical Cross-Entro

In [106]:
print(loss_total_gpu
      )

tf.Tensor([[43495.2]], shape=(1, 1), dtype=float32)


In [107]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(p=(np.sum(test_loss_total.numpy()) / X_test.shape[0])))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)

Test Categorical entropy loss: 0.0014
10.05 %


In [108]:
print(y_test[:5])
print(preds[:5])

print(y_true[:5])
print(y_pred[:5])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]
tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]], shape=(5, 10), dtype=float32)
[9 2 1 1 6]
[7 7 7 7 7]


In [109]:
# Initialize model using CPU
mlp_on_cpu = MLP(size_input, size_hidden, size_output, device='cpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start


print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7248715494791667
Number of Epoch = 1 - Accuracy:= 10.033356730143229
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.724843359375
Number of Epoch = 2 - Accuracy:= 10.038351440429688
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.7248111979166667
Number of Epoch = 3 - Accuracy:= 10.043360392252604
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.7247806640625
Number of Epoch = 4 - Accuracy:= 10.04168904622396
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.724720703125
Number of Epoch = 5 - Accuracy:= 10.04168701171875
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7246442057291667
Number of Epoch = 6 - Accuracy:= 10.045008341471354
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.7245557291666667
Number of Epoch = 7 - Accuracy:= 10.06168924967448
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.7244735677083334
Number of Epoch = 8 - Accuracy:= 10.063364664713543
Number of Epoch = 9 - Categorical Cross-Entrop

In [110]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_cpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_cpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0014
15.299999999999999 %


In [111]:
#TPU mode
mlp_on_tpu = MLP(size_input, size_hidden, size_output, device='tpu')


time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start


print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7238244791666667
Number of Epoch = 1 - Accuracy:= 10.120020548502605
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.7235535807291666
Number of Epoch = 2 - Accuracy:= 10.145016479492188
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.7233486979166667
Number of Epoch = 3 - Accuracy:= 10.18335673014323
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.7230155598958333
Number of Epoch = 4 - Accuracy:= 10.218353271484375
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.7226201171875
Number of Epoch = 5 - Accuracy:= 10.240017700195313
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7217220052083333
Number of Epoch = 6 - Accuracy:= 10.341678873697917
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.7209531901041667
Number of Epoch = 7 - Accuracy:= 10.413339233398437
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.7200380859375
Number of Epoch = 8 - Accuracy:= 10.510025024414062
Number of Epoch = 9 - Categorical Cross

In [112]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_tpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_tpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0013
16.06 %


Using Dropout Layer to avoid overfitting


In [113]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.device =\
    size_input, size_hidden, size_output, device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

    self.dropout_layer = keras.layers.Dropout(rate=0.5)
def call(self, input, training=None):
  X_tf = tf.cast(X, dtype=tf.float32)
  X_tf = self.dropout_layer(X_tf)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer
  what = tf.matmul(X_tf, self.W1) + self.b1
  hhat = tf.nn.relu(what)
  hhat = self.dropout_layer(hhat, training = training)
    #Compute the hidden
  what2 = tf.matmul(hhat, self.W2) + self.b2
  hhat2 = tf.nn.relu(what2)
  hhat2 = self.dropout_layer(hhat2, training = training)
    # Compute output
  output = tf.matmul(hhat2, self.W3) + self.b3
  output = tf.nn.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
  return output

In [114]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Categorical Cross-Entropy:= 0.334763671875
Number of Epoch = 1 - Accuracy:= 9.610038248697917
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.247984619140625
Number of Epoch = 2 - Accuracy:= 9.831691487630208
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.20563922526041667
Number of Epoch = 3 - Accuracy:= 9.915024820963541
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.2129431640625
Number of Epoch = 4 - Accuracy:= 9.96002909342448
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.3347446614583333
Number of Epoch = 5 - Accuracy:= 11.095002237955729
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.3733469401041667
Number of Epoch = 6 - Accuracy:= 11.784975179036458
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.35632203776041665
Number of Epoch = 7 - Accuracy:= 11.528352864583333
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.35210146484375
Number of Epoch = 8 - Accuracy:= 11.351681518554688
Number of Epoch = 9 - Categorical Cross-Entro

In [115]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0007
11.06 %


L2 Regularization


In [116]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [117]:
# Set number of epochs
NUM_EPOCHS = 10

In [118]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))

Number of Epoch = 1 - Categorical Cross-Entropy:= 0.6928215494791666
Number of Epoch = 1 - Accuracy:= 13.69834696451823
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.6509770182291666
Number of Epoch = 2 - Accuracy:= 18.878346761067707
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.6142603515625
Number of Epoch = 3 - Accuracy:= 23.381683349609375
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.5738859375
Number of Epoch = 4 - Accuracy:= 28.386651611328123
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.5503410807291667
Number of Epoch = 5 - Accuracy:= 31.344968668619792
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.5363466796875
Number of Epoch = 6 - Accuracy:= 33.15667928059896
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.5280036458333334
Number of Epoch = 7 - Accuracy:= 34.168355305989586
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.5221714192708333
Number of Epoch = 8 - Accuracy:= 34.92664794921875
Number of Epoch = 9 - Categorical Cross-Entropy

## One Step Inference

In [119]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0010
35.74 %


In [120]:
#TPU mode
mlp_on_tpu = MLP(size_input, size_hidden, size_output, device='tpu')


time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start


print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Number of Epoch = 1 - Categorical Cross-Entropy:= 0.5095127278645833
Number of Epoch = 1 - Accuracy:= 36.51162516276042
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.5065806640625
Number of Epoch = 2 - Accuracy:= 36.89665934244792
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.5030660807291667
Number of Epoch = 3 - Accuracy:= 37.318339029947914
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.5004604817708334
Number of Epoch = 4 - Accuracy:= 37.68501383463542
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.4981304361979167
Number of Epoch = 5 - Accuracy:= 37.96495361328125
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.4954019856770833
Number of Epoch = 6 - Accuracy:= 38.30671793619791
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.4938117838541667
Number of Epoch = 7 - Accuracy:= 38.49336751302083
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.49145784505208334
Number of Epoch = 8 - Accuracy:= 38.81832275390625
Number of Epoch = 9 - Categorical Cross-E

In [121]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_tpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_tpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0014
9.969999999999999 %


Hyperparameter Optimization; Since I am getting better accuracy with the L2 Regularization, therefore I am going to perform hyperparamete tuning.

In [122]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.leaky_relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.leaky_relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [123]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.769164453125
Number of Epoch = 1 - Accuracy:= 4.385030619303385
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.760647265625
Number of Epoch = 2 - Accuracy:= 5.4500579833984375
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.752294921875
Number of Epoch = 3 - Accuracy:= 6.493408203125001
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.7465244140625
Number of Epoch = 4 - Accuracy:= 7.243416849772136
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.741366796875
Number of Epoch = 5 - Accuracy:= 7.901751200358073
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7337123046875
Number of Epoch = 6 - Accuracy:= 8.81007080078125
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.6949836588541667
Number of Epoch = 7 - Accuracy:= 13.588326009114585
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.6784076822916667
Number of Epoch = 8 - Accuracy:= 15.711690266927084
Number of Epoch = 9 - Categorical Cross-Entropy:= 0.67433561

2. Changing the Learning Rate

In [124]:
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.05)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.leaky_relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.leaky_relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [125]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7256953776041667
Number of Epoch = 1 - Accuracy:= 9.951706949869791
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.7253164713541667
Number of Epoch = 2 - Accuracy:= 10.00004374186198
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.7253152994791666
Number of Epoch = 3 - Accuracy:= 10.000034586588542
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.7253160807291666
Number of Epoch = 4 - Accuracy:= 10.0000244140625
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.7253161458333334
Number of Epoch = 5 - Accuracy:= 10.000014241536459
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7253158203125
Number of Epoch = 6 - Accuracy:= 10.000023396809896
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.7253161458333334
Number of Epoch = 7 - Accuracy:= 10.00003662109375
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.7253166015625
Number of Epoch = 8 - Accuracy:= 10.000038655598958
Number of Epoch = 9 - Categorical Cross-Ent

In [126]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0015
10.0 %


Changing Activation function to Relu instead Leaky_Relu and changing the learnin rate to 0.6


In [127]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [128]:
# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.70774296875
Number of Epoch = 1 - Accuracy:= 12.168369547526042
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.7034765625
Number of Epoch = 2 - Accuracy:= 12.706581624348958
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.6640889322916667
Number of Epoch = 3 - Accuracy:= 17.58834431966146
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.6667590494791666
Number of Epoch = 4 - Accuracy:= 17.26332804361979
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.662168359375
Number of Epoch = 5 - Accuracy:= 17.823368326822916
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.6736038411458334
Number of Epoch = 6 - Accuracy:= 16.37663879394531
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.7253161458333334
Number of Epoch = 7 - Accuracy:= 10.00003662109375
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.7253166015625
Number of Epoch = 8 - Accuracy:= 10.000038655598958
Number of Epoch = 9 - Categorical Cross-Entropy:= 0.72

In [129]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0015
10.0 %


4. Lets increase the epochs.

In [130]:
NUM_EPOCHS = 15

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7252026041666667
Number of Epoch = 1 - Accuracy:= 10.008348592122395
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.7253169921875
Number of Epoch = 2 - Accuracy:= 10.000028483072915
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.7253162109375
Number of Epoch = 3 - Accuracy:= 10.000015258789062
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.7253164713541667
Number of Epoch = 4 - Accuracy:= 10.000020345052084
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.7253154947916667
Number of Epoch = 5 - Accuracy:= 10.0000244140625
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7253171223958333
Number of Epoch = 6 - Accuracy:= 10.000031534830729
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.7253168619791667
Number of Epoch = 7 - Accuracy:= 10.000022379557292
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.7253160807291666
Number of Epoch = 8 - Accuracy:= 10.000015258789062
Number of Epoch = 9 - Categorical Cross-

In [131]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0000
85.45 %


5. Let's try L1 regularization with No. of epochs as 12 and learning rate as 0.05

In [132]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L1= (tf.reduce_sum(self.W1)+ tf.reduce_sum(self.W2)+tf.reduce_sum(self.W3)) 
      current_loss = self.loss(predicted, y_train) + 0.01 * L1 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [133]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.15945930989583335
Number of Epoch = 1 - Accuracy:= 10.943347167968751
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.1171131103515625
Number of Epoch = 2 - Accuracy:= 9.878368123372395
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.11654532063802084
Number of Epoch = 3 - Accuracy:= 9.990025838216146
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.11627923990885417
Number of Epoch = 4 - Accuracy:= 9.863367716471354
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.11606238606770833
Number of Epoch = 5 - Accuracy:= 9.89502156575521
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.11585870768229167
Number of Epoch = 6 - Accuracy:= 9.936688232421876
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.1156848876953125
Number of Epoch = 7 - Accuracy:= 9.92504170735677
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.11560489908854167
Number of Epoch = 8 - Accuracy:= 9.953370157877604
Number of Epoch = 9 - Categorical C

In [134]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0002
10.0 %


6. Taking L2 regularization, learning rate = 0.6 and epochs as 12
 

In [135]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [136]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.7235477213541667
Number of Epoch = 1 - Accuracy:= 10.216709391276042
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.7253298828125
Number of Epoch = 2 - Accuracy:= 9.998377482096355
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.7253248697916667
Number of Epoch = 3 - Accuracy:= 9.998367309570312
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.7253160807291666
Number of Epoch = 4 - Accuracy:= 10.0000244140625
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.7253161458333334
Number of Epoch = 5 - Accuracy:= 10.000014241536459
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.7253158203125
Number of Epoch = 6 - Accuracy:= 10.000023396809896
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.7253161458333334
Number of Epoch = 7 - Accuracy:= 10.00003662109375
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.7253166015625
Number of Epoch = 8 - Accuracy:= 10.000038655598958
Number of Epoch = 9 - Categorical Cross-Entrop

7.Changing the optimizer to adam.

In [29]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [30]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(20)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 20 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.12183611653645833
Number of Epoch = 1 - Accuracy:= 39.02999267578125
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.14462482096354168
Number of Epoch = 2 - Accuracy:= 44.873347981770834
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.20283055013020834
Number of Epoch = 3 - Accuracy:= 45.629964192708336
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.28638326822916665
Number of Epoch = 4 - Accuracy:= 37.92996826171875
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.47443805338541667
Number of Epoch = 5 - Accuracy:= 25.369913736979164
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.6531487630208334
Number of Epoch = 6 - Accuracy:= 10.03669942220052
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.6521241536458333
Number of Epoch = 7 - Accuracy:= 9.8616943359375
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.6517583984375
Number of Epoch = 8 - Accuracy:= 9.861690266927084
Number of Epoch = 9 - Categorical Cro

8. Changing the batch size and changing the optimizer back to SGD


In [31]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [32]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(10)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 10 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 1.4506947916666666
Number of Epoch = 1 - Accuracy:= 9.995091756184896
Number of Epoch = 2 - Categorical Cross-Entropy:= 1.4506130208333334
Number of Epoch = 2 - Accuracy:= 10.000094604492189
Number of Epoch = 3 - Categorical Cross-Entropy:= 1.4505200520833332
Number of Epoch = 3 - Accuracy:= 9.996755981445313
Number of Epoch = 4 - Categorical Cross-Entropy:= 1.4497763020833334
Number of Epoch = 4 - Accuracy:= 10.015072631835938
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.23533079427083334
Number of Epoch = 5 - Accuracy:= 15.929986572265625
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.163135986328125
Number of Epoch = 6 - Accuracy:= 35.08503824869791
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.13875437825520834
Number of Epoch = 7 - Accuracy:= 45.773441569010416
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.1262797607421875
Number of Epoch = 8 - Accuracy:= 50.685087076822924
Number of Epoch = 9 - Categorical 

Changing the batch size to 30

In [24]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [25]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(30)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 30 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.48350338541666665
Number of Epoch = 1 - Accuracy:= 10.003385925292969
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.48353658854166665
Number of Epoch = 2 - Accuracy:= 10.000033569335939
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.48353766276041665
Number of Epoch = 3 - Accuracy:= 9.996703338623048
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.4835364583333333
Number of Epoch = 4 - Accuracy:= 10.000030517578125
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.48353642578125
Number of Epoch = 5 - Accuracy:= 10.000029754638671
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.48353658854166665
Number of Epoch = 6 - Accuracy:= 10.000029754638671
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.48353658854166665
Number of Epoch = 7 - Accuracy:= 10.000033569335939
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.483476953125
Number of Epoch = 8 - Accuracy:= 9.985028839111328
Number of Epoch = 9 - Categorical C

In [26]:
# Initialize model using GPU
# Define class to build mlp model
class MLP(object):
  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input, self.size_hidden, self.size_output, self.dropout_layer, self.device =\
    size_input, size_hidden, size_output, tf.keras.layers.Dropout(rate=0.2), device
    
    # Initialize weights between input layer and hidden layer-1
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden[0]]))
    # Initialize biases for hidden layer-1
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden[0]]))

    # Initialize weights between hidden layer-1 and hidden layer-2
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden[0], self.size_hidden[1]]))
    # Initialize biases for hidden layer-1
    self.b2 = tf.Variable(tf.random.normal([1, self.size_hidden[1]]))

     # Initialize weights between hidden layer and output layer
    self.W3 = tf.Variable(tf.random.normal([self.size_hidden[1], self.size_output]))
    # Initialize biases for output layer
    self.b3 = tf.Variable(tf.random.normal([1, self.size_output]))
    
    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
    
    
  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)
      
    return self.y
  
  def loss(self, y_pred, y_true):
    '''
    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
    return tf.keras.losses.CategoricalCrossentropy()(y_true_tf, y_pred_tf)
  
  def backward(self, X_train, y_train):
    """
    backward pass
    """
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.06)
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      L2= (tf.reduce_sum(tf.square(self.W1))+ tf.reduce_sum(tf.square(self.W2))+tf.reduce_sum(tf.square(self.W3)))/3 
      current_loss = self.loss(predicted, y_train) + 0.001 * L2 
    grads = tape.gradient(current_loss, self.variables)
    optimizer.apply_gradients(zip(grads, self.variables))
        
        
  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    #Remember to normalize your dataset before moving forward
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)
    #hhat1 = self.dropout_layer(hhat1)
    # Compute values in hidden layer2
    what2 = tf.matmul(hhat1, self.W2) + self.b2
    hhat2 = tf.nn.relu(what2)
    #hhat2 = self.dropout_layer(hhat2)
    # Compute output
    output = tf.matmul(hhat2, self.W3) + self.b3
    output = tf.nn.softmax(output)
    #output = tf.keras.activations.softmax(output)
    #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
    #Second add tf.Softmax(output) and then return this variable
    return output

In [27]:
NUM_EPOCHS = 12

# Initialize model using GPU
mlp_on_gpu = MLP(size_input, size_hidden, size_output, device='gpu')

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  loss_total_gpu = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  acc = tf.zeros([], dtype=tf.float32)
  train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(25, seed=epoch*(5510)).batch(30)
  for inputs, outputs in train_ds:
    preds = mlp_on_gpu.forward(inputs) 
    outputs = tf.cast(tf.reshape(outputs, (-1,10)), dtype=tf.float32)
    preds = tf.cast(preds, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(preds, 1)), "float"))
    acc = acc + accuracy
    loss_total_gpu = loss_total_gpu + mlp_on_gpu.loss(preds, outputs)
    lt = lt + mlp_on_gpu.loss(preds, outputs)
    mlp_on_gpu.backward(inputs, outputs)
  print('Number of Epoch = {} - Categorical Cross-Entropy:= {}'.format(epoch + 1, np.sum(loss_total_gpu) / X_train.shape[0]))
  print('Number of Epoch = {} - Accuracy:= {}'.format(epoch + 1, (np.sum(acc) * 30 / X_train.shape[0])*100))
  time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))


Number of Epoch = 1 - Categorical Cross-Entropy:= 0.4835365234375
Number of Epoch = 1 - Accuracy:= 10.000051879882813
Number of Epoch = 2 - Categorical Cross-Entropy:= 0.48353658854166665
Number of Epoch = 2 - Accuracy:= 10.000033569335939
Number of Epoch = 3 - Categorical Cross-Entropy:= 0.4835365234375
Number of Epoch = 3 - Accuracy:= 10.00003662109375
Number of Epoch = 4 - Categorical Cross-Entropy:= 0.4835364583333333
Number of Epoch = 4 - Accuracy:= 10.000030517578125
Number of Epoch = 5 - Categorical Cross-Entropy:= 0.48353642578125
Number of Epoch = 5 - Accuracy:= 10.000029754638671
Number of Epoch = 6 - Categorical Cross-Entropy:= 0.48353658854166665
Number of Epoch = 6 - Accuracy:= 10.000029754638671
Number of Epoch = 7 - Categorical Cross-Entropy:= 0.48353658854166665
Number of Epoch = 7 - Accuracy:= 10.000033569335939
Number of Epoch = 8 - Categorical Cross-Entropy:= 0.48316637369791665
Number of Epoch = 8 - Accuracy:= 10.045030975341797
Number of Epoch = 9 - Categorical Cro

In [28]:
test_loss_total = tf.Variable(0, dtype=tf.float32)
#test_loss_total = 0.0
# for inputs, outputs in test_ds:
preds = mlp_on_gpu.forward(X_test)
#b = mlp_on_default.loss(preds, outputs)
test_loss_total = test_loss_total + mlp_on_gpu.loss(preds, y_test)

print('Test Categorical entropy loss: {:.4f}'.format(np.sum(test_loss_total.numpy()) / X_test.shape[0]))


maxposition = lambda x : np.argmax(x)
#List comprehension to map the lambda function across all records of y_true and y_pred
y_true = np.array([maxposition(rec) for rec in y_test])
y_pred = np.array([maxposition(rec) for rec in preds])
val_acc = sum(y_true == y_pred)/len(y_pred)
print(val_acc*100,"%")

Test Categorical entropy loss: 0.0002
37.05 %
