In [None]:
import tensorflow as tf
from tensorflow import keras

import numpy as np

from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Preparing MNIST Dataset

In [None]:
batch_size = 64


def _flatten_and_scale(images):
  """Cast images to float32, map them via (x - 127.5)/127.5, and reshape to rows of 784 values."""
  scaled = (images.astype('float32') - 127.5)/127.5
  return scaled.reshape(-1, 784)


(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)

# Hold out the last 10000 training examples as a validation split.
# The right-hand sides are evaluated before assignment, so both slices
# are taken from the full training arrays.
x_train, x_val = x_train[:-10000], x_train[-10000:]
y_train, y_val = y_train[:-10000], y_train[-10000:]

# Training batches are shuffled through a 4096-element buffer.
train_dataset = (
  tf.data.Dataset.from_tensor_slices((x_train, y_train))
  .shuffle(buffer_size = 4096)
  .batch(batch_size)
)

# Validation batches keep their original order.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


# MLP from Scratch

In [None]:
initializer = tf.keras.initializers.GlorotUniform()

class Dense(tf.Module):
  """Fully connected layer built on tf.Module: activation(x @ W + b).

  W and b are created lazily on the first call, using the last dimension
  of the first input as the input width.

  Args:
    units: number of output features.
    activation: None (linear), one of the names in `_ACTIVATIONS`, or a
      callable applied to the pre-activation tensor.
    name: optional module name forwarded to tf.Module.

  Raises:
    ValueError: if `activation` is neither None, a known name, nor callable.
  """

  # Supported activation names. 'linear' is listed explicitly so that it
  # keeps resolving to the identity now that unknown names are rejected.
  _ACTIVATIONS = {
    'relu': tf.nn.relu,
    'sigmoid': tf.nn.sigmoid,
    'softmax': tf.nn.softmax,
    'tanh': tf.nn.tanh,
    'linear': tf.keras.activations.linear,
  }

  def __init__(self, units, activation = None, name = None):
    super().__init__(name = name)
    self.is_built = False
    self.units = units
    if activation is None:
      self.activation = tf.keras.activations.linear
    elif isinstance(activation, str) and activation in self._ACTIVATIONS:
      self.activation = self._ACTIVATIONS[activation]
    elif callable(activation):
      self.activation = activation
    else:
      # Previously any unrecognised value silently became linear, which
      # turns a typo such as 'Relu' into a hard-to-spot modelling bug.
      raise ValueError(
        'Unknown activation %r; expected None, a callable, or one of %s'
        % (activation, sorted(self._ACTIVATIONS)))

  def __call__(self, x):
    """Apply the layer to `x`, creating the variables on first use."""
    if not self.is_built:
      self.W = tf.Variable(initializer([x.shape[-1], self.units]), name = 'w')
      self.b = tf.Variable(initializer([self.units]), name = 'b')
      self.is_built = True

    y = tf.matmul(x, self.W) + self.b
    return self.activation(y)

In [None]:
class Sequential(tf.Module):
  """Three-layer MLP: 128 (relu) -> 64 (relu) -> 10 (softmax)."""

  def __init__(self, name = None):
    super().__init__(name = name)
    self.dense_1 = Dense(units = 128, activation = 'relu')
    self.dense_2 = Dense(units = 64, activation = 'relu')
    self.output = Dense(units = 10, activation = 'softmax')

  def __call__(self, x):
    """Feed `x` through the three layers in order and return the final output."""
    for layer in (self.dense_1, self.dense_2, self.output):
      x = layer(x)
    return x

In [None]:
# Loss for integer class labels, and an Adam optimizer with step size 1e-3.
loss_fn = keras.losses.SparseCategoricalCrossentropy()
optimizer = keras.optimizers.Adam(learning_rate = 0.001)

In [None]:
# Instantiate the hand-written MLP. Its Dense layers create their
# variables lazily, on the first forward pass.
model = Sequential()

In [None]:
epochs = 2

for epoch in range(epochs):
  print('\nEpoch %d'%(epoch))
  for step, (x_batch_train,y_batch_train) in enumerate(train_dataset):
    with tf.GradientTape() as tape:
      # The model ends in a softmax, so these are class probabilities
      # (which is what loss_fn expects); the name is kept for compatibility.
      logits = model(x_batch_train)
      loss_value = loss_fn(y_batch_train, logits)

    # Fetch the variables after the forward pass: the layers create them
    # lazily on the first call. Only trainable variables are optimised, so
    # a non-trainable variable can never produce a None gradient.
    trainable_vars = model.trainable_variables
    grads = tape.gradient(loss_value, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))

    if not(step % 200):
      print('Step %3d: %lf'%(step, float(loss_value)))

  # Evaluate on the held-out validation batches at the end of every epoch.
  # Each batch loss is weighted by its size so a short final batch does
  # not skew the average.
  val_loss_sum, val_count = 0.0, 0
  for x_batch_val, y_batch_val in val_dataset:
    n_val = int(x_batch_val.shape[0])
    val_loss_sum += float(loss_fn(y_batch_val, model(x_batch_val))) * n_val
    val_count += n_val
  if val_count:
    print('Validation loss: %lf'%(val_loss_sum / val_count))



Epoch 0
Step   0: 2.618264
Step 200: 0.560044
Step 400: 0.173565
Step 600: 0.227689

Epoch 1
Step   0: 0.160159
Step 200: 0.322173
Step 400: 0.099673
Step 600: 0.126099


In [None]:
# Predicted class = index of the largest output for each test image.
test_outputs = model(x_test)
y_predicted = np.argmax(test_outputs, axis = 1)

# Per-class error breakdown followed by precision / recall / F1.
print(confusion_matrix(y_test, y_predicted))
print(classification_report(y_test, y_predicted))

[[ 951    0    2    1    0    3   13    4    5    1]
 [   0 1119    3    2    0    0    6    1    4    0]
 [   6    1 1005    1    2    0    5    4    6    2]
 [   1    1   18  956    0    3    0    6   16    9]
 [   1    0   23    0  919    2   10    5    1   21]
 [   7    2    5   31    0  803   15    3   21    5]
 [   4    1    3    0    2    2  943    0    3    0]
 [   0    9   30    1    1    1    0  960    1   25]
 [   3    1   10    4    7    5    8    4  928    4]
 [   2    6    2    6   16    2    2    6   12  955]]
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       980
           1       0.98      0.99      0.98      1135
           2       0.91      0.97      0.94      1032
           3       0.95      0.95      0.95      1010
           4       0.97      0.94      0.95       982
           5       0.98      0.90      0.94       892
           6       0.94      0.98      0.96       958
           7       0.97      0.93   

# Building your own MLP Layer and Model with the Keras API

In [None]:
initializer = tf.keras.initializers.GlorotUniform()

class MyDense(tf.keras.layers.Layer):
  """Fully connected Keras layer: activation(x @ W + b).

  Args:
    units: number of output features.
    activation: None (linear), one of the names in `_ACTIVATIONS`, or a
      callable applied to the pre-activation tensor.
    **kwargs: forwarded to tf.keras.layers.Layer (e.g. `name`).

  Raises:
    ValueError: if `activation` is neither None, a known name, nor callable.
  """

  # Supported activation names. 'linear' is listed explicitly so that it
  # keeps resolving to the identity now that unknown names are rejected.
  _ACTIVATIONS = {
    'relu': tf.nn.relu,
    'sigmoid': tf.nn.sigmoid,
    'softmax': tf.nn.softmax,
    'tanh': tf.nn.tanh,
    'linear': tf.keras.activations.linear,
  }

  def __init__(self, units, activation = None, **kwargs):
    super().__init__(**kwargs)
    self.units = units
    if activation is None:
      self.activation = tf.keras.activations.linear
    elif isinstance(activation, str) and activation in self._ACTIVATIONS:
      self.activation = self._ACTIVATIONS[activation]
    elif callable(activation):
      self.activation = activation
    else:
      # Previously any unrecognised value silently became linear, which
      # turns a typo such as 'Relu' into a hard-to-spot modelling bug.
      raise ValueError(
        'Unknown activation %r; expected None, a callable, or one of %s'
        % (activation, sorted(self._ACTIVATIONS)))

  def build(self, input_shape):
    """Create the kernel and bias once the input width is known."""
    # add_weight is the layer API for creating weights: it registers them
    # with the layer instead of relying on attribute tracking of raw
    # tf.Variable objects.
    self.W = self.add_weight(
      name = 'w',
      shape = (input_shape[-1], self.units),
      initializer = initializer,
      trainable = True)
    self.b = self.add_weight(
      name = 'b',
      shape = (self.units,),
      initializer = initializer,
      trainable = True)

  def call(self, x):
    """Return activation(x @ W + b)."""
    y = tf.matmul(x, self.W) + self.b
    return self.activation(y)

In [None]:
class MyModel(tf.keras.Model):
  """MLP assembled from MyDense layers: 64 (relu) -> 32 (relu) -> 10 (softmax).

  Args:
    name: optional model name, forwarded to tf.keras.Model.
    **kwargs: remaining keyword arguments forwarded to tf.keras.Model.
  """

  def __init__(self, name = None, **kwargs):
    # Forward `name` explicitly: it used to be captured by the signature
    # and then dropped, so MyModel(name=...) had no effect.
    super().__init__(name = name, **kwargs)

    self.dense1 = MyDense(64, activation= 'relu')
    self.dense2 = MyDense(32, activation= 'relu')
    self.out_1 = MyDense(10, activation= 'softmax')

  def call(self, x):
    """Run `x` through the three layers and return the softmax output."""
    x = self.dense1(x)
    x = self.dense2(x)
    return self.out_1(x)

In [None]:
# Instantiate the subclassed model and configure it for training.
model2 = MyModel()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
model2.compile(optimizer = 'adam', loss = loss_fn)

# Build with 784 input features so that summary() can report parameter counts.
model2.build(input_shape = (None, 784))
model2.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 my_dense (MyDense)          multiple                  50240     
                                                                 
 my_dense_1 (MyDense)        multiple                  2080      
                                                                 
 my_dense_2 (MyDense)        multiple                  330       
                                                                 
Total params: 52,650
Trainable params: 52,650
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Reuse the notebook-wide batch_size instead of repeating the literal, and
# report loss on the held-out split that was carved out of the training data.
model2.fit(x_train, y_train, batch_size=batch_size, epochs=2,
           validation_data=(x_val, y_val))

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fac8220dc50>

In [None]:
# Predicted class = index of the largest output for each test image.
test_outputs = model2(x_test)
y_predicted = np.argmax(test_outputs, axis = 1)

# Per-class error breakdown followed by precision / recall / F1.
print(confusion_matrix(y_test, y_predicted))
print(classification_report(y_test, y_predicted))

[[ 964    0    1    0    0    5    7    1    2    0]
 [   0 1108    3    2    0    8    3    1   10    0]
 [   5    2  987    9    3    4    2    7   11    2]
 [   1    0   11  902    1   73    0   10   10    2]
 [   1    0    6    0  939    1    9    2    2   22]
 [   6    0    1    3    6  864    8    1    2    1]
 [   4    1    3    0   15   20  914    0    1    0]
 [   0    8   18    6    8    5    0  967    1   15]
 [   7    4    6   16   11   57   13    4  852    4]
 [   6    6    1    6   49   28    1    8    2  902]]
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       980
           1       0.98      0.98      0.98      1135
           2       0.95      0.96      0.95      1032
           3       0.96      0.89      0.92      1010
           4       0.91      0.96      0.93       982
           5       0.81      0.97      0.88       892
           6       0.96      0.95      0.95       958
           7       0.97      0.94   

# Building an MLP with Keras

In [None]:
# Same 64 -> 32 -> 10 architecture, this time from built-in Keras layers.
model3 = tf.keras.Sequential([
  tf.keras.layers.Dense(64, activation = 'relu'),
  tf.keras.layers.Dense(32, activation = 'relu'),
  tf.keras.layers.Dense(10, activation = 'softmax'),
])

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
model3.compile(optimizer = 'adam', loss = loss_fn)

# Build with 784 input features so that summary() can report parameter counts.
model3.build(input_shape = (None, 784))
model3.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                50240     
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 10)                330       
                                                                 
Total params: 52,650
Trainable params: 52,650
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Reuse the notebook-wide batch_size instead of repeating the literal, and
# report loss on the held-out split that was carved out of the training data.
model3.fit(x_train, y_train, batch_size=batch_size, epochs=2,
           validation_data=(x_val, y_val))

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f18fa241790>

In [None]:
# Evaluate model3, the built-in Keras model trained in this section. The
# cell previously called `model`, so it reported the from-scratch network.
y_predicted = np.argmax(model3(x_test), axis = 1)
print(confusion_matrix(y_test, y_predicted))
print(classification_report(y_test, y_predicted))

[[ 970    0    3    0    0    1    2    1    3    0]
 [   0 1116    4    2    0    1    3    1    8    0]
 [   6    3 1004    3    4    1    2    6    3    0]
 [   0    0   15  966    1    6    0   11    9    2]
 [   1    0    8    0  944    0    4    2    7   16]
 [   5    4    1   17    2  845    5    0    9    4]
 [  12    4    3    0   13   21  902    0    3    0]
 [   1   10   19    1    0    0    0  986    3    8]
 [   5    2    7   18    7   10    5   11  909    0]
 [   7    7    2    6   23    6    0   11   11  936]]
              precision    recall  f1-score   support

           0       0.96      0.99      0.98       980
           1       0.97      0.98      0.98      1135
           2       0.94      0.97      0.96      1032
           3       0.95      0.96      0.96      1010
           4       0.95      0.96      0.96       982
           5       0.95      0.95      0.95       892
           6       0.98      0.94      0.96       958
           7       0.96      0.96   