In [1]:
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras

In [2]:
class MyDense(keras.layers.Layer):
    """Fully connected layer computing ``activation(X @ kernel + bias)``.

    Args:
        units: Size of the last axis of the layer output.
        activation: Any identifier accepted by ``keras.activations.get``
            (a name such as ``'relu'``, a callable, or ``None``).
        **kwargs: Forwarded unchanged to ``keras.layers.Layer.__init__``.
    """

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        """Create the kernel and bias once the input feature size is known.

        Args:
            batch_input_shape: Shape of the incoming batch; only its last
                entry (the feature count) is used to size the kernel.
        """
        # The weight name is passed by keyword: newer Keras releases take
        # `shape` as the first positional parameter of add_weight, so a
        # positional name would collide with the `shape=` keyword.
        self.kernel = self.add_weight(
            name="kernel",
            shape=[batch_input_shape[-1], self.units],
            initializer="he_normal",
        )
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros"
        )
        # Let the base class record that the layer has been built.
        super().build(batch_input_shape)

    def call(self, X):
        """Apply the affine map followed by the configured activation."""
        return self.activation(X @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape):
        """Return the input shape with its last axis replaced by ``units``.

        Args:
            batch_input_shape: A ``tf.TensorShape`` or a plain list/tuple of
                dimensions.

        Returns:
            A ``tf.TensorShape`` of the same rank as the input shape.
        """
        # Normalise first so plain lists/tuples work as well as TensorShape.
        input_dims = tf.TensorShape(batch_input_shape).as_list()
        # Pass the flat dimension list itself; wrapping it in another list
        # would describe a nested (invalid) shape.
        return tf.TensorShape(input_dims[:-1] + [self.units])

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        base_config = super().get_config()
        return {
            **base_config,
            'units': self.units,
            'activation': keras.activations.serialize(self.activation),
        }


In [3]:
class MLPClassifier(keras.Model):
    """Multi-layer perceptron classifier assembled from ``MyDense`` layers.

    Args:
        hidden_layer_sizes: Unit counts of every layer, in order. All entries
            except the last become hidden layers using ``activation``; the
            last entry is the width of the softmax output layer.
        activation: Activation identifier used by the hidden layers.
        **kwargs: Forwarded unchanged to ``keras.Model.__init__``
            (for example ``name``).

    Raises:
        ValueError: If ``hidden_layer_sizes`` is empty.
    """

    def __init__(self, hidden_layer_sizes=(100,), activation=None, **kwargs):
        super().__init__(**kwargs)
        # Copy into a fresh list so neither the (now immutable) default nor a
        # caller-owned sequence is shared with this instance.
        hidden_layer_sizes = list(hidden_layer_sizes)
        if not hidden_layer_sizes:
            raise ValueError(
                "hidden_layer_sizes must contain at least one entry "
                "(the size of the output layer)."
            )
        self.hidden_layer_sizes = hidden_layer_sizes
        self.num_layers = len(hidden_layer_sizes)
        self.activation = activation
        self.layerz = [MyDense(unit, activation) for unit in hidden_layer_sizes[:-1]]
        self.out = MyDense(hidden_layer_sizes[-1], activation='softmax')

    def call(self, X):
        """Run ``X`` through every hidden layer and then the softmax output layer."""
        for layer in self.layerz:
            X = layer(X)
        return self.out(X)

In [4]:
# Load MNIST, flatten every image into a 784-element row and divide by 255
# so that pixel values lie in [0, 1].
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape((x_train.shape[0], 784)) / 255.
x_test = x_test.reshape((x_test.shape[0], 784)) / 255.

# One-hot encode the class labels of both splits.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels) for labels in (y_train, y_test)
)

# Keep the first 55000 training samples for fitting; everything after that
# index becomes the validation split.
x_train, x_val = x_train[:55000], x_train[55000:]
y_train, y_val = y_train[:55000], y_train[55000:]

print((x_train.shape), (x_val.shape), (x_test.shape))

(55000, 784) (5000, 784) (10000, 784)


In [5]:
# Three ReLU layers (784, 128, 128 units) followed by a 10-unit softmax output.
mlp = MLPClassifier(
    activation='relu',
    hidden_layer_sizes=[784, 128, 128, 10],
)

In [6]:
# A subclassed model has no weights until it knows its input shape, so build
# it explicitly before asking for the summary.
mlp.build(input_shape=[None, 784])
mlp.summary()

Model: "mlp_classifier"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
my_dense (MyDense)           multiple                  615440    
_________________________________________________________________
my_dense_1 (MyDense)         multiple                  100480    
_________________________________________________________________
my_dense_2 (MyDense)         multiple                  16512     
_________________________________________________________________
my_dense_3 (MyDense)         multiple                  1290      
Total params: 733,722
Trainable params: 733,722
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Labels are one-hot encoded, hence categorical cross-entropy as the loss.
mlp.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [8]:
# Train for 20 epochs in mini-batches of 256, passing the held-out split as
# validation data.
mlp.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=20,
    batch_size=256,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f96b126aba8>

In [9]:
# Score the trained model on the test split; the result unpacks into the loss
# followed by the accuracy metric requested at compile time.
tloss, tacc = mlp.evaluate(x=x_test, y=y_test)
print(f'Test Accuracy :: {tacc*100:.2f}')

Test Accuracy :: 97.90
