We implement a naive Sequential model that can contain naive Dense layers. The purpose is to understand the mathematics of neural networks. This model:<br>
1. makes the forward pass<br>
2. computes the loss and its gradients inside a `tf.GradientTape` scope<br>
3. updates model weights based on gradients <br>
**Note:**<br>
1. there is no option to choose metric, the model only monitors loss<br>
2. there is no predict method; evaluation has to be done manually<br>
3. the user also has to specify the input size of each dense layer<br>

In [None]:
import tensorflow as tf

class NaiveDense:
    """A minimal fully connected layer: ``activation(inputs @ W + b)``.

    Attributes:
        activation: Callable applied to the affine output.
        W: ``tf.Variable`` of shape ``(input_size, output_size)``.
        b: ``tf.Variable`` of shape ``(output_size,)``.
    """

    def __init__(self, input_size, output_size, activation):
        """Create the layer's kernel and bias variables.

        Args:
            input_size: Number of features in each input row.
            output_size: Number of units produced by the layer.
            activation: Callable applied element-wise to the affine output.
        """
        # Kernel starts as small uniform random values in [0, 0.1).
        kernel_init = tf.random.uniform(
            (input_size, output_size), minval=0, maxval=1e-1
        )
        self.W = tf.Variable(kernel_init)

        # Bias starts at zero.
        self.b = tf.Variable(tf.zeros((output_size,)))

        self.activation = activation

    def __call__(self, inputs):
        """Run the forward pass and return the activated output."""
        affine = tf.matmul(inputs, self.W) + self.b
        return self.activation(affine)

    @property
    def weights(self):
        """Return the layer's variables as ``[W, b]``."""
        return [self.W, self.b]

In [None]:
import math

class BatchGenerator:
    """Serve consecutive mini-batches sliced from paired images and labels.

    Attributes:
        index: Offset of the first sample of the next batch.
        images: The full sequence of inputs.
        labels: The full sequence of targets, same length as ``images``.
        batch_size: Maximum number of samples per batch.
        num_batches: Number of ``next()`` calls needed to cover the data.
    """

    def __init__(self, images, labels, batch_size=128):
        """Store the data and compute how many batches cover it.

        Args:
            images: Sliceable sequence of inputs.
            labels: Sliceable sequence of targets, same length as ``images``.
            batch_size: Positive number of samples per batch.

        Raises:
            ValueError: If ``images`` and ``labels`` differ in length, or if
                ``batch_size`` is not positive.
        """
        # Raise explicitly rather than assert: assertions are stripped when
        # Python runs with -O, which would let mismatched data through.
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        # A zero batch size would divide by zero below; a negative one would
        # yield a negative batch count and nonsensical slices.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        # Round up so a final, smaller batch holds the leftover samples.
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor.

        The last batch may be shorter than ``batch_size``. Calling this more
        than ``num_batches`` times slices past the end of the data, which for
        lists and NumPy arrays yields empty batches.
        """
        start = self.index
        stop = start + self.batch_size
        self.index = stop
        return self.images[start:stop], self.labels[start:stop]

In [None]:
class NaiveSequential:
    """A minimal sequential model that chains layers and trains them.

    Each layer must be callable and expose a ``weights`` list. ``compile``
    must be called before ``fit`` so that ``optimizer`` and ``loss`` exist.
    """

    def __init__(self, layers):
        """Store the ordered list of layers."""
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order; return the result."""
        outputs = inputs
        for layer in self.layers:
            outputs = layer(outputs)
        return outputs

    def compile(self, optimizer, loss):
        """Attach the optimizer and the per-sample loss function."""
        self.loss = loss
        self.optimizer = optimizer

    def one_training_step(self, images_batch, labels_batch):
        """Run one forward/backward pass on a batch and update the weights.

        Returns:
            The mean loss over the batch, computed before the update.
        """
        trainable = self.weights
        # The forward pass and loss must run inside the tape so that the
        # operations are recorded for differentiation.
        with tf.GradientTape() as tape:
            per_sample_losses = self.loss(labels_batch, self(images_batch))
            average_loss = tf.reduce_mean(per_sample_losses)
        gradients = tape.gradient(average_loss, trainable)
        self.optimizer.apply_gradients(zip(gradients, trainable))
        return average_loss

    def fit(self, images, labels, epochs, batch_size=128):
        """Train for ``epochs`` passes over the data in mini-batches.

        After the final batch of each epoch, that batch's loss is printed
        (it is not an average over the epoch).
        """
        for epoch in range(epochs):
            # A fresh generator per epoch restarts from the first sample.
            batches = BatchGenerator(images, labels, batch_size)
            final_step = batches.num_batches - 1
            for step in range(batches.num_batches):
                images_batch, labels_batch = batches.next()
                loss = self.one_training_step(images_batch, labels_batch)
                if step == final_step:
                    print(f"Epoch {epoch + 1}\n  loss : {loss:.2f}")

    @property
    def weights(self):
        """Return a flat list of every layer's weights, in layer order."""
        return [w for layer in self.layers for w in layer.weights]

Training and evaluating our mock Sequential model on MNIST

In [None]:
from tensorflow.keras.datasets import mnist
# Load the MNIST train/test splits as (images, labels) pairs.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every 28x28 image into a 784-element row. Using -1 lets NumPy infer
# the number of samples instead of hard-coding the split sizes.
# Dividing by 255 rescales the pixels; this assumes 8-bit pixel values in
# 0..255, giving floats in [0, 1] -- TODO confirm against the dataset docs.
train_images = train_images.reshape((-1, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((-1, 28 * 28)).astype("float32") / 255

In [None]:
# Two-layer classifier: a 512-unit ReLU hidden layer followed by a 10-way
# softmax output. Each layer's input_size must match the previous layer's
# output_size because NaiveDense does not infer shapes.
model = NaiveSequential(
    [
        NaiveDense(
            input_size=28 * 28,
            output_size=512,
            activation=tf.nn.relu,
        ),
        NaiveDense(
            input_size=512,
            output_size=10,
            activation=tf.nn.softmax,
        ),
    ]
)

In [None]:
# Plain SGD with a small step size; the loss takes integer class labels and
# returns one value per sample.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
    loss=tf.keras.losses.sparse_categorical_crossentropy,
)

In [None]:
# Train for 10 passes over the training set, 128 samples per update step.
model.fit(
    train_images,
    train_labels,
    epochs=10,
    batch_size=128,
)

Epoch 1
  loss : 2.04
Epoch 2
  loss : 1.80
Epoch 3
  loss : 1.57
Epoch 4
  loss : 1.38
Epoch 5
  loss : 1.23
Epoch 6
  loss : 1.11
Epoch 7
  loss : 1.02
Epoch 8
  loss : 0.94
Epoch 9
  loss : 0.88
Epoch 10
  loss : 0.83


In [None]:
import numpy as np

# Manual evaluation: the model has no predict/evaluate method, so run the
# forward pass on the whole test set and compare predicted classes to labels.
predictions = model(test_images).numpy()
# The predicted class is the index of the largest output in each row.
predicted_labels = predictions.argmax(axis=1)
matches = np.equal(predicted_labels, test_labels)
print(f"accuracy: {np.mean(matches):.2f}")

accuracy: 0.82
