In [132]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np

In [133]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [134]:
#The MNIST dataset has input features of 28x28. We need 32x32 so converting it to that.
train_images = tf.pad(train_images, [[0, 0], [2,2], [2,2]])/255
test_images = tf.pad(test_images, [[0, 0], [2,2], [2,2]])/255

In [135]:
train_images = tf.expand_dims(train_images, axis=3, name=None)
test_images = tf.expand_dims(test_images, axis=3, name=None)

In [136]:
class LeNet5(tf.Module):
    def __init__(self):
        self.conv1 = tf.Variable(tf.random.normal([5, 5, 1, 6]))
        self.bias1 = tf.Variable(tf.random.normal([6]))
        self.conv2 = tf.Variable(tf.random.normal([5, 5, 6, 16]))
        self.bias2 = tf.Variable(tf.random.normal([16]))
        self.final_pooling = tf.Variable(tf.random.normal([5 * 5 * 16, 120]))
        self.bias3 = tf.Variable(tf.random.normal([120]))
        self.fc1 = tf.Variable(tf.random.normal([120, 84]))
        self.bias4 = tf.Variable(tf.random.normal([84]))
        self.fc2 = tf.Variable(tf.random.normal([84, 10]))
        self.bias5 = tf.Variable(tf.random.normal([10]))

    def __call__(self, x):
        x = tf.nn.conv2d(x, self.conv1, strides=1, padding='VALID') + self.bias1
        x = tf.nn.tanh(x)
        x = tf.nn.avg_pool(x, ksize=2, strides=2, padding='VALID')

        x = tf.nn.conv2d(x, self.conv2, strides=1, padding='VALID') + self.bias2
        x = tf.nn.tanh(x)
        x = tf.nn.avg_pool(x, ksize=2, strides=2, padding='VALID')

        x = tf.reshape(x, (-1, 5 * 5 * 16))

        x = tf.matmul(x, self.final_pooling) + self.bias3
        x = tf.nn.tanh(x)

        x = tf.matmul(x, self.fc1) + self.bias4
        x = tf.nn.tanh(x)

        logits = tf.matmul(x, self.fc2) + self.bias5
        probabilities = tf.nn.softmax(logits)  
        return probabilities

In [137]:
model = LeNet5()  

In [138]:
optimizer = tf.optimizers.Adam()



In [139]:
def sparse_crossentropy(y_true, y_pred):
    y_true = tf.cast(y_true, tf.int64)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    return tf.reduce_mean(loss)

loss_object = sparse_crossentropy

In [140]:
#Training the data
epochs = 30
batch_size = 64

for epoch in range(epochs):
    for i in range(0, len(train_images), batch_size):
        batch_images = train_images[i:i + batch_size]
        batch_labels = train_labels[i:i + batch_size]

        with tf.GradientTape() as tape:
            logits = model(batch_images)
            loss = loss_object(batch_labels, logits)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_logits = model(train_images)
    train_predicted = tf.argmax(train_logits, axis=1, output_type=tf.int32)
    train_accuracy = tf.reduce_mean(tf.cast(tf.equal(train_predicted, train_labels), dtype=tf.float32))
    print(f'Epoch {epoch + 1}, Loss: {loss.numpy()}, Accuracy: {train_accuracy.numpy()}')

Epoch 1, Loss: 1.9219536781311035, Accuracy: 0.5027166604995728
Epoch 2, Loss: 1.8210641145706177, Accuracy: 0.593583345413208
Epoch 3, Loss: 1.8380365371704102, Accuracy: 0.6223166584968567
Epoch 4, Loss: 1.756899118423462, Accuracy: 0.6869000196456909
Epoch 5, Loss: 1.546610713005066, Accuracy: 0.7529833316802979
Epoch 6, Loss: 1.5719115734100342, Accuracy: 0.7829833626747131
Epoch 7, Loss: 1.5677082538604736, Accuracy: 0.7882000207901001
Epoch 8, Loss: 1.5819745063781738, Accuracy: 0.7941499948501587
Epoch 9, Loss: 1.5599223375320435, Accuracy: 0.8023499846458435
Epoch 10, Loss: 1.5687637329101562, Accuracy: 0.8097333312034607
Epoch 11, Loss: 1.5793914794921875, Accuracy: 0.8156499862670898
Epoch 12, Loss: 1.5246639251708984, Accuracy: 0.8607666492462158
Epoch 13, Loss: 1.5275018215179443, Accuracy: 0.8978999853134155
Epoch 14, Loss: 1.465390682220459, Accuracy: 0.9088000059127808
Epoch 15, Loss: 1.4732043743133545, Accuracy: 0.9146333336830139
Epoch 16, Loss: 1.5252611637115479, Ac

In [141]:
#Testing the data
test_logits = model(test_images)
predictions = tf.argmax(test_logits, axis=1, output_type=tf.int32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, test_labels), dtype=tf.float32))
print(f' Test Accuracy: {accuracy.numpy()}')

 Test Accuracy: 0.9416000247001648
