Objective:
Implement a Convolutional Neural Network (CNN) for handwritten digit classification using MNIST and compare performance with a baseline MLP.

Why CNN:
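CNNs preserve the spatial layout of the image and learn hierarchical visual features (edges, then strokes, then whole shapes). This makes them better suited to image data than an MLP, which flattens the image and discards which pixels are neighbours.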

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import  Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.datasets import mnist
import numpy as np


In [4]:
# Load the MNIST train/test split bundled with Keras.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalise pixel intensities by dividing by 255.
# Cast to float32 rather than the builtin `float` (float64): Keras computes in
# float32 by default, so float64 only doubles the memory held by these arrays.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Append a trailing channel axis so every image is (28, 28, 1) for Conv2D.
X_train = np.expand_dims(X_train, -1)  # shape: (60000, 28, 28, 1)
X_test = np.expand_dims(X_test, -1)    # shape: (10000, 28, 28, 1)

print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)

Training data shape: (60000, 28, 28, 1)
Testing data shape: (10000, 28, 28, 1)


In [5]:
# Seed the Python, NumPy and backend RNGs so weight initialisation, dropout and
# batch shuffling repeat across "Restart Kernel -> Run All".
# NOTE: some GPU kernels may still introduce small run-to-run differences.
keras.utils.set_random_seed(42)

# Two conv/pool stages followed by a small dense classifier head.
model = Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape=`
    # to the first layer of a Sequential model makes Keras emit a UserWarning.
    keras.Input(shape=(28, 28, 1)),

    # Stage 1: 32 filters of size 3x3, then 2x2 max-pooling.
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Stage 2: 64 filters of size 3x3, then 2x2 max-pooling.
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Classifier head: flatten the feature maps, one hidden layer with dropout
    # for regularisation, then a 10-way softmax (one unit per digit class).
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Training configuration: Adam optimiser, cross-entropy computed against
# integer class labels, and accuracy tracked as the reported metric.
compile_options = {
    "optimizer": "adam",
    "loss": "sparse_categorical_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

# Show the layer-by-layer architecture.
model.summary()


In [9]:
# Train for 5 epochs in mini-batches of 128, holding out 20% of the training
# arrays for validation. The returned History object is kept in `history`.
fit_options = dict(epochs=5, batch_size=128, validation_split=0.2)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.8935 - loss: 0.3497 - val_accuracy: 0.9791 - val_loss: 0.0705
Epoch 2/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9676 - loss: 0.1086 - val_accuracy: 0.9838 - val_loss: 0.0567
Epoch 3/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9770 - loss: 0.0769 - val_accuracy: 0.9877 - val_loss: 0.0407
Epoch 4/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9806 - loss: 0.0634 - val_accuracy: 0.9881 - val_loss: 0.0386
Epoch 5/5
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.9837 - loss: 0.0524 - val_accuracy: 0.9890 - val_loss: 0.0388


In [10]:
# Score the trained network on the test set; the result unpacks into the loss
# followed by the accuracy metric configured at compile time.
test_metrics = model.evaluate(X_test, y_test, verbose=2)
test_loss, test_acc = test_metrics
print("Test accuracy:", test_acc)

313/313 - 1s - 2ms/step - accuracy: 0.9893 - loss: 0.0324
Test accuracy: 0.989300012588501
