In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load and preprocess the MNIST dataset
# mnist.load_data() yields an (images, labels) pair for the training split
# (60,000 samples) and another for the test split (10,000 samples).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast the images to float32, append a trailing channel axis of size 1
# (grayscale), and divide by 255 to rescale the pixel values.
# The -1 lets reshape infer the number of images in each split.
x_train = x_train.astype('float32').reshape(-1, 28, 28, 1) / 255
x_test = x_test.astype('float32').reshape(-1, 28, 28, 1) / 255

# Turn every integer label into a one-hot vector of length 10
# (e.g., 5 -> [0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Build the CNN model
# Three convolutional layers (with max pooling after the first two) feed a
# small dense classifier.
#
# The input shape is declared with an explicit Input layer instead of an
# `input_shape=` argument on the first Conv2D: Keras 3 emits a UserWarning for
# the latter in Sequential models and recommends an Input object as the first
# layer.
model = models.Sequential([
    # 28x28 images with a single (grayscale) channel
    layers.Input(shape=(28, 28, 1)),

    # First convolutional layer
    # - 32 filters
    # - 3x3 kernel size
    # - ReLU activation
    layers.Conv2D(32, (3, 3), activation='relu'),

    # Max pooling layer (2x2 window) to reduce spatial dimensions
    layers.MaxPooling2D((2, 2)),

    # Second convolutional layer: 64 filters, 3x3 kernel, ReLU activation
    layers.Conv2D(64, (3, 3), activation='relu'),

    # Another 2x2 max pooling layer
    layers.MaxPooling2D((2, 2)),

    # Third convolutional layer: 64 filters, 3x3 kernel, ReLU activation
    layers.Conv2D(64, (3, 3), activation='relu'),

    # Flatten the feature maps into a vector to feed into a dense layer
    layers.Flatten(),

    # Fully connected (dense) layer with 64 neurons
    layers.Dense(64, activation='relu'),

    # Output layer with 10 neurons (one for each class) and softmax activation
    layers.Dense(10, activation='softmax'),
])

# Configure the model for training
# - Categorical cross-entropy loss (multi-class classification on the
#   one-hot encoded labels)
# - Adam optimizer, selected by name
# - Accuracy tracked as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show a layer-by-layer overview of the network
model.summary()

# Fit the model on the training data
# - 5 passes over the data (raise this for potentially better accuracy)
# - Mini-batches of 64 samples
# - 20% of the training data is held out for validation
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

# Measure loss and accuracy on the test split and report the accuracy
test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test)
print(f"Test accuracy: {test_accuracy:.2f}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
750/750 ━━━━━━━━━━━━━━━━━━━━ 62s 79ms/step - accuracy: 0.8575 - loss: 0.4769 - val_accuracy: 0.9812 - val_loss: 0.0626
Epoch 2/5
750/750 ━━━━━━━━━━━━━━━━━━━━ 47s 63ms/step - accuracy: 0.9821 - loss: 0.0595 - val_accuracy: 0.9855 - val_loss: 0.0518
Epoch 3/5
750/750 ━━━━━━━━━━━━━━━━━━━━ 80s 61ms/step - accuracy: 0.9872 - loss: 0.0401 - val_accuracy: 0.9889 - val_loss: 0.0425
Epoch 4/5
750/750 ━━━━━━━━━━━━━━━━━━━━ 81s 59ms/step - accuracy: 0.9915 - loss: 0.0275 - val_accuracy: 0.9863 - val_loss: 0.0472
Epoch 5/5
750/750 ━━━━━━━━━━━━━━━━━━━━ 44s 59ms/step - accuracy: 0.9931 - loss: 0.0219 - val_accuracy: 0.9852 - val_loss: 0.0486
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step - accuracy: 0.9830 - loss: 0.0519
Test accuracy: 0.99
