<a href="https://colab.research.google.com/github/rajeevrpandey/High-Accuracy-CNN-for-MNIST/blob/main/High_Accuracy_CNN_for_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

In [2]:
# Fetch MNIST (images of handwritten digits 0-9) as train/test arrays
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()

# Rescale pixel intensities from [0, 255] down to [0, 1]
X_train_full = X_train_full / 255.
# The test images are rescaled and given a trailing channel axis in one step,
# because Conv2D expects a channel dimension on its input
X_test = (X_test / 255.)[..., np.newaxis]

# Hold out the last 5,000 training samples for validation (the remaining 55,000
# are used for training); the trailing channel axis is added while slicing
X_train = X_train_full[:-5000, ..., np.newaxis]
X_valid = X_train_full[-5000:, ..., np.newaxis]
y_train = y_train_full[:-5000]
y_valid = y_train_full[-5000:]

In [3]:
# Clear the previous Keras session to free memory and reset global state
# (useful when this notebook builds more than one model in the same kernel)
keras.backend.clear_session()
# Seed every random number generator involved, for reproducibility.
# keras.utils.set_random_seed seeds Python's `random` module, NumPy and the
# backend (TensorFlow) in a single call. Seeding only TensorFlow and NumPy left
# Python's `random` unseeded, which recent Keras versions use to derive default
# seeds for layers and initializers, so results could differ between runs.
keras.utils.set_random_seed(42)

In [4]:
# Assemble the CNN one layer at a time on an initially empty Sequential model
model = keras.models.Sequential()

# Feature extraction: two stacked 3x3 convolutions (32 then 64 filters), each with
# ReLU activation and 'same' padding so the spatial size is preserved
model.add(keras.layers.Conv2D(filters=32, kernel_size=3, padding="same", activation="relu"))
model.add(keras.layers.Conv2D(filters=64, kernel_size=3, padding="same", activation="relu"))
# Downsample the feature maps with max pooling (Keras' default pool size)
model.add(keras.layers.MaxPool2D())

# Classification head: flatten the feature maps into a single vector per sample
model.add(keras.layers.Flatten())
# Regularize with 25% dropout before the hidden dense layer
model.add(keras.layers.Dropout(rate=0.25))
# Hidden fully connected layer: 128 units, ReLU activation
model.add(keras.layers.Dense(units=128, activation="relu"))
# Heavier 50% dropout right before the output layer to further limit overfitting
model.add(keras.layers.Dropout(rate=0.5))
# Output layer: one unit per digit class (10), softmax to produce class probabilities
model.add(keras.layers.Dense(units=10, activation="softmax"))

In [5]:
# Configure training: Nadam optimizer, *sparse* categorical cross-entropy loss
# (the labels are passed as integer class ids, not one-hot vectors), and accuracy
# reported as the metric
model.compile(
    optimizer="nadam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [6]:
# Train the model for 10 epochs on the training set, validating on the held-out
# validation set at the end of every epoch.
# The History object returned by fit() is kept: `history.history` holds the
# per-epoch loss/metric values, so learning curves can be inspected or plotted
# later without repeating this long training run.
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))
# Evaluate the model on the test set to measure final performance; as the cell's
# last expression, the returned [loss, accuracy] pair is displayed as the output
model.evaluate(X_test, y_test)

Epoch 1/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 111ms/step - accuracy: 0.8781 - loss: 0.3912 - val_accuracy: 0.9868 - val_loss: 0.0537
Epoch 2/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 110ms/step - accuracy: 0.9729 - loss: 0.0936 - val_accuracy: 0.9882 - val_loss: 0.0415
Epoch 3/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 110ms/step - accuracy: 0.9794 - loss: 0.0672 - val_accuracy: 0.9888 - val_loss: 0.0439
Epoch 4/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 110ms/step - accuracy: 0.9844 - loss: 0.0538 - val_accuracy: 0.9896 - val_loss: 0.0417
Epoch 5/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 110ms/step - accuracy: 0.9859 - loss: 0.0460 - val_accuracy: 0.9912 - val_loss: 0.0386
Epoch 6/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 111ms/step - accuracy: 0.9873 - loss: 0.0411 - val_accuracy: 0.9918 - val_loss:

[0.03417564928531647, 0.9915000200271606]