In [13]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt

In [11]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((60000, 28, 28), (10000, 28, 28), (60000,), (10000,))

In [14]:
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]
x_train.shape, x_test.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

In [15]:
x_train = tf.image.resize_with_pad(x_train, 32, 32)
x_test = tf.image.resize_with_pad(x_test, 32, 32)
x_train.shape, x_test.shape

(TensorShape([60000, 32, 32, 1]), TensorShape([10000, 32, 32, 1]))

In [18]:
x_train = tf.cast(x_train, tf.float32) / 255.0
x_test = tf.cast(x_test, tf.float32) / 255.0

x_train.shape, x_test.shape

(TensorShape([60000, 32, 32, 1]), TensorShape([10000, 32, 32, 1]))

In [58]:
y_train = keras.utils.to_categorical(y_train, len(np.unique(y_train)))
y_test = keras.utils.to_categorical(y_test, len(np.unique(y_test)))

y_train.shape, y_test.shape

((60000, 10), (10000, 10))

In [49]:
lenet5 = keras.models.Sequential([
    layers.Input(shape = (32, 32, 1)),
    layers.Conv2D(6, (5, 5), activation = "tanh"),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Conv2D(16, (5, 5), activation = "tanh"),
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Conv2D(120, (5, 5), activation = "tanh"),
    layers.Flatten(),
    layers.Dense(84, activation = "tanh"),
    layers.Dense(10, activation = "softmax")
])
lenet5.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_76 (Conv2D)          (None, 28, 28, 6)         156       
                                                                 
 max_pooling2d_15 (MaxPooli  (None, 14, 14, 6)         0         
 ng2D)                                                           
                                                                 
 conv2d_77 (Conv2D)          (None, 10, 10, 16)        2416      
                                                                 
 max_pooling2d_16 (MaxPooli  (None, 5, 5, 16)          0         
 ng2D)                                                           
                                                                 
 conv2d_78 (Conv2D)          (None, 1, 1, 120)         48120     
                                                                 
 flatten_19 (Flatten)        (None, 120)             

My implementation is slightly different from the the original paper. In the original LeNet-5 they have used some kind of non-complete connection scheme after first downsampling. Also, they have used RBF in the final layer, and with RBF they have used a different kind of MAP criterain based loss (maximum a posteriori). However the number of parameters described in the papers are same at 60K.

In [60]:
early_stopping = keras.callbacks.EarlyStopping(monitor = "val_loss", restore_best_weights = True, patience = 5, verbose = 1)
lenet5.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

history = lenet5.fit(x_train, y_train, epochs = 20, batch_size = 32, validation_data=(x_test, y_test), callbacks = [early_stopping])

# Evaluate the model on test data
test_loss, test_acc = lenet5.evaluate(x_test, y_test)

print(f'Test accuracy: {test_acc}')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping
Test accuracy: 0.9886999726295471
