In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[![View on GitHub][github-badge]][github-keras-v3] [![Open In Colab][colab-badge]][colab-keras-v3] [![Open in Binder][binder-badge]][binder-keras-v3]

[github-badge]: https://img.shields.io/badge/View-on%20GitHub-blue?logo=GitHub
[colab-badge]: https://colab.research.google.com/assets/colab-badge.svg
[binder-badge]: https://static.mybinder.org/badge_logo.svg

[github-keras-v3]: LeNet_v3_Subsamping_fixed_scaling_and_learning_rate_decay_in_Keras.ipynb
[colab-keras-v3]: https://colab.research.google.com/github/mbrukman/reimplementing-ml-papers/blob/main/lenet/LeNet_v3_Subsamping_fixed_scaling_and_learning_rate_decay_in_Keras.ipynb
[binder-keras-v3]: https://mybinder.org/v2/gh/mbrukman/reimplementing-ml-papers/main?filepath=lenet/LeNet_v3_Subsamping_fixed_scaling_and_learning_rate_decay_in_Keras.ipynb

In [None]:
import numpy as np

from tensorflow import keras
from keras import Input, Sequential
from keras.layers import AveragePooling2D, Conv2D, Dense, Flatten

In [None]:
# Download and import custom Subsampling layer.
!curl -sO https://raw.githubusercontent.com/mbrukman/reimplementing-ml-papers/main/lenet/subsampling.py
from subsampling import Subsampling

In [None]:
# Install a pinned release of einops into the kernel's environment (`%pip`
# targets the running kernel) and import it. The import has to come after the
# install because the package may not be preinstalled on a fresh kernel.
%pip install -q -U 'einops==0.4'
import einops

In [None]:
# Fetch MNIST through Keras. `load_data()` returns a (train, test) pair, each
# of which is an (images, labels) pair; unpack them in two steps so the raw
# arrays stay available under `*_raw` names for the preprocessing below.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
x_train_raw, y_train_raw = mnist_train
x_test_raw, y_test_raw = mnist_test

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Report the shapes of the untouched train/test arrays, one per line.
print(
    'Raw data:',
    f'Train x: {x_train_raw.shape}',
    f'      y: {y_train_raw.shape}',
    f'Test  x: {x_test_raw.shape}',
    f'      y: {y_test_raw.shape}',
    sep='\n',
)

Raw data:
Train x: (60000, 28, 28)
      y: (60000,)
Test  x: (10000, 28, 28)
      y: (10000,)


In [None]:
num_classes = 10

# Add a channel dimension so that our inputs have the dimension (28, 28, 1)
# rather than (28, 28). This is done by converting our input of
# (batch, width, height) -> (batch, width, height, channels) with channels=1.
#
# In this specific case, this reshapes (60000, 28, 28) -> (60000, 28, 28, 1).
#
# Although this is similar to expanding dimensions via NumPy as follows:
#
#     x_train = np.expand_dims(x_train_raw, -1)
#     x_test = np.expand_dims(x_test_raw, -1)
#
# the approach with `einops` makes it much more readable and understandable.
x_train = einops.rearrange(x_train_raw, 'b w h -> b w h ()')
x_test = einops.rearrange(x_test_raw, 'b w h -> b w h ()')

# Scale train and test inputs by converting them from range of [0, 255] to
# [-0.1, 1.175]. From the LeNet paper, pg. 7:
#
#     "The values of the input pixels are normalized so that the background level
#      white corresponds to a value of -0.1 and the foreground black corresponds
#      to 1.175. This makes the mean input roughly 0 and the variance roughly 1
#      which accelerates learning."
#
# NOTE: the scaling must start from the rearranged `x_train` / `x_test` (not
# from the `*_raw` arrays); otherwise the channel dimension added above is
# silently thrown away and the data ends up as (batch, 28, 28) again.
lower_bound = -0.1
upper_bound = 1.175
# Map 0 -> lower_bound and 255 -> upper_bound with a linear rescale.
x_train = x_train.astype('float32') / 255.0 * (upper_bound - lower_bound) + lower_bound
x_test = x_test.astype('float32') / 255.0 * (upper_bound - lower_bound) + lower_bound

# Guard against regressing to channel-less inputs: the model's Input layer is
# declared with shape (28, 28, 1).
assert x_train.shape[1:] == (28, 28, 1), x_train.shape
assert x_test.shape[1:] == (28, 28, 1), x_test.shape

# Convert the output to categorical one-hot encoding to match the output of our
# network.
y_train = keras.utils.to_categorical(y_train_raw, num_classes)
y_test = keras.utils.to_categorical(y_test_raw, num_classes)

print('\nProcessed data:')
print(f'Train x: {x_train.shape}')
print(f'      y: {y_train.shape}')
print(f'Test  x: {x_test.shape}')
print(f'      y: {y_test.shape}')


Processed data:
Train x: (60000, 28, 28)
      y: (60000, 10)
Test  x: (10000, 28, 28)
      y: (10000, 10)


In [None]:
# Show the first four labels as raw class indices and then as the one-hot rows
# produced by preprocessing, so the two encodings can be compared side by side.
print('Y values before preprocessing:', y_train_raw[:4], sep='\n')
print('\nY values after preprocessing:', y_train[:4], sep='\n')

Y values before preprocessing:
[5 0 4 1]

Y values after preprocessing:
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [None]:
def lenet_activation(a: float) -> float:
    """Scaled hyperbolic tangent: ``1.7159 * tanh((2 / 3) * a)``.

    Used as the `activation` of the convolution, subsampling and dense layers
    of the model in this notebook. Although annotated with `float`, the value
    is handed to `keras.activations.tanh`, so in practice Keras passes in (and
    gets back) a tensor.

    Args:
        a: pre-activation value(s).

    Returns:
        The squashed value(s), with amplitude 1.7159.
    """
    amplitude = 1.7159
    slope = 2. / 3
    squashed = keras.activations.tanh(slope * a)
    return amplitude * squashed

In [None]:
# LeNet-5-style stack: two convolution + subsampling stages followed by three
# dense layers. Every hidden layer uses the scaled tanh `lenet_activation`;
# the final layer is a 10-way softmax.
lenet_layers = [
    Input(shape=(28, 28, 1)),
    # 'same' padding keeps the 28x28 spatial size through the first convolution.
    Conv2D(
        filters=6,
        kernel_size=(5, 5),
        padding='same',
        activation=lenet_activation,
        name='C1',
    ),
    Subsampling(
        pool_size=(2, 2),
        strides=(2, 2),
        activation=lenet_activation,
        name='S2',
    ),
    Conv2D(
        filters=16,
        kernel_size=(5, 5),
        activation=lenet_activation,
        name='C3',
    ),
    Subsampling(
        pool_size=(2, 2),
        strides=(2, 2),
        activation=lenet_activation,
        name='S4',
    ),
    Flatten(),
    Dense(120, activation=lenet_activation, name='C5'),
    Dense(84, activation=lenet_activation, name='F6'),
    Dense(10, activation='softmax', name='Output'),
]
model = Sequential(lenet_layers, name='LeNet-5')

model.summary()

Model: "LeNet-5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 C1 (Conv2D)                 (None, 28, 28, 6)         156       
                                                                 
 S2 (Subsampling)            (None, 14, 14, 6)         12        
                                                                 
 C3 (Conv2D)                 (None, 10, 10, 16)        2416      
                                                                 
 S4 (Subsampling)            (None, 5, 5, 16)          32        
                                                                 
 flatten (Flatten)           (None, 400)               0         
                                                                 
 C5 (Dense)                  (None, 120)               48120     
                                                                 
 F6 (Dense)                  (None, 84)                1016

In [None]:
def scheduler(epoch: int, lr: float) -> float:
    """Return the learning rate to use for the given (0-based) epoch.

    The base rate `eta` steps down on a fixed schedule:

        epochs 0-1   -> 0.0005
        epochs 2-4   -> 0.0002
        epochs 5-7   -> 0.0001
        epochs 8-11  -> 0.00005
        epochs 12+   -> 0.00001

    and the returned value is `eta / (mu + h_kk)` with `mu = 0.02` and
    `h_kk = 1`, i.e. a constant rescaling of `eta`.

    Args:
        epoch: index of the epoch about to run.
        lr: the current learning rate; accepted to satisfy the callback
            signature but not used, since the schedule depends only on `epoch`.

    Returns:
        The scaled learning rate for this epoch.
    """
    # (exclusive upper epoch bound, eta) pairs, checked in ascending order.
    stages = (
        (2, 0.0005),
        (5, 0.0002),
        (8, 0.0001),
        (12, 0.00005),
    )

    # Rate for every epoch at or beyond the last bound.
    eta = 0.00001
    for upper, stage_eta in stages:
        if epoch < upper:
            eta = stage_eta
            break

    mu = 0.02
    h_kk = 1
    return eta / (mu + h_kk)

# Wrap the schedule in a Keras callback so it can be passed to `model.fit`.
lr_callback = keras.callbacks.LearningRateScheduler(schedule=scheduler)

In [None]:
# Configure training: categorical cross-entropy against the one-hot labels,
# optimized with Adam, tracking accuracy. The rate given to Adam here matches
# the first `eta` stage in `scheduler`.
loss_fn = keras.losses.CategoricalCrossentropy()
opt = keras.optimizers.Adam(learning_rate=0.0005)
model.compile(
    optimizer=opt,
    loss=loss_fn,
    metrics=['accuracy'],
)

In [None]:
# Run 20 training epochs with the learning-rate schedule attached; the
# returned History object is kept for later inspection.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    callbacks=[lr_callback],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Score the trained model on the held-out test split; the cell displays the
# returned [loss, accuracy] pair.
model.evaluate(x=x_test, y=y_test)



[0.04284289851784706, 0.9864000082015991]