In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[![View on GitHub][github-badge]][github-keras-v2] [![Open In Colab][colab-badge]][colab-keras-v2] [![Open in Binder][binder-badge]][binder-keras-v2]

[github-badge]: https://img.shields.io/badge/View-on%20GitHub-blue?logo=GitHub
[colab-badge]: https://colab.research.google.com/assets/colab-badge.svg
[binder-badge]: https://static.mybinder.org/badge_logo.svg

[github-keras-v2]: LeNet_v2_custom_Subsampling_layer_and_activation_in_Keras.ipynb
[colab-keras-v2]: https://colab.research.google.com/github/mbrukman/reimplementing-ml-papers/blob/main/lenet/LeNet_v2_custom_Subsampling_layer_and_activation_in_Keras.ipynb
[binder-keras-v2]: https://mybinder.org/v2/gh/mbrukman/reimplementing-ml-papers/main?filepath=lenet/LeNet_v2_custom_Subsampling_layer_and_activation_in_Keras.ipynb

In [None]:
import numpy as np

from tensorflow import keras
from keras import Input, Sequential
from keras.layers import AveragePooling2D, Conv2D, Dense, Flatten

In [None]:
# Download and import custom Subsampling layer.
!curl -sO https://raw.githubusercontent.com/mbrukman/reimplementing-ml-papers/main/lenet/subsampling.py
from subsampling import Subsampling

For details on the MNIST dataset, including a data exploration, see the [MNIST directory in my repo](https://github.com/mbrukman/reimplementing-ml-papers/tree/main/datasets/mnist).

Here, we will import a shared library to process the MNIST dataset into the format that we need to use below for model training and testing.

In [None]:
# Download and import our library for processing MNIST dataset.
!curl -sO https://raw.githubusercontent.com/mbrukman/reimplementing-ml-papers/main/datasets/mnist/mnist.py
from mnist import MNIST

In [None]:
def scaled_tanh(a: float) -> float:
    """Scaled hyperbolic tangent activation: ``1.7159 * tanh((2 / 3) * a)``.

    Args:
        a: Pre-activation value. The tanh itself is delegated to
            ``keras.activations.tanh``, so this accepts whatever a Keras layer
            hands to its activation function.

    Returns:
        The tanh of the slope-scaled input, multiplied by the amplitude.
    """
    amplitude = 1.7159
    slope = 2.0 / 3.0
    squashed = keras.activations.tanh(slope * a)
    return amplitude * squashed

# Short alias for the activation used by the output layer.
softmax = keras.activations.softmax

# LeNet-5 architecture. Layer names follow the C*/S*/F* labelling; the shape
# comments are the per-layer outputs reported by model.summary().
model = Sequential(
    [
        Input(shape=(28, 28, 1)),
        # 'same' padding keeps the 28x28 spatial size -> (28, 28, 6).
        Conv2D(
            filters=6,
            kernel_size=(5, 5),
            activation=scaled_tanh,
            padding='same',
            name='C1',
        ),
        # 2x2 subsampling with stride 2 halves each spatial dim -> (14, 14, 6).
        Subsampling(
            pool_size=(2, 2),
            strides=(2, 2),
            activation=scaled_tanh,
            name='S2',
        ),
        # Default (unpadded) 5x5 convolution -> (10, 10, 16).
        Conv2D(
            filters=16,
            kernel_size=(5, 5),
            activation=scaled_tanh,
            name='C3',
        ),
        # -> (5, 5, 16).
        Subsampling(
            pool_size=(2, 2),
            strides=(2, 2),
            activation=scaled_tanh,
            name='S4',
        ),
        # 5 * 5 * 16 = 400 features feeding the dense layers.
        Flatten(),
        Dense(120, activation=scaled_tanh, name='C5'),
        Dense(84, activation=scaled_tanh, name='F6'),
        # One probability per digit class.
        Dense(10, activation=softmax, name='Output'),
    ],
    name='LeNet-5',
)

model.summary()

Model: "LeNet-5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 C1 (Conv2D)                 (None, 28, 28, 6)         156       
                                                                 
 S2 (Subsampling)            (None, 14, 14, 6)         12        
                                                                 
 C3 (Conv2D)                 (None, 10, 10, 16)        2416      
                                                                 
 S4 (Subsampling)            (None, 5, 5, 16)          32        
                                                                 
 flatten (Flatten)           (None, 400)               0         
                                                                 
 C5 (Dense)                  (None, 120)               48120     
                                                                 
 F6 (Dense)                  (None, 84)                10164     

In [None]:
# Configure training: Adam optimizer (learning rate 1e-3) minimizing categorical
# cross-entropy, with accuracy reported alongside the loss.
opt = keras.optimizers.Adam(learning_rate=1e-3)
loss_fn = keras.losses.CategoricalCrossentropy()
model.compile(
    optimizer=opt,
    loss=loss_fn,
    metrics=['accuracy'],
)

In [None]:
%%capture --no-stderr
# Build the MNIST helper imported from mnist.py. The cell magic above captures
# this cell's stdout and rich output while still letting stderr through.
# NOTE(review): presumably the constructor downloads/loads the dataset and is
# what produces the suppressed output — confirm against mnist.py.
mnist = MNIST()

In [None]:
# Fit the model on the training split for 20 epochs.
#
# Inputs are the training images scaled into the range [0.0, 1.0]; labels are
# converted from their default numeric values to a categorical (one-hot)
# encoding.
#
# The test split below goes through the same transformations for consistency.
model.fit(
    x=mnist.x_train_scale_0_1(),
    y=mnist.y_train_categorical(),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f9810142c70>

In [None]:
# Score the trained model on the held-out test split.
#
# The test data goes through the same input range scaling and label encoding
# as the training data. The result lists the loss followed by the accuracy
# metric configured at compile time.
model.evaluate(
    x=mnist.x_test_scale_0_1(),
    y=mnist.y_test_categorical(),
)



[0.0722367987036705, 0.9837999939918518]