# MNIST with different APIs
### Christian Igel, 2020

We use TensorFlow 2.x:

In [0]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
%load_ext tensorboard

import matplotlib.pyplot as plt

Load, visualize and prepare the MNIST handwritten digit data set:

In [0]:
# Load the MNIST training and test splits as (images, labels) pairs
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Helper function for inspecting images
def visim(images, rows = 3, cols = 5, scale = 2):
    """Show the first `rows * cols` entries of `images` in a grid of subplots.

    If fewer than `rows * cols` images are passed, only the available ones
    are drawn and the remaining grid cells stay empty.

    Args:
        images: Indexable, sized collection of images that `plt.imshow` accepts.
        rows: Number of grid rows.
        cols: Number of grid columns.
        scale: Multiplier for the figure size; the figure is
            `cols * scale` wide and `rows * scale` high.
    """
    fig = plt.figure(figsize=(cols * scale, rows * scale))
    # Never index past the end when there are fewer images than grid cells
    n_shown = min(rows * cols, len(images))
    for img_index in range(n_shown):
        # Subplot positions are 1-based; add_subplot makes the new axes current,
        # so the following plt.imshow draws into it
        fig.add_subplot(rows, cols, img_index + 1)
        plt.imshow(images[img_index])
# Look at the first 20 training digits (2 rows, 10 columns)
visim(train_images, rows=2, cols=10)

# Append a trailing channel axis (the models below expect inputs of shape
# (28, 28, 1)) and scale the pixel values from [0, 255] down to [0, 1]
train_images = train_images[..., tf.newaxis] / 255.0
test_images = test_images[..., tf.newaxis] / 255.0

First way to define a model:

In [0]:
# For ReLUs we want a positive activation in the beginning,
# so the biases are initialized around a small positive mean
mean_init = 0.05  # Mean of the random bias initialization
sd_init   = 0.01  # Standard deviation of the random bias initialization

# Build the network layer by layer with Sequential.add
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32,  # Number of output feature maps
                                 (3, 3),  # Filter size
                                 activation='relu',
                                 padding='valid',  # No padding
                                 bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init),
                                 input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3, 3),
                                 activation='relu',
                                 bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3, 3),
                                 activation='relu',
                                 bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init)))
model.add(tf.keras.layers.Flatten())  # Reshape feature maps into one long vector
model.add(tf.keras.layers.Dense(64,
                                activation='relu',
                                bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init)))
model.add(tf.keras.layers.Dense(10, activation='softmax'))  # Probability distribution over 10 classes

# summary() prints the table itself and returns None;
# wrapping it in print() would add a stray "None" line
model.summary()
# Reset the global Keras state before the next model definition
tf.keras.backend.clear_session()

Second way to define a model:

In [0]:
# Same architecture as before, this time passing the full list of layers to
# the Sequential constructor. mean_init and sd_init come from the previous cell.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(
        32, (3, 3),
        activation='relu',
        input_shape=(28, 28, 1),
        bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(
        64, (3, 3),
        activation='relu',
        bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(
        64, (3, 3),
        activation='relu',
        bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init)),
    tf.keras.layers.Flatten(),  # Reshape feature maps into one long vector
    tf.keras.layers.Dense(
        64,
        activation='relu',
        bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init)),
    tf.keras.layers.Dense(10, activation='softmax')])  # Probability distribution over 10 classes

# summary() prints the table itself and returns None;
# wrapping it in print() would add a stray "None" line
model.summary()
# Reset the global Keras state before the next model definition
tf.keras.backend.clear_session()

Third way, the *functional interface*:

In [0]:
# For ReLUs we want a positive activation in the beginning,
# so the biases are initialized around a small positive mean
mean_init = 0.05  # Mean of the random bias initialization
sd_init   = 0.01  # Standard deviation of the random bias initialization

# Functional interface: every layer is called on the output of the previous one,
# starting from a symbolic input tensor
inputs = tf.keras.Input(shape=(28, 28, 1))
x = tf.keras.layers.Conv2D(32, (3, 3),
                           activation='relu',
                           bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))(inputs)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Conv2D(64, (3, 3),
                           activation='relu',
                           bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))(x)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)
x = tf.keras.layers.Conv2D(64, (3, 3),
                           activation='relu',
                           bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))(x)
x = tf.keras.layers.Flatten()(x)  # Reshape feature maps into one long vector
x = tf.keras.layers.Dense(64,
                          activation='relu',
                          bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))(x)
predictions = tf.keras.layers.Dense(10, activation='softmax')(x)  # Probability distribution over 10 classes

# Instantiate model
model = tf.keras.Model(inputs=inputs, outputs=predictions)

# summary() prints the table itself and returns None;
# wrapping it in print() would add a stray "None" line
model.summary()
# Reset the global Keras state before the next model definition
tf.keras.backend.clear_session()

Fourth way, *model subclassing API*:

In [0]:
class MyModel(tf.keras.Model):
    """MNIST classifier written with the Keras model subclassing API.

    Three convolutional layers (with max pooling after the first two) are
    followed by a hidden dense layer and a 10-way softmax output layer.
    The bias initializers read the notebook-level `mean_init` and `sd_init`.
    """

    def __init__(self, name='Simple_MNIST_Model', **kwargs):
        """Create all layers.

        Args:
            name: Name of the model.
            **kwargs: Forwarded unchanged to `tf.keras.Model.__init__`.
        """
        super(MyModel, self).__init__(name=name, **kwargs)
        # Define your layers here.
        self.conv_1 = tf.keras.layers.Conv2D(
            32, (3, 3),
            activation='relu',
            input_shape=(28, 28, 1),
            use_bias=True,  # Must be a bool: any non-empty string (even 'False') is truthy
            bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))
        self.pool_1 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv_2 = tf.keras.layers.Conv2D(
            64, (3, 3),
            activation='relu',
            bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))
        self.pool_2 = tf.keras.layers.MaxPooling2D((2, 2))
        self.conv_3 = tf.keras.layers.Conv2D(
            64, (3, 3),
            activation='relu',
            bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))
        self.flat = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(
            64,
            activation='relu',
            bias_initializer=tf.initializers.TruncatedNormal(mean=mean_init, stddev=sd_init))
        self.predictions = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs, training=None):
        """Run the forward pass through the layers created in `__init__`.

        Args:
            inputs: Batch of input images.
            training: Boolean flag that allows different behavior in training
                and testing (e.g., needed for dropout, batch normalization, ...).
                It is not used here because no layer of this model reads it.

        Returns:
            The output of the final softmax layer (10 values per input image).
        """
        # Define your forward pass here,
        # using layers you previously defined in `__init__`
        x = self.conv_1(inputs)
        x = self.pool_1(x)
        x = self.conv_2(x)
        x = self.pool_2(x)
        x = self.conv_3(x)
        x = self.flat(x)
        x = self.dense(x)
        return self.predictions(x)

model = MyModel()

We inspect the training using TensorBoard:

In [0]:
# Clean the logs first 
!mkdir -p /tmp/logs
!rm -rf /tmp/logs/MNIST_simple

In [0]:
# Start TensorBoard (magic provided by the `tensorboard` extension) on the log root /tmp/logs/
%tensorboard --logdir /tmp/logs/ --host localhost

Train the model:

In [0]:
# Write training logs to the directory that TensorBoard is watching;
# profile_batch=0 switches the profiler off
callbacks = [tf.keras.callbacks.TensorBoard(log_dir='/tmp/logs/MNIST_simple', profile_batch=0)]

# Sparse cross-entropy: the labels are passed as class indices, not one-hot vectors
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=20,
    callbacks=callbacks,
)

In [0]:
# Loss and accuracy on the test images, which were not used for training
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print("Test accuracy:", test_acc)