In [0]:
# get tensorflow 2.0 (run once per session)
pip install tensorflow==2.0

In [0]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# report which TensorFlow release this session is running
print(f'tensorflow version: {tf.__version__}')

In [0]:
# load the MNIST dataset (train and test splits of images with integer labels)
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Customary normalization to [0, 1]. The images are cast to float32 *before*
# scaling: a plain `/ 255.0` would promote the integer pixels to float64, which
# doubles the memory footprint and leaves Keras (whose layers default to
# float32) to down-cast the inputs again.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# because we're using CNNs, the data needs a trailing channel dimension:
x_train = x_train[..., None]
x_test = x_test[..., None]

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

In [0]:
# Assemble the classifier one layer at a time.
cnn_model = tf.keras.models.Sequential()

# convolutional stage 1: 3x3 conv, then a stride-2 3x3 conv, then batch norm
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=1, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.BatchNormalization())

# convolutional stage 2: same pattern as stage 1
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=1, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.BatchNormalization())

# transition to the fully-connected head: flatten the feature maps first
cnn_model.add(tf.keras.layers.Flatten())
cnn_model.add(tf.keras.layers.Dense(128, activation='relu'))
# length-10 softmax output for classification
cnn_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# choose the loss to minimize, the optimizer that minimizes it,
# and the metric to monitor during training
cnn_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [0]:
# Train for 5 epochs in mini-batches of 32. The test split is passed as
# validation data, so its loss/metrics are evaluated after each epoch.
cnn_model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_test, y_test),
)

## Implementing custom layers
Although Keras provides all the most popular neural network layers (e.g., Conv2D, BatchNormalization, etc.), as a researcher who wants to develop new architectures, you may want to define custom layers based on low-level operations. In this exercise, we will create a custom layer that implements Dense (i.e., a fully-connected layer), which is also useful for understanding what is happening under the hood that Keras has abstracted away. See https://www.tensorflow.org/guide/keras/custom_layers_and_models for more information.

In [0]:
class fc(tf.keras.layers.Layer):
  """Minimal fully-connected (dense) layer: ``activation(inputs @ W + b)``.

  Args:
    num_outputs: number of output units (columns of ``W``, length of ``b``).
    activation: callable applied to the affine output, the name of a built-in
      Keras activation (e.g. ``'relu'``), or ``None`` for a linear layer.
    **kwargs: forwarded to ``tf.keras.layers.Layer`` (``name``, ``dtype``, ...).
  """

  def __init__(self, num_outputs, activation=None, **kwargs):
    # the super class constructor handles things like dtype, name,
    # training-related stuff, ...
    super().__init__(**kwargs)
    self.num_outputs = num_outputs
    # activations.get returns callables unchanged, resolves string names and
    # maps None to the identity, so call() never has to special-case it
    self.activation = tf.keras.activations.get(activation)

  def build(self, input_shape):
    """Create ``W`` and ``b`` once the size of the input is known.

    To keep the layer general we cannot define the variables until we know
    the input shape; Keras passes it here the first time the layer is called.
    The input is expected to be flattened: [batch_size, flattened data].

    Raises:
      ValueError: if the size of the last input axis is not defined.
    """
    num_inputs = input_shape[-1]
    if num_inputs is None:
      raise ValueError('The last dimension of the inputs to `fc` must be '
                       'defined, but found `None`.')
    # Glorot-uniform scales the weights by fan-in/fan-out. Unit-variance
    # weights (e.g. np.random.randn) make the pre-activations grow with the
    # number of inputs, which saturates the activations and hurts training.
    self.W = self.add_weight(name='W',
                             shape=(num_inputs, self.num_outputs),
                             initializer='glorot_uniform',
                             trainable=True)
    # biases conventionally start at zero
    self.b = self.add_weight(name='b',
                             shape=(self.num_outputs,),
                             initializer='zeros',
                             trainable=True)
    super().build(input_shape)

  def call(self, inputs):
    """Apply the affine map followed by the activation."""
    # b has shape [num_outputs] and broadcasts over the batch axis
    return self.activation(tf.matmul(inputs, self.W) + self.b)
    

Now, let's repeat the above model, using our own fc instead of the built-in Dense.

In [0]:
# Same architecture as before, but the head uses our own fc layer
# in place of the built-in Dense.
cnn_model = tf.keras.models.Sequential()

# convolutional stage 1: 3x3 conv, then a stride-2 3x3 conv, then batch norm
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=1, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.BatchNormalization())

# convolutional stage 2: same pattern as stage 1
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=1, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu'))
cnn_model.add(tf.keras.layers.BatchNormalization())

# transition to the fully-connected head: flatten the feature maps first
cnn_model.add(tf.keras.layers.Flatten())
cnn_model.add(fc(num_outputs=128, activation=tf.nn.relu))
cnn_model.add(fc(num_outputs=10, activation=tf.nn.softmax))

# choose the loss to minimize, the optimizer that minimizes it,
# and the metric to monitor during training
cnn_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train for 5 epochs in mini-batches of 32. The test split is passed as
# validation data, so its loss/metrics are evaluated after each epoch.
cnn_model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_test, y_test),
)