In [1]:
# import the necessary packages
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
import tensorflow as tf
import numpy as np
import time
import sys

In [2]:
def build_model(width, height, depth, classes):

	input_shape = (height, width, depth) # dimensions of the input images
	channel_dim = -1 # channels dimension to be "channels last" ordering

	model = Sequential([

    # Layer Set 1 - Input Layer:
    # CONV => RELU => BN => POOL
    # Conv - 16 filters with 3x3 kernal, produces 16 feature maps
		Conv2D(16, (3, 3), padding="same", input_shape=input_shape),
		Activation("relu"), # introduce non-linearity
		BatchNormalization(axis=channel_dim), # converge faster
		# reduce spatial dims (hgt & width) of each feature map
    MaxPooling2D(pool_size=(2, 2)),

    # Layer Set 2:
    # (CONV => RELU => BN) * 2 => POOL
    # Conv - 32 filters with 3x3 kernal
		Conv2D(32, (3, 3), padding="same"),
		Activation("relu"),
		BatchNormalization(axis=channel_dim),
		Conv2D(32, (3, 3), padding="same"),
		Activation("relu"),
		BatchNormalization(axis=channel_dim),
		MaxPooling2D(pool_size=(2, 2)),

		# Layer Set 3:
    # (CONV => RELU => BN) * 3 => POOL
    # Conv - 64 filters with 3x3 kernal
		Conv2D(64, (3, 3), padding="same"),
		Activation("relu"),
		BatchNormalization(axis=channel_dim),
		Conv2D(64, (3, 3), padding="same"),
		Activation("relu"),
		BatchNormalization(axis=channel_dim),
		Conv2D(64, (3, 3), padding="same"),
		Activation("relu"),
		BatchNormalization(axis=channel_dim),
		MaxPooling2D(pool_size=(2, 2)),

    # Layer Set 4:
		# Fully Connected layer => RELU layers
		Flatten(),
		Dense(256), # 256 neurons
		Activation("relu"),
		BatchNormalization(), # axis not specified bc 1-D
		# 50% dropout reduces overfitting, increasing model generalization
    Dropout(0.5),

    # Layer Set 5 - Output Layer:
    # softmax classifier
    # each neuron is probability score of respective class
		Dense(classes),
		Activation("softmax")
	])
	return model

In [3]:
def step(X, y):
  '''
  Custom training loop (step function) that
  encapsulates forward and backward pass of
  data (single step of gradient descent) using
  GradientTape and updating model weights.
  '''

  # keeps track of all operations that happen on trainable
  # variables onto a "tape" for automatic differentiation.
  # trainable variables = weights and biases
  with tf.GradientTape() as tape:
    # make a prediction using the model
    pred = model(X)

    # calculate loss using a typical loss
    # function for multi-class classification
    loss = categorical_crossentropy(y, pred)

  # **** Automatic Differentiation ****
  # calculate gradients using tape & chain rule
  # A list of gradients corresponding to
  # each trainable variable is returned.
  grads = tape.gradient(loss, model.trainable_variables)

  # optimizer updates the model weights using calculated gradients.
  # output of zip looks like this:
  #   paired = [(grad1, var1), (grad2, var2) ... ]
  opt.apply_gradients(zip(grads, model.trainable_variables))

In [4]:
# initialize Training Parameters:
# num of epochs, batch size, learning rate
EPOCHS = 25
BATCH_SZ = 64 # num of samples used to update weights
LRN_RATE = 1e-3 # controls the size of the steps

In [5]:
# load the MNIST dataset
print("[INFO] loading MNIST dataset...")
((trainX, trainY), (testX, testY)) = mnist.load_data()

[INFO] loading MNIST dataset...


In [6]:
# add a channel dimension at last axis
# to every image in the dataset
trainX = np.expand_dims(trainX, axis=-1)

# normalize images to the range [0, 1]
# by rescaling the pixel intensities
testX = np.expand_dims(testX, axis=-1)
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0

# one-hot encode the labels to
# allow the model to predict the
# probability of each class independently
trainY = to_categorical(trainY, 10)
testY = to_categorical(testY, 10)

In [7]:
# build model
print("[INFO] creating model...")
model = build_model(28, 28, 1, 10)

steps_per_epoch = len(trainX) // BATCH_SZ
decay_steps = steps_per_epoch * EPOCHS

# Exponential decay schedule
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=LRN_RATE,
    decay_steps=decay_steps,
    decay_rate=0.9,
    staircase=True
)
# initialize optimizer
opt = Adam(learning_rate=lr_schedule)

[INFO] creating model...


In [8]:
# compute number of batch updates per epoch
num_updates = int(trainX.shape[0] / BATCH_SZ)

# loop over the number of epochs
for epoch in range(0, EPOCHS):

	# log current epoch number
	print("[INFO] starting epoch {}/{}...".format(epoch + 1, EPOCHS), end="")
	sys.stdout.flush()
	epochStart = time.time()

  # loop over the data in batch size increments
	for i in range(0, num_updates):

		# determine starting and ending
    # slice indexes for current batch
		start = i * BATCH_SZ
		end = start + BATCH_SZ

		# take a step - apply step function
		step(trainX[start:end], trainY[start:end])

  # log timing information for the epoch
	epochEnd = time.time()
	elapsed = (epochEnd - epochStart) / 60.0
	print("[INFO] took {:.4} minutes".format(elapsed))

[INFO] starting epoch 1/25...[INFO] took 4.427 minutes
[INFO] starting epoch 2/25...[INFO] took 4.441 minutes
[INFO] starting epoch 3/25...[INFO] took 4.363 minutes
[INFO] starting epoch 4/25...[INFO] took 4.362 minutes
[INFO] starting epoch 5/25...[INFO] took 4.345 minutes
[INFO] starting epoch 6/25...[INFO] took 4.289 minutes
[INFO] starting epoch 7/25...[INFO] took 4.285 minutes
[INFO] starting epoch 8/25...[INFO] took 4.288 minutes
[INFO] starting epoch 9/25...[INFO] took 4.265 minutes
[INFO] starting epoch 10/25...[INFO] took 4.254 minutes
[INFO] starting epoch 11/25...[INFO] took 4.246 minutes
[INFO] starting epoch 12/25...[INFO] took 4.267 minutes
[INFO] starting epoch 13/25...[INFO] took 4.213 minutes
[INFO] starting epoch 14/25...[INFO] took 4.273 minutes
[INFO] starting epoch 15/25...[INFO] took 4.242 minutes
[INFO] starting epoch 16/25...[INFO] took 4.266 minutes
[INFO] starting epoch 17/25...[INFO] took 4.228 minutes
[INFO] starting epoch 18/25...[INFO] took 4.3 minutes
[IN

In [10]:
# Need to compile the model to
# calculate accuracy using Keras functions.
model.compile(optimizer=opt, loss=categorical_crossentropy, metrics=["acc"])

# compute accuracy
(loss, acc) = model.evaluate(testX, testY)
print("[INFO] test loss: {:.4f}".format(loss))
print("[INFO] test accuracy: {:.4f}".format(acc))

[INFO] test loss: 0.0450
[INFO] test accuracy: 0.9908
