In [3]:
# Hyperparameters read by the dataset builders and the model.fit calls below.
# BATCH_SIZE: examples per training batch; EPOCHS: epochs passed to model.fit.
BATCH_SIZE = 128
EPOCHS = 10

# gs:// locations of the image and label files (bucket name suggests the
# public MNIST set). load_dataset reads them as fixed-length records after
# skipping a 16-byte (images) or 8-byte (labels) header.
training_images_file   = 'gs://mnist-public/train-images-idx3-ubyte'
training_labels_file   = 'gs://mnist-public/train-labels-idx1-ubyte'
validation_images_file = 'gs://mnist-public/t10k-images-idx3-ubyte'
validation_labels_file = 'gs://mnist-public/t10k-labels-idx1-ubyte'

In [4]:
import os, re, math, json, shutil, pprint
import PIL.Image, PIL.ImageFont, PIL.ImageDraw
import IPython.display as display
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt

In [5]:
# tf.data sentinel asking the runtime to choose a tuning value itself;
# get_training_dataset passes it as the prefetch buffer size.
AUTO = tf.data.experimental.AUTOTUNE

def read_label(tf_bytestring):
    """Turn one raw label record into a one-hot vector of depth 10.

    Args:
        tf_bytestring: scalar string tensor holding a single label byte.

    Returns:
        The label as a length-10 one-hot tensor.
    """
    raw_byte = tf.io.decode_raw(tf_bytestring, tf.uint8)
    # Reshaping to [] collapses the one-element vector into a scalar class id.
    class_id = tf.reshape(raw_byte, [])
    return tf.one_hot(class_id, 10)
  
def read_image(tf_bytestring):
    """Turn one raw image record into a flat float32 pixel vector.

    Args:
        tf_bytestring: scalar string tensor holding 28*28 pixel bytes.

    Returns:
        A float32 tensor of shape [28*28]; each byte is divided by 256.0,
        so values fall in [0, 1).
    """
    pixels = tf.cast(tf.io.decode_raw(tf_bytestring, tf.uint8), tf.float32)
    scaled = pixels / 256.0
    return tf.reshape(scaled, [28*28])
  
def load_dataset(image_file, label_file):
    """Build a dataset of (image, one-hot label) pairs from two record files.

    Args:
        image_file: path of a file holding 28*28-byte image records that
            follow a 16-byte header.
        label_file: path of a file holding 1-byte label records that follow
            an 8-byte header.

    Returns:
        A tf.data.Dataset yielding (image, label) tuples, where image comes
        from read_image and label from read_label, paired by position.
    """
    # Map parallelism is left to tf.data (AUTO) instead of a fixed 16 calls,
    # matching how prefetch is configured elsewhere in this notebook. A
    # parallel map keeps element order by default, so zip still pairs each
    # image with its own label.
    imagedataset = tf.data.FixedLengthRecordDataset(image_file, 28*28, header_bytes=16)
    imagedataset = imagedataset.map(read_image, num_parallel_calls=AUTO)
    labelsdataset = tf.data.FixedLengthRecordDataset(label_file, 1, header_bytes=8)
    labelsdataset = labelsdataset.map(read_label, num_parallel_calls=AUTO)
    dataset = tf.data.Dataset.zip((imagedataset, labelsdataset))
    return dataset
  
def get_training_dataset(image_file, label_file, batch_size):
    """Return an endlessly repeating, shuffled, batched training pipeline.

    Args:
        image_file: path of the image record file.
        label_file: path of the label record file.
        batch_size: number of examples per batch; partial batches are dropped.

    Returns:
        A tf.data.Dataset of (images, labels) batches that never ends, so
        callers must bound training with an explicit step count.
    """
    return (
        load_dataset(image_file, label_file)
        .cache()  # keep decoded records after the first pass over the files
        .shuffle(5000, reshuffle_each_iteration=True)
        .repeat()
        .batch(batch_size, drop_remainder=True)
        .prefetch(AUTO)
    )
  
def get_validation_dataset(image_file, label_file):
    """Return a repeating validation pipeline that emits 10000-example batches.

    Args:
        image_file: path of the image record file.
        label_file: path of the label record file.

    Returns:
        A tf.data.Dataset of (images, labels) batches of exactly 10000
        examples (a shorter trailing batch is dropped), repeated forever.
    """
    return (
        load_dataset(image_file, label_file)
        .cache()
        .batch(10000, drop_remainder=True)
        .repeat()
    )

In [6]:
# Pipelines consumed by the model.fit / model.predict cells further down.
training_dataset = get_training_dataset(
    image_file=training_images_file,
    label_file=training_labels_file,
    batch_size=BATCH_SIZE,
)
validation_dataset = get_validation_dataset(
    image_file=validation_images_file,
    label_file=validation_labels_file,
)

# Zero-argument factories that build a fresh pipeline on every call.
training_input_fn = lambda: get_training_dataset(
    training_images_file, training_labels_file, BATCH_SIZE)
validation_input_fn = lambda: get_validation_dataset(
    validation_images_file, validation_labels_file)

# Single layer neural net

In [None]:
# Baseline classifier: the 28*28 input vector feeds one 10-way softmax layer.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28 * 28,)),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 10)                7850      
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The training pipeline repeats forever, so fit needs an explicit epoch length.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

# One validation step consumes a single 10000-example validation batch.
history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
)

Steps per epoch:  468
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Score one validation batch, then take the most probable class per row.
probabilities = model.predict(validation_dataset, steps=1)
predicted_labels = probabilities.argmax(axis=1)
print(predicted_labels)

[7 2 1 ... 4 8 6]


# Adding layers: Multilayer neural net with sigmoid activations

In [None]:
# Fully connected classifier: 200 and 60 sigmoid units, then a 10-way softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28 * 28,)),
    tf.keras.layers.Dense(units=200, activation='sigmoid'),
    tf.keras.layers.Dense(units=60, activation='sigmoid'),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 200)               157000    
_________________________________________________________________
dense_7 (Dense)              (None, 60)                12060     
_________________________________________________________________
dense_8 (Dense)              (None, 10)                610       
Total params: 169,670
Trainable params: 169,670
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The training pipeline repeats forever, so fit needs an explicit epoch length.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

# One validation step consumes a single 10000-example validation batch.
history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
)

Steps per epoch:  468
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Special care for deep networks

In [None]:
# Same 200/60/10 layout as the sigmoid network, but with relu hidden
# activations and the adam optimizer in place of sgd.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28 * 28,)),
    tf.keras.layers.Dense(units=200, activation='relu'),
    tf.keras.layers.Dense(units=60, activation='relu'),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 200)               157000    
_________________________________________________________________
dense_10 (Dense)             (None, 60)                12060     
_________________________________________________________________
dense_11 (Dense)             (None, 10)                610       
Total params: 169,670
Trainable params: 169,670
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The training pipeline repeats forever, so fit needs an explicit epoch length.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

# One validation step consumes a single 10000-example validation batch.
history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
)

Steps per epoch:  468
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Learning rate decay

In [None]:
# relu network (200/60/10) trained with an explicitly configured Adam optimizer.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28*28, )),
    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dense(60, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 200)               157000    
_________________________________________________________________
dense_13 (Dense)             (None, 60)                12060     
_________________________________________________________________
dense_14 (Dense)             (None, 10)                610       
Total params: 169,670
Trainable params: 169,670
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Whole batches per epoch. The training pipeline repeats indefinitely, so
# model.fit needs this count to know where an epoch ends.
# NOTE(review): 60000 is presumably the training-set size; it is hard-coded
# here rather than derived from the dataset.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

def lr_decay(epoch):
    """Return the learning rate for `epoch`: 0.01 scaled by 0.6 per epoch.

    Args:
        epoch: zero-based epoch index supplied by the scheduler callback.

    Returns:
        0.01 * 0.6**epoch as a float.
    """
    decay_factor = math.pow(0.6, epoch)
    return 0.01 * decay_factor

# Ask Keras to call lr_decay for each epoch and log the rate it returns.
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(schedule=lr_decay, verbose=True)

history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Steps per epoch:  468
Epoch 1/10

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 2/10

Epoch 00002: LearningRateScheduler reducing learning rate to 0.006.
Epoch 3/10

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0036.
Epoch 4/10

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0021599999999999996.
Epoch 5/10

Epoch 00005: LearningRateScheduler reducing learning rate to 0.001296.
Epoch 6/10

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0007775999999999998.
Epoch 7/10

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0004665599999999999.
Epoch 8/10

Epoch 00008: LearningRateScheduler reducing learning rate to 0.00027993599999999994.
Epoch 9/10

Epoch 00009: LearningRateScheduler reducing learning rate to 0.00016796159999999993.
Epoch 10/10

Epoch 00010: LearningRateScheduler reducing learning rate to 0.00010077695999999997.


# Dropout / Overfitting

In [None]:
# Deeper relu network (200/100/60/10) with 25% dropout after each hidden layer.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28*28, )),
    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(60, activation='relu'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(10, activation='softmax')
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 200)               157000    
_________________________________________________________________
dropout (Dropout)            (None, 200)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 60)                6060      
_________________________________________________________________
dropout_2 (Dropout)          (None, 60)                0         
_________________________________________________________________
dense_18 (Dense)             (None, 10)               

In [None]:
# Whole batches per epoch. The training pipeline repeats indefinitely, so
# model.fit needs this count to know where an epoch ends.
# NOTE(review): 60000 is presumably the training-set size; it is hard-coded
# here rather than derived from the dataset.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

def lr_decay(epoch):
    """Exponentially decayed learning rate: starts at 0.01, times 0.6 each epoch.

    Args:
        epoch: zero-based epoch index supplied by the scheduler callback.

    Returns:
        The learning rate to use for that epoch, as a float.
    """
    initial_rate = 0.01
    return initial_rate * math.pow(0.6, epoch)

# Ask Keras to call lr_decay for each epoch and log the rate it returns.
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(schedule=lr_decay, verbose=True)

history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Steps per epoch:  468
Epoch 1/10

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 2/10

Epoch 00002: LearningRateScheduler reducing learning rate to 0.006.
Epoch 3/10

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0036.
Epoch 4/10

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0021599999999999996.
Epoch 5/10

Epoch 00005: LearningRateScheduler reducing learning rate to 0.001296.
Epoch 6/10

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0007775999999999998.
Epoch 7/10

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0004665599999999999.
Epoch 8/10

Epoch 00008: LearningRateScheduler reducing learning rate to 0.00027993599999999994.
Epoch 9/10

Epoch 00009: LearningRateScheduler reducing learning rate to 0.00016796159999999993.
Epoch 10/10

Epoch 00010: LearningRateScheduler reducing learning rate to 0.00010077695999999997.


# A convolutional network

In [7]:
# Convolutional classifier: the flat 28*28 vector is reshaped to a 28x28x1
# image, passed through three conv layers (the last two with stride 2), then
# flattened into a 200-unit dense layer and a 10-way softmax.
model = tf.keras.Sequential([
    # Declare the input with an explicit Input layer, as the dense models in
    # this notebook do, instead of the deprecated `input_shape=` layer kwarg.
    tf.keras.layers.Input(shape=(28*28,)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(kernel_size=3, filters=12, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(kernel_size=6, filters=24, padding='same', activation='relu', strides=2),
    tf.keras.layers.Conv2D(kernel_size=6, filters=32, padding='same', activation='relu', strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 12)        120       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 24)        10392     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 32)          27680     
_________________________________________________________________
flatten (Flatten)            (None, 1568)              0         
_________________________________________________________________
dense (Dense)                (None, 200)               313800    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                2

In [8]:
# Whole batches per epoch. The training pipeline repeats indefinitely, so
# model.fit needs this count to know where an epoch ends.
# NOTE(review): 60000 is presumably the training-set size; it is hard-coded
# here rather than derived from the dataset.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

def lr_decay(epoch):
    """Return the learning rate for `epoch`: 0.01 scaled by 0.6 per epoch.

    Args:
        epoch: zero-based epoch index supplied by the scheduler callback.

    Returns:
        0.01 * 0.6**epoch as a float.
    """
    shrink = math.pow(0.6, epoch)
    rate = 0.01 * shrink
    return rate

# Ask Keras to call lr_decay for each epoch and log the rate it returns.
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(schedule=lr_decay, verbose=True)

history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Steps per epoch:  468
Epoch 1/10

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 2/10

Epoch 00002: LearningRateScheduler reducing learning rate to 0.006.
Epoch 3/10

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0036.
Epoch 4/10

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0021599999999999996.
Epoch 5/10

Epoch 00005: LearningRateScheduler reducing learning rate to 0.001296.
Epoch 6/10

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0007775999999999998.
Epoch 7/10

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0004665599999999999.
Epoch 8/10

Epoch 00008: LearningRateScheduler reducing learning rate to 0.00027993599999999994.
Epoch 9/10

Epoch 00009: LearningRateScheduler reducing learning rate to 0.00016796159999999993.
Epoch 10/10

Epoch 00010: LearningRateScheduler reducing learning rate to 0.00010077695999999997.


# Dropout again

In [9]:
# Same convolutional stack as the previous model, with 40% dropout inserted
# between the 200-unit dense layer and the softmax output.
model = tf.keras.Sequential([
    # Declare the input with an explicit Input layer, as the dense models in
    # this notebook do, instead of the deprecated `input_shape=` layer kwarg.
    tf.keras.layers.Input(shape=(28*28,)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(kernel_size=3, filters=12, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(kernel_size=6, filters=24, padding='same', activation='relu', strides=2),
    tf.keras.layers.Conv2D(kernel_size=6, filters=32, padding='same', activation='relu', strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(10, activation='softmax')
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 12)        120       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 14, 24)        10392     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 7, 7, 32)          27680     
_________________________________________________________________
flatten_1 (Flatten)          (None, 1568)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 200)               313800    
_________________________________________________________________
dropout (Dropout)            (None, 200)              

In [10]:
# Whole batches per epoch. The training pipeline repeats indefinitely, so
# model.fit needs this count to know where an epoch ends.
# NOTE(review): 60000 is presumably the training-set size; it is hard-coded
# here rather than derived from the dataset.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

def lr_decay(epoch):
    """Exponentially decayed learning rate: starts at 0.01, times 0.6 each epoch.

    Args:
        epoch: zero-based epoch index supplied by the scheduler callback.

    Returns:
        The learning rate to use for that epoch, as a float.
    """
    decay_factor = math.pow(0.6, epoch)
    return 0.01 * decay_factor

# Ask Keras to call lr_decay for each epoch and log the rate it returns.
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(schedule=lr_decay, verbose=True)

history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Steps per epoch:  468
Epoch 1/10

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 2/10

Epoch 00002: LearningRateScheduler reducing learning rate to 0.006.
Epoch 3/10

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0036.
Epoch 4/10

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0021599999999999996.
Epoch 5/10

Epoch 00005: LearningRateScheduler reducing learning rate to 0.001296.
Epoch 6/10

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0007775999999999998.
Epoch 7/10

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0004665599999999999.
Epoch 8/10

Epoch 00008: LearningRateScheduler reducing learning rate to 0.00027993599999999994.
Epoch 9/10

Epoch 00009: LearningRateScheduler reducing learning rate to 0.00016796159999999993.
Epoch 10/10

Epoch 00010: LearningRateScheduler reducing learning rate to 0.00010077695999999997.


# Batch normalization

In [11]:
# Convolutional classifier with batch normalization. Each conv/dense layer
# is created without a bias or activation and is followed by
# BatchNormalization (center=True, scale=False) and a separate relu
# Activation layer.
model = tf.keras.Sequential([
    # Declare the input with an explicit Input layer, as the dense models in
    # this notebook do, instead of the deprecated `input_shape=` layer kwarg.
    tf.keras.layers.Input(shape=(28*28,)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),

    tf.keras.layers.Conv2D(kernel_size=3, filters=12, padding='same', use_bias=False),
    tf.keras.layers.BatchNormalization(scale=False, center=True),
    tf.keras.layers.Activation('relu'),

    tf.keras.layers.Conv2D(kernel_size=6, filters=24, padding='same', strides=2, use_bias=False),
    tf.keras.layers.BatchNormalization(scale=False, center=True),
    tf.keras.layers.Activation('relu'),

    tf.keras.layers.Conv2D(kernel_size=6, filters=32, padding='same', strides=2, use_bias=False),
    tf.keras.layers.BatchNormalization(scale=False, center=True),
    tf.keras.layers.Activation('relu'),

    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(200, use_bias=False),
    tf.keras.layers.BatchNormalization(scale=False, center=True),
    tf.keras.layers.Activation('relu'),

    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(10, activation='softmax')
])

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_2 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 28, 28, 12)        108       
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 12)        36        
_________________________________________________________________
activation (Activation)      (None, 28, 28, 12)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 14, 14, 24)        10368     
_________________________________________________________________
batch_normalization_1 (Batch (None, 14, 14, 24)        72        
_________________________________________________________________
activation_1 (Activation)    (None, 14, 14, 24)       

In [12]:
# Whole batches per epoch. The training pipeline repeats indefinitely, so
# model.fit needs this count to know where an epoch ends.
# NOTE(review): 60000 is presumably the training-set size; it is hard-coded
# here rather than derived from the dataset.
steps_per_epoch = 60000 // BATCH_SIZE
print("Steps per epoch: ", steps_per_epoch)

def lr_decay(epoch):
    """Return the learning rate for `epoch`: 0.01 scaled by 0.6 per epoch.

    Args:
        epoch: zero-based epoch index supplied by the scheduler callback.

    Returns:
        0.01 * 0.6**epoch as a float.
    """
    initial_rate = 0.01
    return initial_rate * math.pow(0.6, epoch)

# Ask Keras to call lr_decay for each epoch and log the rate it returns.
lr_decay_callback = tf.keras.callbacks.LearningRateScheduler(schedule=lr_decay, verbose=True)

history = model.fit(
    training_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=1,
    callbacks=[lr_decay_callback],
)

Steps per epoch:  468
Epoch 1/10

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 2/10

Epoch 00002: LearningRateScheduler reducing learning rate to 0.006.
Epoch 3/10

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0036.
Epoch 4/10

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0021599999999999996.
Epoch 5/10

Epoch 00005: LearningRateScheduler reducing learning rate to 0.001296.
Epoch 6/10

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0007775999999999998.
Epoch 7/10

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0004665599999999999.
Epoch 8/10

Epoch 00008: LearningRateScheduler reducing learning rate to 0.00027993599999999994.
Epoch 9/10

Epoch 00009: LearningRateScheduler reducing learning rate to 0.00016796159999999993.
Epoch 10/10

Epoch 00010: LearningRateScheduler reducing learning rate to 0.00010077695999999997.
