<a href="https://colab.research.google.com/github/michelucci/oreilly-london-ai/blob/master/day1/MNIST_Classification_with_TF2_0_in_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Classification with TF2.0 in Keras

(C) 2019 Umberto Michelucci

umberto.michelucci@toelt.ai

www.toelt.ai

In [12]:
# Colab setup: install the nightly TensorFlow build that the rest of this notebook runs on.
# NOTE(review): the version is not pinned, so a re-run may resolve to a different build
# than the 2.1.0.dev20191010 wheel shown in the recorded output.
!pip install tf-nightly # for GPU support

Collecting tf-nightly
[?25l  Downloading https://files.pythonhosted.org/packages/ce/71/704e3d8f73e06fa4a1bfffd17aec89737bb5e0fa9ca2aa464c42f9422d62/tf_nightly-2.1.0.dev20191010-cp36-cp36m-manylinux2010_x86_64.whl (397.3MB)
[K     |████████████████████████████████| 397.3MB 69kB/s 
Collecting tb-nightly<2.2.0a0,>=2.1.0a0 (from tf-nightly)
[?25l  Downloading https://files.pythonhosted.org/packages/1d/3b/1479cccf0bdcdabc0e046eb761fdf413125cc1b5e83398e15ff00000f6ed/tb_nightly-2.1.0a20191010-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 41.3MB/s 
Collecting tf-estimator-nightly (from tf-nightly)
[?25l  Downloading https://files.pythonhosted.org/packages/8c/c0/f525d294adc3f5d3c6f922c5a302ac88a1710dcc0b4faf29a48679b5852f/tf_estimator_nightly-2.0.0.dev2019101101-py2.py3-none-any.whl (450kB)
[K     |████████████████████████████████| 460kB 52.0MB/s 
Installing collected packages: tb-nightly, tf-estimator-nightly, tf-nightly
Successfully installed tb-nightly-2.1.0a

In [0]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('default')

from google.colab import files

import tensorflow_datasets as tfds

In [4]:
# Confirm which TensorFlow build the runtime actually picked up.
print(tf.__version__)

2.1.0-dev20191010


Let's load the MNIST dataset first.

In [0]:
# Load MNIST through TensorFlow Datasets. `with_info=True` also returns the
# dataset metadata; `as_supervised=True` asks for (image, label) pairs, which is
# the structure the rest of the notebook unpacks.
datasets, info = tfds.load(name='mnist', with_info=True, as_supervised=True)
mnist_train = datasets['train']
mnist_test = datasets['test']

In [0]:
def scale(image, label):
  """Convert an image to float32 and divide it by 255.0; keep the label as is.

  Args:
    image: image tensor to rescale.
    label: label that belongs to `image`; returned unchanged.

  Returns:
    A `(image, label)` tuple holding the rescaled image and the original label.
  """
  scaled = tf.cast(image, tf.float32) / 255.0
  return scaled, label

In [0]:
# Size of the shuffle buffer used by the training pipeline.
BUFFER_SIZE = 10000
# Number of examples per batch in both the training and the test pipeline.
BATCH_SIZE = 64
# Number of epochs used by fit() and by every manual training loop below.
NUM_EPOCHS = 5

Now let's convert our data into a dataset, to make our training easier.

In [0]:
# Number of batches kept from each pipeline, so that one pass stays short.
STEPS_PER_EPOCH = 50

# Training pipeline: rescale -> shuffle -> batch -> keep the first STEPS_PER_EPOCH batches.
train_data = (
    mnist_train
    .map(scale)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .take(STEPS_PER_EPOCH)
)

# Test pipeline: same preprocessing without shuffling, truncated to the same length.
test_data = (
    mnist_test
    .map(scale)
    .batch(BATCH_SIZE)
    .take(STEPS_PER_EPOCH)
)

In [0]:
# Pull a single batch out of the training pipeline so that it can be inspected.
image_batch, label_batch = next(iter(train_data))

In [10]:
# Shape of the inspected image batch; the recorded output is (64, 28, 28, 1).
print(image_batch.shape)

(64, 28, 28, 1)


## First version with `compile()` and `fit()`

In [14]:
# Small convolutional classifier: one L2-regularised convolution, max-pooling,
# then a dropout -> dense -> batch-norm head that ends in a 10-way softmax.
mnist_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=3,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.02),
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

# Configure the built-in training loop: Adam, sparse categorical cross-entropy,
# and accuracy as the reported metric.
mnist_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train with fit(), then score the trained model on the test pipeline.
mnist_model.fit(train_data, epochs=NUM_EPOCHS)
loss, acc = mnist_model.evaluate(test_data)

print("Loss {}, Accuracy {}".format(loss, acc))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
     50/Unknown - 1s 22ms/step - loss: 0.7178 - accuracy: 0.9609Loss 0.7178147268295288, Accuracy 0.9609375


We can check the results for one single image easily with the dataset

In [0]:
# NOTE(review): in the visible notebook the next cells compile with optimizer='adam'
# and never read this `optimizer`; the name is rebound in the gradient-tape section.
# `loss_history` is not referenced anywhere else in the visible cells.
optimizer = tf.keras.optimizers.Adam(0.001) # 2.0 specific
loss_history = []

## Custom training loop

In [0]:
# Compile the existing `mnist_model` again with the same settings as before.
mnist_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Names used to label the values returned by train_on_batch()/test_on_batch()
# in the manual loops below.
metrics_names = mnist_model.metrics_names

In [18]:
# Show the metric names; the loops below pair metrics_names[i] with result[i].
print(metrics_names)

['loss', 'accuracy']


In [0]:
%%timeit
for epoch in range(NUM_EPOCHS):
  #Reset the metric accumulators
  mnist_model.reset_metrics()

  for image_batch, label_batch in train_data:
    result = mnist_model.train_on_batch(image_batch, label_batch)
    #print("train: ",
    #      "{}: {:.3f}".format(metrics_names[0], result[0]),
    #      "{}: {:.3f}".format(metrics_names[1], result[1]))
  for image_batch, label_batch in test_data:
    result = mnist_model.test_on_batch(image_batch, label_batch,
                                 # return accumulated metrics
                                 reset_metrics=False)
  print("\neval: ",
        "{}: {:.3f}".format(metrics_names[0], result[0]),
        "{}: {:.3f}".format(metrics_names[1], result[1]))



eval:  loss: 0.195 accuracy: 0.964

eval:  loss: 0.106 accuracy: 0.969

eval:  loss: 0.089 accuracy: 0.962

eval:  loss: 0.076 accuracy: 0.967

eval:  loss: 0.071 accuracy: 0.965


We can now recheck one single image to see if we perform better than before training.

# MNIST with GPU acceleration

In [11]:
# `tf.test.is_gpu_available()` is deprecated; as its deprecation notice recommends,
# query the list of physical GPU devices instead. Prints True when at least one
# GPU is visible to TensorFlow, False otherwise.
print(len(tf.config.experimental.list_physical_devices('GPU')) > 0)

Instructions for updating:
Use `tf.config.experimental.list_physical_devices('GPU')` instead.


Instructions for updating:
Use `tf.config.experimental.list_physical_devices('GPU')` instead.


True


In [12]:
# Fail fast unless TensorFlow reports the first GPU: the timing cell below places
# its training and evaluation steps on '/gpu:0' explicitly.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found. Check the notebook settings.')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


The following code has TensorFlow operations that are placed on a GPU. At the moment the argmax has a bug and has therefore been placed on a CPU. A GPU makes the code much faster than when using only a CPU (check above for the CPU only version).

In [21]:
%%time
# Timed train/evaluate loop in which every train and test step runs inside an
# explicit '/gpu:0' device scope. Only the step calls are inside the scope;
# iterating over the datasets happens outside of it.
for epoch in range(NUM_EPOCHS):
  #Reset the metric accumulators
  mnist_model.reset_metrics()

  for image_batch, label_batch in train_data:
    with tf.device('/gpu:0'):    
      result = mnist_model.train_on_batch(image_batch, label_batch)
    #print("train: ",
    #      "{}: {:.3f}".format(metrics_names[0], result[0]),
    #      "{}: {:.3f}".format(metrics_names[1], result[1]))
  for image_batch, label_batch in test_data:
    with tf.device('/gpu:0'):  
      result = mnist_model.test_on_batch(image_batch, label_batch,
                                 # return accumulated metrics
                                 reset_metrics=False)
  # Report the values returned by the last test_on_batch() call of this epoch.
  print("\neval: ",
        "{}: {:.3f}".format(metrics_names[0], result[0]),
        "{}: {:.3f}".format(metrics_names[1], result[1]))



eval:  loss: 0.069 accuracy: 0.974

eval:  loss: 0.109 accuracy: 0.973

eval:  loss: 0.020 accuracy: 0.972

eval:  loss: 0.061 accuracy: 0.968

eval:  loss: 0.031 accuracy: 0.971
CPU times: user 38.1 s, sys: 8.11 s, total: 46.2 s
Wall time: 27.6 s


In [23]:
%%time
# Timed train/evaluate loop in which every train and test step runs inside an
# explicit '/cpu:0' device scope, for comparison with the '/gpu:0' run.
# Only the step calls are inside the scope; iterating over the datasets happens
# outside of it.
for epoch in range(NUM_EPOCHS):
  #Reset the metric accumulators
  mnist_model.reset_metrics()

  for image_batch, label_batch in train_data:
    with tf.device('/cpu:0'):    
      result = mnist_model.train_on_batch(image_batch, label_batch)
    #print("train: ",
    #      "{}: {:.3f}".format(metrics_names[0], result[0]),
    #      "{}: {:.3f}".format(metrics_names[1], result[1]))
  for image_batch, label_batch in test_data:
    with tf.device('/cpu:0'):  
      result = mnist_model.test_on_batch(image_batch, label_batch,
                                 # return accumulated metrics
                                 reset_metrics=False)
  # Report the values returned by the last test_on_batch() call of this epoch.
  print("\neval: ",
        "{}: {:.3f}".format(metrics_names[0], result[0]),
        "{}: {:.3f}".format(metrics_names[1], result[1]))



eval:  loss: 0.037 accuracy: 0.974

eval:  loss: 0.045 accuracy: 0.976

eval:  loss: 0.030 accuracy: 0.971

eval:  loss: 0.056 accuracy: 0.967

eval:  loss: 0.095 accuracy: 0.972
CPU times: user 58.1 s, sys: 8.77 s, total: 1min 6s
Wall time: 40.1 s


## Custom training loops with gradients

In [0]:
# Running aggregates: train_step() feeds them and the epoch loop reports them.
loss_metric = tf.keras.metrics.Mean(name='train_loss')
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

# Newly constructed model for the gradient-tape loop: L2-regularised convolution,
# max-pooling, then a dropout -> dense -> batch-norm head with a 10-way softmax.
mnist_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=3,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.02),
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

# Optimizer and loss object that train_step() applies by hand.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

@tf.function
def train_step(inputs, labels):
  """Run one optimisation step on a batch and update the running metrics.

  The forward pass is recorded on a gradient tape with `training=True`. The
  loss that is differentiated is the prediction loss from `loss_fn` plus the
  sum of the losses collected on `mnist_model.losses`. The resulting gradients
  are applied to the model's trainable variables with `optimizer`.

  Args:
    inputs: batch of model inputs.
    labels: labels for `inputs`, passed to `loss_fn` and to the accuracy metric.

  Returns:
    None. The results are accumulated in `loss_metric` and `accuracy_metric`.
  """
  with tf.GradientTape() as tape:
    probs = mnist_model(inputs, training=True)
    data_loss = loss_fn(labels, probs)
    # Extra loss terms registered on the model (e.g. by kernel regularisers).
    reg_loss = tf.math.add_n(mnist_model.losses)
    batch_loss = data_loss + reg_loss

  variables = mnist_model.trainable_variables
  grads = tape.gradient(batch_loss, variables)
  optimizer.apply_gradients(zip(grads, variables))

  # Fold this batch into the running loss and accuracy.
  loss_metric.update_state(batch_loss)
  accuracy_metric.update_state(labels, probs)

# Epoch loop: clear the running metrics, run train_step() on every training
# batch, then print the loss and accuracy accumulated during this epoch.
for epoch in range(NUM_EPOCHS):
  # Reset both accumulators so the printed values cover this epoch only.
  loss_metric.reset_states()
  accuracy_metric.reset_states()

  for inputs, labels in train_data:
    train_step(inputs, labels)
  
  # Read the values accumulated by train_step() over the batches of this epoch.
  mean_loss = loss_metric.result()
  mean_accuracy = accuracy_metric.result()

  print('Epoch: ', epoch)
  print('  loss:     {:.3f}'.format(mean_loss))
  print('  accuracy: {:.3f}'.format(mean_accuracy))

Epoch:  0
  loss:     0.622
  accuracy: 0.831
Epoch:  1
  loss:     0.266
  accuracy: 0.943
Epoch:  2
  loss:     0.198
  accuracy: 0.957
Epoch:  3
  loss:     0.151
  accuracy: 0.973
Epoch:  4
  loss:     0.125
  accuracy: 0.977
