<a href="https://colab.research.google.com/github/michelucci/oreilly-london-ai/blob/master/day1/MNIST_Classification_with_TF2_0_in_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Classification with TF2.0 in Keras

(C) 2019 Umberto Michelucci

umberto.michelucci@toelt.ai

www.toelt.ai

In [6]:
!pip install tensorflow==2.0.0

Collecting tensorflow==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (86.3MB)
[K     |████████████████████████████████| 86.3MB 39.5MB/s 
Collecting tensorboard<2.1.0,>=2.0.0 (from tensorflow==2.0.0)
[?25l  Downloading https://files.pythonhosted.org/packages/9b/a6/e8ffa4e2ddb216449d34cfcb825ebb38206bee5c4553d69e7bc8bc2c5d64/tensorboard-2.0.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 36.6MB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0 (from tensorflow==2.0.0)
[?25l  Downloading https://files.pythonhosted.org/packages/95/00/5e6cdf86190a70d7382d320b2b04e4ff0f8191a37d90a422a2f8ff0705bb/tensorflow_estimator-2.0.0-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 43.0MB/s 
Installing collected packages: tensorboard, tensorflow-estimator, tensorflow
  Found existing installation: tensorboar

In [1]:
import tensorflow as tf
import keras
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('default')

from google.colab import files

import tensorflow_datasets as tfds

Using TensorFlow backend.


In [2]:
# Show which TensorFlow release this runtime actually imported.
print(tf.__version__)

2.0.0


Let's load the MNIST dataset first.

In [0]:
# Load MNIST through TensorFlow Datasets. `info` holds the dataset metadata
# (with_info=True); as_supervised=True makes each element an (image, label)
# pair, which is the form `scale(image, label)` consumes.
datasets, info = tfds.load(name='mnist', with_info=True, as_supervised=True)
mnist_train = datasets['train']
mnist_test = datasets['test']

In [0]:
def scale(image, label):
  """Cast `image` to float32 and divide it by 255; return it with `label`.

  The label is passed through unchanged so the function can be used directly
  with `Dataset.map` on (image, label) pairs.
  """
  scaled_image = tf.cast(image, tf.float32) / 255.0
  return scaled_image, label

In [0]:
# Number of elements held in the shuffle buffer of the training pipeline.
BUFFER_SIZE = 10_000
# Number of examples per mini-batch.
BATCH_SIZE = 64
# Number of passes over the training batches.
NUM_EPOCHS = 5

Now let's convert our data into a scaled, shuffled and batched dataset to make our training easier.

In [0]:
# Both splits are truncated to this many batches to keep the demo fast.
STEPS_PER_EPOCH = 50

# Training pipeline: scale -> shuffle -> batch -> keep the first batches.
train_data = (
    mnist_train
    .map(scale)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .take(STEPS_PER_EPOCH)
)

# Test pipeline: same as above but without shuffling.
test_data = (
    mnist_test
    .map(scale)
    .batch(BATCH_SIZE)
    .take(STEPS_PER_EPOCH)
)

In [0]:
image_batch, label_batch = next(iter(train_data))

In [8]:
print(image_batch.shape)

(64, 28, 28, 1)


## First version with `compile()` and `fit()`

In [9]:
# Small convolutional classifier, assembled layer by layer:
# conv -> max-pool -> flatten -> dropout -> dense -> batch-norm -> softmax.
mnist_model = tf.keras.Sequential()
mnist_model.add(tf.keras.layers.Conv2D(
    32, 3, activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(0.02),
    input_shape=(28, 28, 1)))
mnist_model.add(tf.keras.layers.MaxPooling2D())
mnist_model.add(tf.keras.layers.Flatten())
mnist_model.add(tf.keras.layers.Dropout(0.1))
mnist_model.add(tf.keras.layers.Dense(64, activation='relu'))
mnist_model.add(tf.keras.layers.BatchNormalization())
mnist_model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Integer class labels + softmax outputs -> sparse categorical cross-entropy.
mnist_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the training batches, then score the model on the test batches.
mnist_model.fit(train_data, epochs=NUM_EPOCHS)
loss, acc = mnist_model.evaluate(test_data)

print("Loss {}, Accuracy {}".format(loss, acc))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loss 0.6152513134479523, Accuracy 0.9643750190734863


We can easily check the results for a single image using the dataset.

In [0]:
# Per-batch loss values are appended here by the manual training loop below.
loss_history = []
# Stand-alone Adam optimizer for the manual training loop (2.0 specific).
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

## Custom training loop

In [0]:
# Compile the model again before driving it batch by batch.
mnist_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Names of the values returned by train_on_batch / test_on_batch; used to
# label the numbers printed by the training loop.
metrics_names = mnist_model.metrics_names

In [14]:
print(metrics_names)

['loss', 'accuracy']


In [17]:
# Each epoch: one pass over the training batches, then one pass over the test
# batches, calling the compiled model one batch at a time.
for epoch in range(NUM_EPOCHS):
  # Start every epoch with cleared metric accumulators.
  mnist_model.reset_metrics()

  # Training pass. To log per-batch training metrics, print the entries of
  # `result` next to the matching `metrics_names` inside this loop.
  for image_batch, label_batch in train_data:
    result = mnist_model.train_on_batch(image_batch, label_batch)

  # Evaluation pass; reset_metrics=False makes the call return accumulated
  # metrics instead of resetting them after every batch.
  for image_batch, label_batch in test_data:
    result = mnist_model.test_on_batch(
        image_batch, label_batch, reset_metrics=False)

  # Report the values returned by the last evaluation call of this epoch.
  eval_loss = "{}: {:.3f}".format(metrics_names[0], result[0])
  eval_accuracy = "{}: {:.3f}".format(metrics_names[1], result[1])
  print("\neval: ", eval_loss, eval_accuracy)



eval:  loss: 0.195 accuracy: 0.964

eval:  loss: 0.106 accuracy: 0.969

eval:  loss: 0.089 accuracy: 0.962

eval:  loss: 0.076 accuracy: 0.967

eval:  loss: 0.071 accuracy: 0.965


We can now recheck a single image to see whether the model performs better than before training.

# MNIST with GPU acceleration

In [19]:
print(tf.test.is_gpu_available())

False


In [20]:
# Ask TensorFlow for the name of the GPU device it can see and stop the
# notebook early when it is not the expected first GPU.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found. Check the notebook settings.')

SystemError: ignored

In [0]:

# `tf.ConfigProto` and `tf.Session` are TF 1.x names that are no longer
# exposed at the top level in TensorFlow 2.0 (the version pinned by this
# notebook); they remain reachable through `tf.compat.v1`.
config = tf.compat.v1.ConfigProto()
# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving the whole device up front.
config.gpu_options.allow_growth = True
# NOTE(review): this option is attached to the v1 session created here; it
# presumably does not reconfigure an already-initialised eager runtime. The
# TF2-native alternative is tf.config.experimental.set_memory_growth, called
# before any GPU is used - confirm which one is wanted.
sess = tf.compat.v1.Session(config=config)


The following code has TensorFlow operations that are placed on a GPU. At the moment the argmax has a bug and has therefore been placed on the CPU. A GPU makes the code much faster than using only a CPU (see above for the CPU-only version).

In [0]:
%%time

for i in range(10): # Loop for the Epochs
  print ("\nEpoch:", i)
  
  for (batch, (images, labels)) in enumerate(dataset.take(60000)): # Loop for the mini-batches
    if batch % 100 == 0:
      print('.', end='')
    labels = tf.cast(labels, dtype = tf.int64)
    
    
    with tf.GradientTape() as tape:
      
      with tf.device('/gpu:0'):    
        logits = mnist_model(images, training=True)

      with tf.device('/cpu:0'):
        tgmax = tf.argmax(labels, axis = 1, output_type=tf.int64)
        
      with tf.device('/gpu:0'):  
        loss_value = tf.losses.sparse_softmax_cross_entropy(tgmax, logits)

        loss_history.append(loss_value.numpy())
        grads = tape.gradient(loss_value, mnist_model.variables)
        optimizer.apply_gradients(zip(grads, mnist_model.variables),
                                    global_step=tf.train.get_or_create_global_step())


Epoch: 0
..........
Epoch: 1
..........
Epoch: 2
..........
Epoch: 3
..........
Epoch: 4
..........
Epoch: 5
..........
Epoch: 6
..........
Epoch: 7
..........
Epoch: 8
..........
Epoch: 9
..........CPU times: user 1min 25s, sys: 3.41 s, total: 1min 28s
Wall time: 1min 26s


## Custom training loops with gradients

In [23]:
# Running aggregates for the custom loop: mean of the per-step total loss and
# accuracy of the predictions against integer labels.
loss_metric = tf.keras.metrics.Mean(name='train_loss')
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

# Fresh, untrained copy of the classifier used earlier in the notebook.
mnist_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=3,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.02),
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=10, activation='softmax'),
])

# Optimizer and prediction loss used by `train_step`.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

@tf.function
def train_step(inputs, labels):
  """Run one optimisation step of `mnist_model` on a single batch.

  The total loss is the prediction loss from `loss_fn` plus the sum of the
  model's own losses (`mnist_model.losses`, i.e. the kernel regulariser).
  The step applies the gradients with `optimizer` and feeds the total loss
  and the predictions into `loss_metric` and `accuracy_metric`.

  Args:
    inputs: batch of images passed to the model.
    labels: class labels for the batch.
  """
  with tf.GradientTape() as tape:
    predictions = mnist_model(inputs, training=True)
    total_loss = loss_fn(labels, predictions) + tf.math.add_n(mnist_model.losses)

  trainable = mnist_model.trainable_variables
  optimizer.apply_gradients(zip(tape.gradient(total_loss, trainable), trainable))

  # Update the metrics
  accuracy_metric.update_state(labels, predictions)
  loss_metric.update_state(total_loss)

# Run the custom training step over every training batch for NUM_EPOCHS
# epochs, reporting the epoch-level mean loss and accuracy.
for epoch in range(NUM_EPOCHS):
  # Clear both accumulators so the report covers this epoch only.
  for metric in (loss_metric, accuracy_metric):
    metric.reset_states()

  for inputs, labels in train_data:
    train_step(inputs, labels)

  # Read the values aggregated over all batches of this epoch.
  mean_loss = loss_metric.result()
  mean_accuracy = accuracy_metric.result()

  print('Epoch: ', epoch)
  print('  loss:     {:.3f}'.format(mean_loss))
  print('  accuracy: {:.3f}'.format(mean_accuracy))

Epoch:  0
  loss:     0.622
  accuracy: 0.831
Epoch:  1
  loss:     0.266
  accuracy: 0.943
Epoch:  2
  loss:     0.198
  accuracy: 0.957
Epoch:  3
  loss:     0.151
  accuracy: 0.973
Epoch:  4
  loss:     0.125
  accuracy: 0.977
