In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import numpy as np
import matplotlib.pylab as plt
import os
from datetime import datetime

In [2]:
# Load the CIFAR-10 train/test splits through the Keras datasets helper.
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1.
# Cast to float32 first: dividing the raw integer arrays by a Python float
# yields float64, which doubles the memory footprint for no benefit.
train_images = train_images.astype(np.float32) / 255.0
test_images = test_images.astype(np.float32) / 255.0

In [3]:
# Human-readable CIFAR-10 class names, listed alphabetically.
# Source: https://www.cs.toronto.edu/~kriz/cifar.html
CLASS_NAMES = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [4]:
# Split the CIFAR-10 test arrays in two: the first 500 samples become the
# validation set, everything after them remains the test set.
validation_dataset = tf.data.Dataset.from_tensor_slices(
    (test_images[:500], test_labels[:500])
)
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_images[500:], test_labels[500:])
)

In [5]:
# Wrap the in-memory training arrays in a tf.data.Dataset that yields one
# (image, label) pair per element.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_images, train_labels)
)

In [6]:
# https://www.tensorflow.org/api_docs/python/tf/data/Dataset#cardinality
# Datasets created with from_tensor_slices know their own length, so read the
# cardinality directly instead of copying every element into a Python list
# just to count it.
train_dataset_size = int(train_dataset.cardinality().numpy())
print('Training data sample size: ', train_dataset_size)

validation_dataset_size = int(validation_dataset.cardinality().numpy())
print('Validation data sample size: ', validation_dataset_size)

test_dataset_size = int(test_dataset.cardinality().numpy())
print('Test data sample size: ', test_dataset_size)

Training data sample size:  50000
Validation data sample size:  500
Test data sample size:  9500


## Define a distribution strategy
Create a `MirroredStrategy` object to handle distributed training.

In [7]:
# Create the distribution strategy with default arguments; the recorded run
# below reports a single CPU device.
strategy = tf.distribute.MirroredStrategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [8]:
# Report how many replicas the strategy keeps in sync.
num_devices = strategy.num_replicas_in_sync
print('Number of devices: {}'.format(num_devices))

Number of devices: 1


In [9]:
# Size of the shuffle buffer used when building the input pipelines.
BUFFER_SIZE = 10_000

# The global batch is the per-replica batch multiplied by the number of
# replicas the strategy keeps in sync.
BATCH_SIZE_PER_REPLICA = 64
BATCH_SIZE = strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA

In [10]:
# Shuffle BEFORE repeating: shuffle-then-repeat finishes one pass over the data
# before starting the next, whereas repeat-then-shuffle lets samples from
# neighbouring epochs mix inside the shuffle buffer.
train_dataset = train_dataset.shuffle(BUFFER_SIZE).repeat().batch(BATCH_SIZE)

# Validation and test data are each served as one batch holding the whole split.
validation_dataset = validation_dataset.shuffle(BUFFER_SIZE).batch(validation_dataset_size)
test_dataset = test_dataset.batch(test_dataset_size)


In [11]:
# One step consumes one GLOBAL batch (train_dataset is batched with BATCH_SIZE),
# so divide by BATCH_SIZE rather than the per-replica size; otherwise an "epoch"
# would cover the data num_replicas times over when more than one replica is used.
STEPS_PER_EPOCH = train_dataset_size // BATCH_SIZE

# The validation dataset is a single batch containing every validation sample.
VALIDATION_STEPS = 1

In [12]:
def build_model():
    """Build and compile the CIFAR-10 convolutional classifier.

    Architecture: two Conv2D (3x3, relu, 'same' padding) + MaxPooling2D (2x2)
    stages with 32 and 64 filters, followed by Flatten, a 256-unit relu Dense
    layer and a 10-unit softmax output layer.

    Returns:
        A compiled ``tf.keras.Sequential`` model built for inputs of shape
        (None, 32, 32, 3), using the Adam optimizer, sparse categorical
        cross-entropy loss and the 'accuracy' metric.
    """
    # NOTE(review): the model is created outside `strategy.scope()` (that line
    # was commented out in the original), so the MirroredStrategy defined
    # earlier is not applied to these variables - confirm this is intended.
    model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', name = 'conv_1',
        kernel_initializer='glorot_uniform', padding='same', input_shape = (32,32,3)),
      tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
      tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu', name = 'conv_2',
        kernel_initializer='glorot_uniform', padding='same'),
      tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
      tf.keras.layers.Flatten(name = 'flat_1'),
      # The layer name says 64 but the layer has 256 units; the name is kept
      # unchanged so existing saved artifacts keep matching.
      tf.keras.layers.Dense(256, activation='relu', kernel_initializer='glorot_uniform', name = 'dense_64'),
      tf.keras.layers.Dense(10, activation='softmax', name = 'custom_class')
    ])
    model.build([None, 32, 32, 3])

    model.compile(
      # The output layer already applies softmax, so the loss receives
      # probabilities, not logits. from_logits=True would apply softmax a
      # second time and distort the loss and its gradients.
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
      optimizer=tf.keras.optimizers.Adam(),
      metrics=['accuracy'])
    return model


In [13]:
# Instantiate the compiled CNN defined by build_model().
model = build_model()

Let's define some aliases for the file paths used to save model checkpoints.

In [14]:
# Tag the run with the current timestamp; the name is reused below for the
# checkpoint directory and the TensorBoard log directory.
run_timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
MODEL_NAME = 'myCIFAR10-{}'.format(run_timestamp)
print(MODEL_NAME)

myCIFAR10-20210321-152856


In [15]:

# Checkpoints are written to a per-run directory named after MODEL_NAME.
checkpoint_dir = './' + MODEL_NAME
# "{epoch}" is left as a literal placeholder here; the recorded directory
# listing shows it replaced by the epoch number (ckpt-1, ckpt-2, ...).
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt-{epoch}")
print(checkpoint_prefix)


./myCIFAR10-20210321-152856/ckpt-{epoch}


In [16]:
# Save weights only, and only for epochs that improve on the best validation
# accuracy seen so far.
myCheckPoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
)

# Early stopping is currently disabled:
#myEarlyStop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
#                                               patience=4)

# TensorBoard logs go to a per-run directory, with the graph, weight images
# and histograms (every epoch) enabled.
tensorboard_log_dir = './tensorboardlogs/{}'.format(MODEL_NAME)
myTensorBoard = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_log_dir,
    histogram_freq=1,
    write_graph=True,
    write_images=True,
)


In [17]:
# Callbacks handed to model.fit: checkpointing first, then TensorBoard logging.
myCallbacks = [myCheckPoint, myTensorBoard]

In [18]:
# train_dataset repeats indefinitely, so steps_per_epoch decides where each of
# the 30 epochs ends; validation runs VALIDATION_STEPS batch(es) per epoch.
model.fit(
    x=train_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=30,
    validation_data=validation_dataset,
    validation_steps=VALIDATION_STEPS,
    callbacks=myCallbacks,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fbcc85560a0>

In [19]:
# List the checkpoint directory in long format, oldest entry first, to see
# which epochs produced a checkpoint.

!ls -lrt {checkpoint_dir}


total 200904
-rw-r--r--  1 mbp16  staff  12853230 Mar 21 15:29 ckpt-1.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2086 Mar 21 15:29 ckpt-1.index
-rw-r--r--  1 mbp16  staff  12853230 Mar 21 15:29 ckpt-2.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2086 Mar 21 15:29 ckpt-2.index
-rw-r--r--  1 mbp16  staff  12853230 Mar 21 15:30 ckpt-3.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2086 Mar 21 15:30 ckpt-3.index
-rw-r--r--  1 mbp16  staff  12853230 Mar 21 15:30 ckpt-4.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2086 Mar 21 15:30 ckpt-4.index
-rw-r--r--  1 mbp16  staff  12853230 Mar 21 15:31 ckpt-5.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2086 Mar 21 15:31 ckpt-5.index
-rw-r--r--  1 mbp16  staff  12853230 Mar 21 15:31 ckpt-7.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2086 Mar 21 15:31 ckpt-7.index
-rw-r--r--  1 mbp16  staff  12853230 Mar 21 15:33 ckpt-10.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2086 Mar 21 15:33 ckpt-

In [20]:
# Show the path prefix of the most recently saved checkpoint in checkpoint_dir.
tf.train.latest_checkpoint(checkpoint_dir)

'./myCIFAR10-20210321-152856/ckpt-12'

In [21]:
# Restore the latest checkpoint. Because ModelCheckpoint was configured with
# save_best_only=True, the latest checkpoint is also the one with the best
# val_accuracy.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fbcdbcb85b0>

In [22]:
# List the working directory in long format, oldest entry first.
!ls -lrt

total 160
-rw-r--r--@  1 mbp16  staff    162 Mar 17 20:05 ~$apter 9 Serving TensorFlow model.docx
-rw-r--r--   1 mbp16  staff    555 Mar 20 19:29 Untitled.ipynb
drwxr-xr-x   3 mbp16  staff     96 Mar 21 15:29 [34mtensorboardlogs[m[m
drwxr-xr-x  19 mbp16  staff    608 Mar 21 15:34 [34mmyCIFAR10-20210321-152856[m[m
-rw-r--r--@  1 mbp16  staff  29112 Mar 21 15:43 OReilly_C9_Distributed_CIFAR_V1.ipynb
-rw-r--r--@  1 mbp16  staff  39008 Mar 21 15:44 Chapter 9 Serving TensorFlow model.docx


In [23]:

# NOTE(review): this repeats the load_weights call from cell In [21]; it looks
# redundant unless the model was changed in between - confirm and remove.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fbcb013b280>

In [25]:
# Save the whole model to a single .h5 (HDF5) file.
KERAS_MODEL_PATH = "./models/HDF5/tfkeras_cifar10.h5"
model.save(filepath=KERAS_MODEL_PATH)

In [26]:
# Confirm the HDF5 file was written and check its size.
!ls -lrt {KERAS_MODEL_PATH}

-rw-r--r--  1 mbp16  staff  12891752 Mar 21 15:44 ./models/HDF5/tfkeras_cifar10.h5


In [28]:
# Reload the HDF5 model for scoring.
# Recreate the exact same model, including its weights and the optimizer.
# Load from KERAS_MODEL_PATH rather than repeating the literal path, so the
# save and load locations cannot drift apart.
new_h5_model = tf.keras.models.load_model(KERAS_MODEL_PATH)

In [29]:
# Score the test split with the reloaded HDF5 model; each output row is the
# 10-way softmax output for one image.
new_h5_model.predict(test_dataset)

array([[9.83229938e-07, 1.24387223e-10, 4.19323258e-02, ...,
        8.77290405e-03, 8.02751785e-08, 5.40408607e-09],
       [2.70792316e-06, 2.05851766e-10, 3.34585016e-03, ...,
        7.20194294e-05, 1.21017573e-12, 2.66253382e-13],
       [1.14282351e-07, 1.07032106e-13, 2.44066771e-02, ...,
        1.19126597e-02, 5.44378054e-06, 3.30457550e-10],
       ...,
       [5.95589782e-08, 1.03695122e-13, 2.33214572e-02, ...,
        3.58368561e-05, 5.46807155e-10, 4.90498440e-12],
       [5.41270783e-05, 7.05444157e-01, 1.26322921e-05, ...,
        1.99455556e-07, 2.05786344e-09, 1.22260246e-06],
       [4.94959898e-17, 5.90443357e-14, 1.07139420e-09, ...,
        9.99999523e-01, 2.54295486e-14, 1.23019809e-16]], dtype=float32)

In [30]:
# Export the model in SavedModel (protobuf) format.
SAVED_MODEL_PATH = "./models/pb/tfsaved_cifar10"
tf.saved_model.save(obj=model, export_dir=SAVED_MODEL_PATH)

INFO:tensorflow:Assets written to: ./models/pb/tfsaved_cifar10/assets


In [31]:
# Inspect the top level of the SavedModel export directory.
!ls -lrt {SAVED_MODEL_PATH}

total 272
drwxr-xr-x  4 mbp16  staff     128 Mar 21 15:45 [34mvariables[m[m
drwxr-xr-x  2 mbp16  staff      64 Mar 21 15:45 [34massets[m[m
-rw-r--r--  1 mbp16  staff  138056 Mar 21 15:45 saved_model.pb


In [32]:
# Inspect the variables/ subdirectory of the SavedModel export.
!ls -lrt {SAVED_MODEL_PATH}/variables

total 25120
-rw-r--r--  1 mbp16  staff  12856259 Mar 21 15:45 variables.data-00000-of-00001
-rw-r--r--  1 mbp16  staff      2303 Mar 21 15:45 variables.index


In [33]:
# Load the SavedModel from a path on localhost, inside a fresh strategy scope.
# experimental_io_device asks TensorFlow to perform the file I/O on the
# localhost job.
load_options = tf.saved_model.LoadOptions(experimental_io_device='/job:localhost')
load_strategy = tf.distribute.MirroredStrategy()
with load_strategy.scope():
    loaded_pb = tf.keras.models.load_model(SAVED_MODEL_PATH, options=load_options)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [34]:
# Score the test split with the model reloaded from the SavedModel export; the
# recorded output matches the HDF5 model's predictions above.
loaded_pb.predict(test_dataset)

array([[9.83229938e-07, 1.24387223e-10, 4.19323258e-02, ...,
        8.77290405e-03, 8.02751785e-08, 5.40408607e-09],
       [2.70792316e-06, 2.05851766e-10, 3.34585016e-03, ...,
        7.20194294e-05, 1.21017573e-12, 2.66253382e-13],
       [1.14282351e-07, 1.07032106e-13, 2.44066771e-02, ...,
        1.19126597e-02, 5.44378054e-06, 3.30457550e-10],
       ...,
       [5.95589782e-08, 1.03695122e-13, 2.33214572e-02, ...,
        3.58368561e-05, 5.46807155e-10, 4.90498440e-12],
       [5.41270783e-05, 7.05444157e-01, 1.26322921e-05, ...,
        1.99455556e-07, 2.05786344e-09, 1.22260246e-06],
       [4.94959898e-17, 5.90443357e-14, 1.07139420e-09, ...,
        9.99999523e-01, 2.54295486e-14, 1.23019809e-16]], dtype=float32)

In [35]:
# Show the names of the serving signatures stored with the loaded model.
signature_names = list(loaded_pb.signatures.keys())
print(signature_names)

['serving_default']


In [36]:
# Predict on the same test samples, passed as a NumPy array instead of a
# tf.data.Dataset. The recorded values agree with the dataset-based run up to
# float rounding - presumably due to different batching; TODO confirm.
loaded_pb.predict(test_images[500:])

array([[9.83229938e-07, 1.24386987e-10, 4.19323482e-02, ...,
        8.77290778e-03, 8.02750222e-08, 5.40406520e-09],
       [2.70792543e-06, 2.05851364e-10, 3.34585016e-03, ...,
        7.20195021e-05, 1.21018039e-12, 2.66253382e-13],
       [1.14282244e-07, 1.07032106e-13, 2.44066827e-02, ...,
        1.19126532e-02, 5.44377508e-06, 3.30457550e-10],
       ...,
       [5.95592056e-08, 1.03695515e-13, 2.33214572e-02, ...,
        3.58369907e-05, 5.46810319e-10, 4.90499351e-12],
       [5.41268419e-05, 7.05444515e-01, 1.26322866e-05, ...,
        1.99454888e-07, 2.05786455e-09, 1.22260064e-06],
       [4.94963670e-17, 5.90443357e-14, 1.07140241e-09, ...,
        9.99999523e-01, 2.54296943e-14, 1.23019809e-16]], dtype=float32)

## TensorFlow Serving
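Save the model in the directory layout TensorFlow Serving (TFS) expects: a model-name directory containing a numeric version subdirectory (here `CIFAR10/001`).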

In [38]:
# Export the model again as a SavedModel (protobuf), this time under
# ./models/CIFAR10/001.
# NOTE(review): this rebinds SAVED_MODEL_PATH, so re-running the earlier
# load/ls cells afterwards would point at this directory - confirm intended.
SAVED_MODEL_PATH = "./models/CIFAR10/001"
tf.saved_model.save(obj=model, export_dir=SAVED_MODEL_PATH)

INFO:tensorflow:Assets written to: ./models/CIFAR10/001/assets
