# Optimising a TensorFlow SavedModel for Serving

This notebooks shows how to optimise the TensorFlow exported SavedModel by **shrinking** its size (to have less memory and disk footprints), and **improving** prediction latency. This can be accopmlished by applying the following:
* **Freezing**: That is, converting the variables stored in a checkpoint file of the SavedModel into constants stored directly in the model graph.
* **Pruning**: That is, stripping unused nodes during the prediction path of the graph, merging duplicate nodes, as well as removing other node ops like summary, identity, etc.
* **Quantisation**:  That is, converting any large float Const op into an eight-bit equivalent, followed by a float conversion op so that the result is usable by subsequent nodes.
* **Other refinements**: That includes constant folding, batch_norm folding, fusing convolusion, etc.

The optimisation operations we apply in this example are from the TensorFlow [Graph Conversion Tool](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md#fold_constants), which is a c++ command-line tool. We use the Python APIs to call the c++ libraries. 

The Graph Transform Tool is designed to work on models that are saved as GraphDef files, usually in a binary protobuf format. However, the model exported after training and estimator is in SavedModel format (saved_model.pb file + variables folder with variables.data-* and variables.index files). 

We need to optimise the mode and keep it the SavedModel format. Thus, the optimisation steps will be:
1. Freeze the SavedModel: SavedModel -> GraphDef
2. Optimisae the freezed model: GraphDef -> GraphDef
3. Convert the optimised freezed model to SavedModel: GraphDef -> SavedModel

In [1]:
import os
import sys
import numpy as np
from datetime import datetime
from absl import flags
import tarfile

from six.moves import urllib

import tensorflow as tf

from official.utils.flags import core as flags_core
from official.utils.logs import hooks_helper
from official.resnet import resnet_model
from official.resnet import resnet_run_loop

import matplotlib.pyplot as plt
print ("TensorFlow : {}".format(tf.__version__))

#tf.logging.set_verbosity(tf.logging.INFO)

TensorFlow : 1.13.1


## 1. Train and Export a TensorFlow DNNClassifier

### 1.1 Import Data

In [2]:
MODELS_LOCATION = 'models\\resnet'
MODEL_NAME = 'dnn'
D_DIR= 'dataset'
model_dir = os.path.join(MODELS_LOCATION, MODEL_NAME)
datadir = os.path.join(MODELS_LOCATION, D_DIR)

DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
_HEIGHT = 32
_WIDTH = 32
_NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = _HEIGHT * _WIDTH * _NUM_CHANNELS
# The record is the image plus a one-byte label
_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1
_NUM_CLASSES = 10
_NUM_DATA_FILES = 5

_NUM_IMAGES = {
    'train': 50000,
    'validation': 10000,
}

DATASET_NAME = 'CIFAR-10'

print(model_dir)

def checkdata(FLAGS):  
  """Download and extract the tarball from Alex's website."""
  if not os.path.exists(FLAGS.data_dir):
    os.makedirs(FLAGS.data_dir)

  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(FLAGS.data_dir, filename)

  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (
          filename, 100.0 * count * block_size / total_size))
      sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

  tarfile.open(filepath, 'r:gz').extractall(FLAGS.data_dir)

models\resnet\dnn


In [3]:
def get_filenames(is_training, data_dir):
  """Returns a list of filenames."""
  data_dir = os.path.join(data_dir, 'cifar-10-batches-bin')

  assert os.path.exists(data_dir), (
      'Run cifar10_download_and_extract.py first to download and extract the '
      'CIFAR-10 data.')

  if is_training:
    return [
        os.path.join(data_dir, 'data_batch_%d.bin' % i)
        for i in range(1, _NUM_DATA_FILES + 1)
    ]
  else:
    return [os.path.join(data_dir, 'test_batch.bin')]


def parse_record(raw_record, is_training):
  """Parse CIFAR-10 image and label from a raw record."""
  # Convert bytes to a vector of uint8 that is record_bytes long.
  record_vector = tf.decode_raw(raw_record, tf.uint8)

  # The first byte represents the label, which we convert from uint8 to int32
  # and then to one-hot.
  label = tf.cast(record_vector[0], tf.int32)

  # The remaining bytes after the label represent the image, which we reshape
  # from [depth * height * width] to [depth, height, width].
  depth_major = tf.reshape(record_vector[1:_RECORD_BYTES],
                           [_NUM_CHANNELS, _HEIGHT, _WIDTH])

  # Convert from [depth, height, width] to [height, width, depth], and cast as
  # float32.
  image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

  image = preprocess_image(image, is_training)

  return image, label


def preprocess_image(image, is_training):
  """Preprocess a single image of layout [height, width, depth]."""
  if is_training:
    # Resize the image to add four extra pixels on each side.
    image = tf.image.resize_image_with_crop_or_pad(
        image, _HEIGHT + 8, _WIDTH + 8)

    # Randomly crop a [_HEIGHT, _WIDTH] section of the image.
    image = tf.random_crop(image, [_HEIGHT, _WIDTH, _NUM_CHANNELS])

    # Randomly flip the image horizontally.
    image = tf.image.random_flip_left_right(image)

  # Subtract off the mean and divide by the variance of the pixels.
  image = tf.image.per_image_standardization(image)
  return image


def input_fn(is_training, data_dir, batch_size, num_epochs=1, num_gpus=None):
  """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_gpus: The number of gpus used for training.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  return resnet_run_loop.process_record_dataset(
      dataset=dataset,
      is_training=is_training,
      batch_size=batch_size,
      shuffle_buffer=_NUM_IMAGES['train'],
      parse_record_fn=parse_record,
      num_epochs=num_epochs,
      num_gpus=num_gpus,
      examples_per_epoch=_NUM_IMAGES['train'] if is_training else None
  )


def get_synth_input_fn():
  return resnet_run_loop.get_synth_input_fn(
      _HEIGHT, _WIDTH, _NUM_CHANNELS, _NUM_CLASSES)

### 1.2 Estimator

#### 1.2.1 Model Function

In [4]:
class Cifar10Model(resnet_model.Model):
  """Model class with appropriate defaults for CIFAR-10 data."""

  def __init__(self, resnet_size, data_format=None, num_classes=_NUM_CLASSES,
               resnet_version=resnet_model.DEFAULT_VERSION,
               dtype=resnet_model.DEFAULT_DTYPE):
    """These are the parameters that work for CIFAR-10 data.

    Args:
      resnet_size: The number of convolutional layers needed in the model.
      data_format: Either 'channels_first' or 'channels_last', specifying which
        data format to use when setting up the model.
      num_classes: The number of output classes needed from the model. This
        enables users to extend the same model to their own datasets.
      resnet_version: Integer representing which version of the ResNet network
      to use. See README for details. Valid values: [1, 2]
      dtype: The TensorFlow dtype to use for calculations.

    Raises:
      ValueError: if invalid resnet_size is chosen
    """
    if resnet_size % 6 != 2:
      raise ValueError('resnet_size must be 6n + 2:', resnet_size)

    num_blocks = (resnet_size - 2) // 6

    super(Cifar10Model, self).__init__(
        resnet_size=resnet_size,
        bottleneck=False,
        num_classes=num_classes,
        num_filters=16,
        kernel_size=3,
        conv_stride=1,
        first_pool_size=None,
        first_pool_stride=None,
        block_sizes=[num_blocks] * 3,
        block_strides=[1, 2, 2],
        final_size=64,
        resnet_version=resnet_version,
        data_format=data_format,
        dtype=dtype
    )

In [5]:
def cifar10_model_fn(features, labels, mode, params):
  """Model function for CIFAR-10."""
  print("feature: {}".format(features))
  if isinstance(features,dict):
     features = features['image'] 
  features = tf.reshape(features, [-1, _HEIGHT, _WIDTH, _NUM_CHANNELS])

  learning_rate_fn = resnet_run_loop.learning_rate_with_decay(
      batch_size=params['batch_size'], batch_denom=128,
      num_images=_NUM_IMAGES['train'], boundary_epochs=[100, 150, 200],
      decay_rates=[1, 0.1, 0.01, 0.001])

  # We use a weight decay of 0.0002, which performs better
  # than the 0.0001 that was originally suggested.
  weight_decay = 2e-4

  # Empirical testing showed that including batch_normalization variables
  # in the calculation of regularized loss helped validation accuracy
  # for the CIFAR-10 dataset, perhaps because the regularization prevents
  # overfitting on the small data set. We therefore include all vars when
  # regularizing and computing loss during training.
  def loss_filter_fn(_):
    return True

  return resnet_run_loop.resnet_model_fn(
      features=features,
      labels=labels,
      mode=mode,
      model_class=Cifar10Model,
      resnet_size=params['resnet_size'],
      weight_decay=weight_decay,
      learning_rate_fn=learning_rate_fn,
      momentum=0.9,
      data_format=params['data_format'],
      resnet_version=params['resnet_version'],
      loss_scale=params['loss_scale'],
      loss_filter_fn=loss_filter_fn,
      dtype=params['dtype'],
      fine_tune=params['fine_tune']
  )


### 1.3 Train and Evaluate

#### 1.3.1 Experiment Function

In [6]:
def run_cifar(flags_obj):
  """Run ResNet CIFAR-10 training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values.
  """
  input_function = (flags_obj.use_synthetic_data and get_synth_input_fn()
                    or input_fn)
  return resnet_run_loop.resnet_main(
      flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
      shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])


#### 1.3.3 Run Experiment

In [None]:
def define_cifar_flags():
    resnet_run_loop.define_resnet_flags()
    flags.adopt_module_key_flags(resnet_run_loop)
    flags_core.set_defaults(data_dir=datadir,
                          model_dir=model_dir,
                          resnet_size='32',
                          train_epochs=2,
                          epochs_between_evals=1,
                          batch_size=128)

define_cifar_flags()

if tf.gfile.Exists(model_dir):
    print("Removing previous artifacts...")
    tf.gfile.DeleteRecursively(model_dir)

sys.argv = "-f test".split(" ")
flags.FLAGS(sys.argv)
tf.logging.set_verbosity(tf.logging.INFO)
checkdata(flags.FLAGS)
estimator = run_cifar(flags.FLAGS)



>> Downloading cifar-10-binary.tar.gz 100.0%
Successfully downloaded cifar-10-binary.tar.gz 170052171 bytes.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Initializing RunConfig with distribution strategies.


I0506 14:57:11.892715  1928 run_config.py:532] Initializing RunConfig with distribution strategies.


INFO:tensorflow:Not using Distribute Coordinator.


I0506 14:57:11.893713  1928 estimator_training.py:166] Not using Distribute Coordinator.


INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002028696EAC8>, '_session_config': allow_soft_placement: true
, '_evaluation_master': '', '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_eval_distribute': None, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.contrib.distribute.python.one_device_strategy.OneDeviceStrategy object at 0x0000020287A8F358>, '_save_checkpoints_steps': None, '_global_id_in_cluster': 0, '_task_type': 'worker', '_distribute_coordinator_mode': None, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_task_id': 0, '_device_fn': None, '_model_dir': 'models\\resnet\\dnn', '_master': '', '_is_chief': True, '_experimental_distribute': None, '_protocol': None, '_service': None, '_num_worker_replicas': 1}


I0506 14:57:11.895707  1928 estimator.py:201] Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002028696EAC8>, '_session_config': allow_soft_placement: true
, '_evaluation_master': '', '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_eval_distribute': None, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.contrib.distribute.python.one_device_strategy.OneDeviceStrategy object at 0x0000020287A8F358>, '_save_checkpoints_steps': None, '_global_id_in_cluster': 0, '_task_type': 'worker', '_distribute_coordinator_mode': None, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_task_id': 0, '_device_fn': None, '_model_dir': 'models\\resnet\\dnn', '_master': '', '_is_chief': True, '_experimental_distribute': None, '_protocol': None, '_service': None, '_num_worker_replicas': 1}


INFO:tensorflow:Benchmark run: {'machine_config': {'cpu_info': {'cpu_info': 'Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz', 'mhz_per_cpu': 1600.0, 'num_cores': 8}, 'gpu_info': {'count': 0}, 'memory_available': 3000918016, 'memory_total': 8436670464}, 'run_date': '2019-05-06T21:57:11.896706Z', 'tensorflow_version': {'git_hash': "b'unknown'", 'version': '1.13.1'}, 'test_id': None, 'run_parameters': [{'name': 'batch_size', 'long_value': 128}, {'name': 'dtype', 'string_value': "<dtype: 'float32'>"}, {'name': 'resnet_size', 'string_value': '32'}, {'name': 'resnet_version', 'string_value': '2'}, {'name': 'synthetic_data', 'bool_value': 'False'}, {'name': 'train_epochs', 'long_value': 2}], 'tensorflow_environment_variables': [{'name': 'TF_ENABLE_WINOGRAD_NONFUSED', 'value': '1'}], 'model_name': 'resnet', 'dataset': {'name': 'CIFAR-10'}}


I0506 14:57:15.993318  1928 logger.py:151] Benchmark run: {'machine_config': {'cpu_info': {'cpu_info': 'Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz', 'mhz_per_cpu': 1600.0, 'num_cores': 8}, 'gpu_info': {'count': 0}, 'memory_available': 3000918016, 'memory_total': 8436670464}, 'run_date': '2019-05-06T21:57:11.896706Z', 'tensorflow_version': {'git_hash': "b'unknown'", 'version': '1.13.1'}, 'test_id': None, 'run_parameters': [{'name': 'batch_size', 'long_value': 128}, {'name': 'dtype', 'string_value': "<dtype: 'float32'>"}, {'name': 'resnet_size', 'string_value': '32'}, {'name': 'resnet_version', 'string_value': '2'}, {'name': 'synthetic_data', 'bool_value': 'False'}, {'name': 'train_epochs', 'long_value': 2}], 'tensorflow_environment_variables': [{'name': 'TF_ENABLE_WINOGRAD_NONFUSED', 'value': '1'}], 'model_name': 'resnet', 'dataset': {'name': 'CIFAR-10'}}


INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000020287ABDD68>, '_session_config': allow_soft_placement: true
, '_evaluation_master': '', '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_eval_distribute': None, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.contrib.distribute.python.one_device_strategy.OneDeviceStrategy object at 0x0000020287ABDE80>, '_save_checkpoints_steps': None, '_global_id_in_cluster': 0, '_task_type': 'worker', '_distribute_coordinator_mode': None, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_task_id': 0, '_device_fn': None, '_model_dir': 'models\\resnet\\dnn', '_master': '', '_is_chief': True, '_experimental_distribute': None, '_protocol': None, '_service': None, '_num_worker_replicas': 1}


I0506 14:57:15.999259  1928 estimator.py:201] Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000020287ABDD68>, '_session_config': allow_soft_placement: true
, '_evaluation_master': '', '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_eval_distribute': None, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.contrib.distribute.python.one_device_strategy.OneDeviceStrategy object at 0x0000020287ABDE80>, '_save_checkpoints_steps': None, '_global_id_in_cluster': 0, '_task_type': 'worker', '_distribute_coordinator_mode': None, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_save_checkpoints_secs': 600, '_task_id': 0, '_device_fn': None, '_model_dir': 'models\\resnet\\dnn', '_master': '', '_is_chief': True, '_experimental_distribute': None, '_protocol': None, '_service': None, '_num_worker_replicas': 1}


INFO:tensorflow:Starting cycle: 0/2


I0506 14:57:16.002240  1928 resnet_run_loop.py:482] Starting cycle: 0/2


Instructions for updating:
Use `tf.data.experimental.map_and_batch(...)`.


W0506 14:57:16.072904  1928 deprecation.py:323] From C:\tmp\git\models\official\resnet\resnet_run_loop.py:98: map_and_batch (from tensorflow.contrib.data.python.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.experimental.map_and_batch(...)`.


Instructions for updating:
Colocations handled automatically by placer.


W0506 14:57:16.133448  1928 deprecation.py:323] From c:\users\steve\anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py:3632: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


W0506 14:57:16.229541  1928 deprecation.py:323] From c:\users\steve\anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\image_ops_impl.py:1241: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


INFO:tensorflow:Calling model_fn.


I0506 14:57:16.487293  1928 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Calling model_fn.


I0506 14:57:16.488329  1928 estimator.py:1111] Calling model_fn.


feature: Tensor("IteratorGetNext:0", shape=(?, 32, 32, 3), dtype=float32, device=/device:CPU:0)
Instructions for updating:
Use keras.layers.conv2d instead.


W0506 14:57:16.495277  1928 deprecation.py:323] From C:\tmp\git\models\official\resnet\resnet_model.py:95: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.conv2d instead.


Instructions for updating:
Use keras.layers.batch_normalization instead.


W0506 14:57:16.527101  1928 deprecation.py:323] From C:\tmp\git\models\official\resnet\resnet_model.py:54: batch_normalization (from tensorflow.python.layers.normalization) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.batch_normalization instead.


Instructions for updating:
Use keras.layers.dense instead.


W0506 14:57:18.108753  1928 deprecation.py:323] From C:\tmp\git\models\official\resnet\resnet_model.py:546: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.


Instructions for updating:
Use tf.cast instead.


W0506 14:57:18.145655  1928 deprecation.py:323] From c:\users\steve\anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\losses\losses_impl.py:209: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.


Instructions for updating:
Use tf.cast instead.


W0506 14:57:18.599140  1928 deprecation.py:323] From c:\users\steve\anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.


INFO:tensorflow:Done calling model_fn.


I0506 14:57:19.863752  1928 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


I0506 14:57:19.864749  1928 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0506 14:57:20.099123  1928 basic_session_run_hooks.py:527] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0506 14:57:20.965530  1928 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Running local_init_op.


I0506 14:57:21.551629  1928 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0506 14:57:21.620287  1928 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into models\resnet\dnn\model.ckpt.


I0506 14:57:23.218422  1928 basic_session_run_hooks.py:594] Saving checkpoints for 0 into models\resnet\dnn\model.ckpt.


INFO:tensorflow:Initialize strategy


I0506 14:57:23.854934  1928 util.py:164] Initialize strategy


INFO:tensorflow:cross_entropy = 2.397293, learning_rate = 0.1, train_accuracy = 0.1015625


I0506 14:57:29.112064  1928 basic_session_run_hooks.py:249] cross_entropy = 2.397293, learning_rate = 0.1, train_accuracy = 0.1015625


INFO:tensorflow:loss = 2.6375375, step = 0


I0506 14:57:29.115056  1928 basic_session_run_hooks.py:249] loss = 2.6375375, step = 0


### 1.4 Export the model

In [8]:
def make_serving_input_receiver_fn():
    inputs = {'image':tf.placeholder(shape=[None,_HEIGHT, _WIDTH,_NUM_CHANNELS], dtype=tf.float32, name='image')}
    return tf.estimator.export.build_raw_serving_input_receiver_fn(inputs)

export_dir = os.path.join(model_dir, 'export')

if tf.gfile.Exists(export_dir):
    tf.gfile.DeleteRecursively(export_dir)
        
estimator.export_savedmodel(
    export_dir_base=export_dir,
    serving_input_receiver_fn=make_serving_input_receiver_fn()
)

INFO:tensorflow:Calling model_fn.


I0506 10:23:13.596155 46044 estimator.py:1111] Calling model_fn.


feature: {'image': <tf.Tensor 'image:0' shape=(?, 32, 32, 3) dtype=float32>}
INFO:tensorflow:Done calling model_fn.


I0506 10:23:14.553626 46044 estimator.py:1113] Done calling model_fn.


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


W0506 10:23:14.554592 46044 deprecation.py:323] From c:\users\steve\anaconda2\envs\tensorflow\lib\site-packages\tensorflow\python\saved_model\signature_def_utils_impl.py:205: build_tensor_info (from tensorflow.python.saved_model.utils_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


INFO:tensorflow:Signatures INCLUDED in export for Regress: None


I0506 10:23:14.556592 46044 export.py:587] Signatures INCLUDED in export for Regress: None


INFO:tensorflow:Signatures INCLUDED in export for Classify: None


I0506 10:23:14.557586 46044 export.py:587] Signatures INCLUDED in export for Classify: None


INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict', 'serving_default']


I0506 10:23:14.558584 46044 export.py:587] Signatures INCLUDED in export for Predict: ['predict', 'serving_default']


INFO:tensorflow:Signatures INCLUDED in export for Eval: None


I0506 10:23:14.559579 46044 export.py:587] Signatures INCLUDED in export for Eval: None


INFO:tensorflow:Signatures INCLUDED in export for Train: None


I0506 10:23:14.560577 46044 export.py:587] Signatures INCLUDED in export for Train: None


INFO:tensorflow:Restoring parameters from models\resnet\dnn\model.ckpt-782


I0506 10:23:14.730169 46044 saver.py:1270] Restoring parameters from models\resnet\dnn\model.ckpt-782


INFO:tensorflow:Assets added to graph.


I0506 10:23:14.871752 46044 builder_impl.py:654] Assets added to graph.


INFO:tensorflow:No assets to write.


I0506 10:23:14.872750 46044 builder_impl.py:449] No assets to write.


INFO:tensorflow:SavedModel written to: models\resnet\dnn\export\temp-b'1557163393'\saved_model.pb


I0506 10:23:15.086852 46044 builder_impl.py:414] SavedModel written to: models\resnet\dnn\export\temp-b'1557163393'\saved_model.pb


b'models\\resnet\\dnn\\export\\1557163393'

## 2. Inspect the Exported SavedModel

In [9]:
%%bash

saved_models_base=models/resnet/dnn/export/
saved_model_dir=${saved_models_base}$(ls ${saved_models_base} | tail -n 1)
echo ${saved_model_dir}
ls ${saved_model_dir}
saved_model_cli show --dir=${saved_model_dir} --all

models/resnet/dnn/export/1557163393
saved_model.pb
variables

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['predict']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['image'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 32, 32, 3)
        name: image:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['class_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1, 1)
        name: strided_slice:0
    outputs['classes'] tensor_info:
        dtype: DT_INT64
        shape: (-1)
        name: ArgMax_2:0
    outputs['probabilities'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 10)
        name: softmax_tensor_1:0
  Method name is: tensorflow/serving/predict

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['image'] tensor_info:
        dtype: DT_FLOAT
        shap

### Prediction with SavedModel

In [15]:
def inference_test(saved_model_dir, signature="predict", input_name='image', batch_size=300, repeat=100):

#    es = dataset.test(datadir)
#    iter = images.make_one_shot_iterator()
    #print ("Eval data shape: {}".format(eval_data.shape))
#    eval_data, eval_labels = iter.get_next()
    tf.logging.set_verbosity(tf.logging.ERROR)
    
    time_start = datetime.utcnow() 
    
    predictor = tf.contrib.predictor.from_saved_model(
        export_dir = saved_model_dir,
        signature_def_key=signature
    )
    time_end = datetime.utcnow() 
        
    time_elapsed = time_end - time_start
   
    print ("")
    print("Model loading time: {} seconds".format(time_elapsed.total_seconds()))
    print ("")
    
    time_start = datetime.utcnow() 
    output = None
#    def eval_input_fn():
#        return dataset.test(datadir).batch(
#            batch_size).make_one_shot_iterator().get_next()       
    
    def eval_input_fn():
        input_function = (flags.FLAGS.use_synthetic_data and get_synth_input_fn()
                    or input_fn)        
        return input_function(is_training=False, data_dir=flags.FLAGS.data_dir,
            batch_size=batch_size,
                num_epochs=1).make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        for i in range(repeat):
            input_images, labels = sess.run(eval_input_fn())        
            #print(input_images[0])
            input_images = input_images.reshape(batch_size, _HEIGHT, _WIDTH, _NUM_CHANNELS)
            output = predictor(
                {
                    input_name: input_images
                }
            )
    
    time_end = datetime.utcnow() 

    time_elapsed_sec = (time_end - time_start).total_seconds()
    
    print ("Inference elapsed time: {} seconds".format(time_elapsed_sec))
    print ("")
    #print ("output {}".format(output))
    print ("Prediction produced for {} instances batch, repeated {} times".format(len(output['class_ids']), repeat))
    print ("Average latency per batch: {} seconds".format(time_elapsed_sec/repeat))
    print ("")
    
    print ("Prediction output for the last instance:")
    for key in output.keys():
        print ("{}: {}".format(key,output[key][0]))

## 3. Test Prediction with SavedModel 

In [16]:
saved_model_dir = os.path.join(export_dir, os.listdir(export_dir)[-1]) 
print(saved_model_dir)
inference_test(saved_model_dir)

models\resnet\dnn\export\1557163393

Model loading time: 0.452602 seconds

Inference elapsed time: 264.680527 seconds

Prediction produced for 300 instances batch, repeated 100 times
Average latency per batch: 2.6468052699999998 seconds

Prediction output for the last instance:
classes: 9
class_ids: [9]
probabilities: [1.2987635e-02 2.2308657e-01 3.0801960e-04 4.4191489e-03 2.3142706e-05
 1.4559949e-03 1.7188782e-03 4.6349727e-04 7.1629445e-05 7.5546551e-01]


### Describe GraphDef

In [17]:
def describe_graph(graph_def, show_nodes=False):
    
    print ('Input Feature Nodes: {}'.format([node.name for node in graph_def.node if node.op=='Placeholder']))
    print ("")
    print ('Unused Nodes: {}'.format([node.name for node in graph_def.node if 'unused'  in node.name]))
    print ("")
    print ('Output Nodes: {}'.format( [node.name for node in graph_def.node if 'predictions' in node.name]))
    print ("")
    print ('Quanitization Nodes: {}'.format( [node.name for node in graph_def.node if 'quant' in node.name]))
    print ("")
    print ('Constant Count: {}'.format( len([node for node in graph_def.node if node.op=='Const'])))
    print ("")
    print ('Variable Count: {}'.format( len([node for node in graph_def.node if 'Variable' in node.op])))
    print ("")
    print ('Identity Count: {}'.format( len([node for node in graph_def.node if node.op=='Identity'])))
    print ("")
    print ('Total nodes: {}'.format( len(graph_def.node)))
    print ('')
    node_names = [node.name for node in graph_def.node if node.op=='Identity']
    print(node_names)

    if show_nodes==True:
        for node in graph_def.node:
            print ('Op:{} - Name: {}'.format(node.op, node.name))

## 4. Describe the SavedModel Graph (before optimisation)

### Load GraphDef from a SavedModel Directory

In [18]:
def get_graph_def_from_saved_model(saved_model_dir):
    
    print (saved_model_dir)
    print ("")
    
    from tensorflow.python.saved_model import tag_constants
    
    with tf.Session() as session:
        meta_graph_def = tf.saved_model.loader.load(
            session,
            tags=[tag_constants.SERVING],
            export_dir=saved_model_dir
        )
        
    return meta_graph_def.graph_def

In [19]:
describe_graph(get_graph_def_from_saved_model(saved_model_dir))

models\resnet\dnn\export\1557163393

Input Feature Nodes: ['image']

Unused Nodes: []

Output Nodes: []

Quanitization Nodes: []

Constant Count: 318

Variable Count: 161

Identity Count: 169

Total nodes: 1218

['global_step/read', 'resnet_model/conv2d/kernel/read', 'resnet_model/initial_conv', 'resnet_model/batch_normalization/gamma/read', 'resnet_model/batch_normalization/beta/read', 'resnet_model/batch_normalization/moving_mean/read', 'resnet_model/batch_normalization/moving_variance/read', 'resnet_model/conv2d_1/kernel/read', 'resnet_model/conv2d_2/kernel/read', 'resnet_model/batch_normalization_1/gamma/read', 'resnet_model/batch_normalization_1/beta/read', 'resnet_model/batch_normalization_1/moving_mean/read', 'resnet_model/batch_normalization_1/moving_variance/read', 'resnet_model/conv2d_3/kernel/read', 'resnet_model/batch_normalization_2/gamma/read', 'resnet_model/batch_normalization_2/beta/read', 'resnet_model/batch_normalization_2/moving_mean/read', 'resnet_model/batch_normal

### Get model size

In [20]:
def get_size(model_dir):
    
    print (model_dir)
    print ("")
    
    pb_size = os.path.getsize(os.path.join(model_dir,'saved_model.pb'))
    
    variables_size = 0
    if os.path.exists(os.path.join(model_dir,'variables/variables.data-00000-of-00001')):
        variables_size = os.path.getsize(os.path.join(model_dir,'variables/variables.data-00000-of-00001'))
        variables_size += os.path.getsize(os.path.join(model_dir,'variables/variables.index'))

    print ("Model size: {} KB".format(round(pb_size/(1024.0),3)))
    print ("Variables size: {} KB".format(round( variables_size/(1024.0),3)))
    print ("Total Size: {} KB".format(round((pb_size + variables_size)/(1024.0),3)))
    

In [21]:
get_size(saved_model_dir)

models\resnet\dnn\export\1557163393

Model size: 350.947 KB
Variables size: 1838.546 KB
Total Size: 2189.493 KB


## 5. Freeze SavedModel

This function will convert the SavedModel into a GraphDef file (freezed_model.pb), and storing the variables as constrant to the freezed_model.pb

You need to define the graph output nodes for freezing. We are only interested in the **class_id**, which is produced by **head/predictions/ExpandDims** node

In [22]:
def freeze_graph(saved_model_dir):
    
    from tensorflow.python.tools import freeze_graph
    from tensorflow.python.saved_model import tag_constants
    
    output_graph_filename = os.path.join(saved_model_dir, "freezed_model.pb")
#    output_node_names = "head/predictions/ExpandDims"
#    output_node_names = "PREDICT/predictions/probabilities"
    output_node_names = "strided_slice"
    
    initializer_nodes = ""

    freeze_graph.freeze_graph(
        input_saved_model_dir=saved_model_dir,
        output_graph=output_graph_filename,
        saved_model_tags = tag_constants.SERVING,
        output_node_names=output_node_names,
        initializer_nodes=initializer_nodes,

        input_graph=None, 
        input_saver=False,
        input_binary=False, 
        input_checkpoint=None, 
        restore_op_name=None, 
        filename_tensor_name=None, 
        clear_devices=False,
        input_meta_graph=False,
    )
    
    print ("SavedModel graph freezed!")

In [23]:
node_names = [node.name for node in tf.get_default_graph().as_graph_def().node]

freeze_graph(saved_model_dir)

SavedModel graph freezed!


In [24]:
%%bash
saved_models_base=models/mnist/cnn_classifier/export/
saved_model_dir=${saved_models_base}$(ls ${saved_models_base} | tail -n 1)
echo ${saved_model_dir}
ls ${saved_model_dir}

models/mnist/cnn_classifier/export/1556574193
freezed_model.pb
optimised_model.pb
saved_model.pb
variables


## 6. Describe the freezed_model.pb Graph (after freezing)

### Load GraphDef from GraphDef File

In [25]:
def get_graph_def_from_file(graph_filepath):
    
    print (graph_filepath)
    print ("")
    
    from tensorflow.python import ops
    
    with ops.Graph().as_default():
        with tf.gfile.GFile(graph_filepath, "rb") as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            
            return graph_def
            

In [26]:
freezed_filepath=os.path.join(saved_model_dir,'freezed_model.pb')
describe_graph(get_graph_def_from_file(freezed_filepath))

models\resnet\dnn\export\1557163393\freezed_model.pb

Input Feature Nodes: ['image']

Unused Nodes: []

Output Nodes: []

Quanitization Nodes: []

Constant Count: 171

Variable Count: 0

Identity Count: 166

Total nodes: 460

['resnet_model/conv2d/kernel/read', 'resnet_model/initial_conv', 'resnet_model/batch_normalization/gamma/read', 'resnet_model/batch_normalization/beta/read', 'resnet_model/batch_normalization/moving_mean/read', 'resnet_model/batch_normalization/moving_variance/read', 'resnet_model/conv2d_1/kernel/read', 'resnet_model/conv2d_2/kernel/read', 'resnet_model/batch_normalization_1/gamma/read', 'resnet_model/batch_normalization_1/beta/read', 'resnet_model/batch_normalization_1/moving_mean/read', 'resnet_model/batch_normalization_1/moving_variance/read', 'resnet_model/conv2d_3/kernel/read', 'resnet_model/batch_normalization_2/gamma/read', 'resnet_model/batch_normalization_2/beta/read', 'resnet_model/batch_normalization_2/moving_mean/read', 'resnet_model/batch_normalizatio

## 8. Optimise the freezed_model.pb

### Optimise GraphDef

In [27]:
def optimize_graph(model_dir, graph_filename, transforms):
    
    from tensorflow.tools.graph_transforms import TransformGraph
    
    input_names = []
#    output_names = ['head/predictions/ExpandDims']
    output_names = ['strided_slice']
    
    graph_def = get_graph_def_from_file(os.path.join(model_dir, graph_filename))
    optimised_graph_def = TransformGraph(graph_def, 
                                         input_names,
                                         output_names,
                                         transforms 
                                        )
    tf.train.write_graph(optimised_graph_def,
                        logdir=model_dir,
                        as_text=False,
                        name='optimised_model.pb')
    
    print ("Freezed graph optimised!")

In [28]:
transforms = [
    'remove_nodes(op=Identity)', 
    'fold_constants(ignore_errors=true)',
    'fold_batch_norms',
#    'fuse_resize_pad_and_conv',
#    'quantize_weights',
#    'quantize_nodes',
    'merge_duplicate_nodes',
    'strip_unused_nodes', 
    'sort_by_execution_order'
]

optimize_graph(saved_model_dir, 'freezed_model.pb', transforms)

models\resnet\dnn\export\1557163393\freezed_model.pb

Freezed graph optimised!


In [29]:
%%bash
saved_models_base=models/mnist/cnn_classifier/export/
saved_model_dir=${saved_models_base}$(ls ${saved_models_base} | tail -n 1)
echo ${saved_model_dir}
ls ${saved_model_dir}

models/mnist/cnn_classifier/export/1556574193
freezed_model.pb
optimised_model.pb
saved_model.pb
variables


## 8. Describe the Optimised Graph

In [30]:
optimised_filepath=os.path.join(saved_model_dir,'optimised_model.pb')
describe_graph(get_graph_def_from_file(optimised_filepath))

models\resnet\dnn\export\1557163393\optimised_model.pb

Input Feature Nodes: ['image']

Unused Nodes: []

Output Nodes: []

Quanitization Nodes: []

Constant Count: 168

Variable Count: 0

Identity Count: 0

Total nodes: 291

[]


## 9. Convert Optimised graph (GraphDef) to SavedModel

In [31]:
def convert_graph_def_to_saved_model(graph_filepath):

    from tensorflow.python import ops
    export_dir=os.path.join(saved_model_dir,'optimised')

    if tf.gfile.Exists(export_dir):
        tf.gfile.DeleteRecursively(export_dir)

    graph_def = get_graph_def_from_file(graph_filepath)
    
    with tf.Session(graph=tf.Graph()) as session:
        tf.import_graph_def(graph_def, name="")
        tf.saved_model.simple_save(session,
                export_dir,
                inputs={
                    node.name: session.graph.get_tensor_by_name("{}:0".format(node.name)) 
                    for node in graph_def.node if node.op=='Placeholder'},
                outputs={
                    "class_ids": session.graph.get_tensor_by_name("strided_slice:0"),
                }
            )

        print ("Optimised graph converted to SavedModel!")

In [32]:
optimised_filepath=os.path.join(saved_model_dir,'optimised_model.pb')
convert_graph_def_to_saved_model(optimised_filepath)

models\resnet\dnn\export\1557163393\optimised_model.pb

Optimised graph converted to SavedModel!


### Optimised SavedModel Size

In [33]:
optimised_saved_model_dir = os.path.join(saved_model_dir,'optimised') 
get_size(optimised_saved_model_dir)

models\resnet\dnn\export\1557163393\optimised

Model size: 1885.037 KB
Variables size: 0.0 KB
Total Size: 1885.037 KB


In [37]:
%%bash

saved_models_base=models/resnet/dnn/export/
saved_model_dir=${saved_models_base}$(ls ${saved_models_base} | tail -n 1)/optimised
ls ${saved_model_dir}
saved_model_cli show --dir ${saved_model_dir} --all

saved_model.pb
variables

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['image'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 32, 32, 3)
        name: image:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['class_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1, 1)
        name: strided_slice:0
  Method name is: tensorflow/serving/predict


## 10. Prediction with the Optimised SavedModel

In [38]:
optimised_saved_model_dir = os.path.join(saved_model_dir,'optimised') 
print(optimised_saved_model_dir)
inference_test(saved_model_dir=optimised_saved_model_dir, signature='serving_default', input_name='image')

models\resnet\dnn\export\1557163393\optimised

Model loading time: 0.088734 seconds

Inference elapsed time: 737.303699 seconds

Prediction produced for 300 instances batch, repeated 100 times
Average latency per batch: 7.37303699 seconds

Prediction output for the last instance:
class_ids: [9]


# Cloud ML Engine Deployment and Prediction

In [32]:
PROJECT = 'steven-wang-playground'
BUCKET = 'steven-gcs-cloudml'
REGION = 'europe-west1'
MODEL_NAME = 'mnist_classifier'

os.environ['BUCKET'] = BUCKET
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION
os.environ['MODEL_NAME'] = MODEL_NAME

## 1. Upload the model artefacts to Google Cloud Storage bucket

In [None]:
%%bash

gsutil -m rm -r gs://${BUCKET}/tf-model-optimisation

In [None]:
%%bash

saved_models_base=models/mnist/cnn_classifier/export/
saved_model_dir=${saved_models_base}$(ls ${saved_models_base} | tail -n 1)

echo ${saved_model_dir}

gsutil -m cp -r ${saved_model_dir} gs://${BUCKET}/tf-model-optimisation/original

In [None]:
%%bash

saved_models_base=models/mnist/cnn_classifier/export/
saved_model_dir=${saved_models_base}$(ls ${saved_models_base} | tail -n 1)/optimised

echo ${saved_model_dir}

gsutil -m cp -r ${saved_model_dir} gs://${BUCKET}/tf-model-optimisation

## 2. Deploy models to Cloud ML Engine

Don't forget to delete the model and the model version if they were previously deployed!

In [None]:
%%bash

echo ${MODEL_NAME}

gcloud ml-engine models create ${MODEL_NAME} --regions=${REGION}

**Version: v_org** is the original SavedModel (before optimisation)

In [None]:
%%bash

MODEL_VERSION='v_org'
MODEL_ORIGIN=gs://${BUCKET}/tf-model-optimisation/original

gcloud ml-engine versions create ${MODEL_VERSION}\
            --model=${MODEL_NAME} \
            --origin=${MODEL_ORIGIN} \
            --runtime-version=1.10

**Version: v_opt** is the optimised SavedModel (after optimisation)

In [None]:
%%bash

MODEL_VERSION='v_opt'
MODEL_ORIGIN=gs://${BUCKET}/tf-model-optimisation/optimised

gcloud ml-engine versions create ${MODEL_VERSION}\
            --model=${MODEL_NAME} \
            --origin=${MODEL_ORIGIN} \
            --runtime-version=1.10

## 3. Cloud ML Engine online predictions

In [None]:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials

credentials = GoogleCredentials.get_application_default()
api = discovery.build(
    'ml', 'v1', 
    credentials=credentials, 
    discoveryServiceUrl='https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json'
)

    
def predict(version, instances):

    request_data = {'instances': instances}

    model_url = 'projects/{}/models/{}/versions/{}'.format(PROJECT, MODEL_NAME, version)
    response = api.projects().predict(body=request_data, name=model_url).execute()

    class_ids = None
    
    try:
        class_ids = [item["class_ids"] for item in response["predictions"]]
    except:
        print response
    
    return class_ids

In [None]:
def inference_cmle(version, batch=100, repeat=10):
    
    instances = [
            {'input_image': [float(i) for i in list(eval_data[img])] }
        for img in range(batch)
    ]

    #warmup request
    predict(version, instances[0])
    print 'Warm up request performed!'
    print 'Timer started...'
    print ''
    
    time_start = datetime.utcnow() 
    output = None
    
    for i in range(repeat):
        output = predict(version, instances)
    
    time_end = datetime.utcnow() 

    time_elapsed_sec = (time_end - time_start).total_seconds()
    
    print "Inference elapsed time: {} seconds".format(time_elapsed_sec)
    print ""
    
    print "Prediction produced for {} instances batch, repeated {} times".format(len(output), repeat)
    print "Average latency per batch: {} seconds".format(time_elapsed_sec/repeat)
    print ""
    
    print "Prediction output for the last instance: {}".format(output[0])


In [None]:
version='v_org'
inference_cmle(version)

In [None]:
version='v_opt'
inference_cmle(version)

## Happy serving!