# Convert saved_model to ONNX

Requirements: tf2onnx and onnxruntime need to be installed:
```
pip install tf2onnx
pip install onnxruntime
```

First, fix the batch size of the Keras model.

In [1]:
# Show the inputs and outputs (tensor names, dtypes and shapes) of the
# SavedModel's `serving_default` signature in the `serve` tag set.
!saved_model_cli show --dir saved_model/mobilenetv2 --tag_set serve --signature_def serving_default

2020-09-27 23:41:52.111385: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
The given SavedModel SignatureDef contains the following input(s):
  inputs['Input'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 160, 160, 3)
      name: serving_default_Input:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['predictions'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 1)
      name: StatefulPartitionedCall:0
Method name is: tensorflow/serving/predict


In [2]:
# Convert the SavedModel in saved_model/mobilenetv2 to ONNX format (opset 10)
# with tf2onnx and write the result to saved_model/mobilenetv2_ONNX/model.onnx
!python3 -m tf2onnx.convert --opset 10 --fold_const --saved-model saved_model/mobilenetv2 --output saved_model/mobilenetv2_ONNX/model.onnx

2020-09-27 23:41:54.875292: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-09-27 23:41:55.924140: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2020-09-27 23:41:56.605934: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-09-27 23:41:56.606634: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:42:00.0 name: GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.65GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2020-09-27 23:41:56.606659: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
2020-09-27 23:41:56.607898: I tensorflow/stream_executo

Test that the ONNX model works by running inference on it with onnxruntime.

In [3]:
import onnxruntime as rt
import tensorflow as tf
import tensorflow_datasets as tfds
import time

In [4]:
# Load the `train` split of cats_vs_dogs as (image, label) pairs, together with
# the dataset metadata. Progress bars are disabled to keep the cell output short.
tfds.disable_progress_bar()
ds, metadata = tfds.load('cats_vs_dogs', split='train', as_supervised=True, with_info=True)

# Converts an integer class id into its label string.
get_label_name = metadata.features['label'].int2str


def decode_prediction(x):
    """Turn a raw model output into a class id: 1 when `x` >= 0, otherwise 0."""
    if x >= 0:
        return 1
    return 0

In [5]:
# Run the converted ONNX model on the first 1000 dataset images with
# onnxruntime, then report the accuracy and the end-to-end throughput.
sess = rt.InferenceSession('saved_model/mobilenetv2_ONNX/model.onnx')

# Read the tensor names from the model itself instead of hard-coding the names
# the converter happened to generate, so the cell keeps working when the model
# is re-exported. Uses the first input and first output of the graph.
input_name = sess.get_inputs()[0].name
outputs = [sess.get_outputs()[0].name]

n_predictions = 0
n_correct_predictions = 0
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
# NOTE: the timed section also covers the TensorFlow preprocessing below, so
# the reported rate is end-to-end, not pure ONNX inference.
start_time = time.perf_counter()
for image, label in ds.take(1000):
    # Preprocess: cast to float32, rescale (to [-1, 1] assuming 8-bit pixel
    # values), resize to the 160x160 model input and add a batch dimension.
    x = tf.cast(image, tf.float32)
    x = (x/127.5)-1
    x = tf.image.resize(x, (160,160))
    x = tf.expand_dims(x, axis=0)
    x = x.numpy()

    preds = sess.run(outputs, {input_name: x})

    n_predictions += 1
    prediction = preds[0][0,0] # only process first object at first batch index
    decoded_pred = decode_prediction(prediction)
    if label.numpy() == decoded_pred:
        n_correct_predictions += 1
elapsed_time = time.perf_counter() - start_time
accuracy = n_correct_predictions / n_predictions
print('predicteded {} images with accuracy of {:.2f}% with a rate of {:.2f} images/s'.format(n_predictions, accuracy * 100, n_predictions/elapsed_time))

predicteded 1000 images with accuracy of 99.90% with a rate of 89.79 images/s
