## Installation

In [1]:
!pip install transformers onnxruntime==1.11.0 tf2onnx -q

In [2]:
!pip install tensorflow -q

## Imports

In [10]:
from transformers import ViTFeatureExtractor, TFViTForImageClassification
import numpy as np

import onnx
import timeit
import tf2onnx
import tensorflow as tf
import onnxruntime as ort

## Load model and feature extractor

In [4]:
# Checkpoint identifier shared by the feature extractor and the model below.
model_ckpt = "google/vit-base-patch16-224"

# Both objects are built from the same checkpoint so that the preprocessing
# configuration matches the weights being exported later in the notebook.
feature_extractor = ViTFeatureExtractor.from_pretrained(model_ckpt)
model = TFViTForImageClassification.from_pretrained(model_ckpt)

Downloading config.json: 100%|██████████| 68.0k/68.0k [00:00<00:00, 2.40MB/s]
Downloading tf_model.h5: 100%|██████████| 330M/330M [00:03<00:00, 97.1MB/s] 
2022-07-30 13:47:50.073866: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-07-30 13:47:50.073906: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-07-30 13:47:50.073924: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c2-onnx): /proc/driver/nvidia/version does not exist
2022-07-30 13:47:50.074160: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operatio

## Convert to ONNX

In [5]:
# `feature_extractor.size` is a plain int in older transformers releases and a
# dict such as {"height": 224, "width": 224} in newer ones. Reduce it to a
# single int, since this notebook uses one value for both spatial dimensions.
extractor_size = feature_extractor.size
input_size = extractor_size["height"] if isinstance(extractor_size, dict) else extractor_size

# Batch dimension is left dynamic (None); layout is channels-first and the
# input tensor is named "pixel_values" so ONNX Runtime can feed it by name.
input_signature = [
    tf.TensorSpec([None, 3, input_size, input_size], tf.float32, name="pixel_values")
]

# from_keras returns a 2-tuple; only the first element (the ONNX model) is used here.
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=15)

# The output file is named after the last path component of the checkpoint id.
onnx_model_path = model_ckpt.split("/")[-1] + ".onnx"
onnx.save(onnx_model, onnx_model_path)

2022-07-30 13:48:35.619513: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2022-07-30 13:48:35.619679: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


2022-07-30 13:48:44.081644: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2022-07-30 13:48:44.082179: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session


In [6]:
# Long listing (human-readable size) of the exported ONNX file; the path is
# interpolated from the `onnx_model_path` Python variable.
!ls -lh {onnx_model_path}

-rw-r--r-- 1 jupyter jupyter 331M Jul 30 13:49 vit-base-patch16-224.onnx


## Benchmarking speed

In [7]:
# One random channels-first image batch, kept both as a TF tensor (for the
# Keras model) and as a NumPy array (for ONNX Runtime).
dummy_shape = (1, 3, input_size, input_size)
dummy_inputs = tf.random.normal(dummy_shape)
dummy_inputs_numpy = dummy_inputs.numpy()

In [8]:
# Reference forward pass through the original TensorFlow model.
tf_outputs = model(dummy_inputs, training=False)

# Same input through the exported ONNX graph. Passing None as the first
# argument asks ONNX Runtime for all graph outputs, returned as a list.
sess = ort.InferenceSession(onnx_model_path)
ort_outputs = sess.run(None, {"pixel_values": dummy_inputs_numpy})

# Compare the logits against the first ONNX output array explicitly, rather
# than against the list wrapper (which only matched through broadcasting).
np.allclose(tf_outputs.logits.numpy(), ort_outputs[0], rtol=1e-5, atol=1e-05)

True

In [11]:
print("Benchmarking TF model...")

# Two untimed forward passes before measuring (warm-up).
for _warmup in range(2):
    model(dummy_inputs, training=False)

# 25 independent measurements, each timing a single forward pass.
tf_runtimes = timeit.repeat(
    stmt=lambda: model(dummy_inputs, training=False),
    repeat=25,
    number=1,
)
tf_mean_latency = np.mean(tf_runtimes)
print(f"Average latency (seconds): {tf_mean_latency}.")

Benchmarking TF model...
Average latency (seconds): 0.33623785984000276.


In [12]:
# Feed dict reused for every ONNX Runtime call in this cell.
onnx_feed = {"pixel_values": dummy_inputs_numpy}

# Two untimed runs before measuring (warm-up).
for _warmup in range(2):
    sess.run(None, onnx_feed)

# 25 independent measurements, each timing a single session run.
onnx_runtimes = timeit.repeat(
    stmt=lambda: sess.run(None, onnx_feed),
    repeat=25,
    number=1,
)
onnx_mean_latency = np.mean(onnx_runtimes)
print(f"Average latency (seconds): {onnx_mean_latency}.")

Average latency (seconds): 0.21896604576000755.
