In [1]:
import time
import common
import tensorflow as tf
import tensorrt as trt
import tensorflow_datasets as tfds
# TensorRT logger handed to the trt.Runtime created further down; it is
# configured with WARNING as its minimum reported severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

Load the evaluation dataset, then load the TensorRT (TRT) engine and run inference on it

In [2]:
# Keep the cell output short: no download/preparation progress bars.
tfds.disable_progress_bar()

# as_supervised=True makes the dataset yield (image, label) pairs;
# with_info=True additionally returns the dataset metadata.
ds, metadata = tfds.load(
    'cats_vs_dogs',
    split='train',
    with_info=True,
    as_supervised=True,
)

# Converts an integer class id into its human-readable label name.
get_label_name = metadata.features['label'].int2str


def decode_prediction(x):
    """Convert the model's scalar output into a binary class id.

    Args:
        x: Scalar model output for one image.

    Returns:
        1 if ``x >= 0``, otherwise 0.

    NOTE(review): thresholding at 0 presumably means the model emits a raw
    logit (0 <=> probability 0.5) rather than a sigmoid output -- confirm
    against the exported model.
    """
    return 1 if x >= 0 else 0


In [3]:
# Settings for the evaluation run below.
ENGINE_PATH = 'saved_model/mobilenetv2_ONNX/model.engine'
N_EVAL_IMAGES = 1000       # number of dataset items to evaluate
INPUT_SIZE = (160, 160)    # (height, width) the images are resized to

# Deserialize the engine and create an execution context; both are released
# when the `with` blocks exit.
with open(ENGINE_PATH, 'rb') as f, \
        trt.Runtime(TRT_LOGGER) as runtime:
    with runtime.deserialize_cuda_engine(f.read()) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        n_predictions = 0
        n_correct_predictions = 0
        # NOTE: the timed section covers preprocessing as well as inference.
        start_time = time.time()
        for image, label in ds.take(N_EVAL_IMAGES):
            # Preprocess: float conversion, rescale (maps 0..255 to -1..1,
            # assuming 8-bit pixel values), resize, and add a batch axis.
            x = tf.cast(image, tf.float32)
            x = (x / 127.5) - 1
            x = tf.image.resize(x, INPUT_SIZE)
            x = tf.expand_dims(x, axis=0)

            # Copy the image INTO the host buffer returned by
            # allocate_buffers instead of rebinding `.host` to a TF tensor:
            # rebinding throws away the preallocated buffer and relies on
            # the copy helper accepting a non-NumPy object.
            # Assumes `.host` is a flat NumPy array, as in NVIDIA's sample
            # common.py -- TODO confirm for the local `common` module.
            flat_input = x.numpy().ravel()
            inputs[0].host[:flat_input.size] = flat_input

            preds = common.do_inference_v2(
                context,
                bindings=bindings,
                inputs=inputs,
                outputs=outputs,
                stream=stream,
            )
            n_predictions += 1
            prediction = preds[0][0]  # only process first object at first batch index
            decoded_pred = decode_prediction(prediction)
            # Compare as plain Python ints rather than branching on a tensor.
            if int(label) == decoded_pred:
                n_correct_predictions += 1
        elapsed_time = time.time() - start_time

        # Guard the ratios so an empty dataset does not raise ZeroDivisionError.
        accuracy = n_correct_predictions / n_predictions if n_predictions else 0.0
        images_per_second = n_predictions / elapsed_time if elapsed_time > 0 else 0.0
        print('predicted {} images with accuracy of {:.2f}% with a rate of {:.2f} images/s'.format(n_predictions, accuracy * 100, images_per_second))

predicted 1000 images with accuracy of 99.00% with a rate of 227.98 images/s
