# Import Libraries

In [1]:
import tqdm
import tensorflow as tf
import numpy as np

from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet  import preprocess_input
from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Prepare Test Data Set

In [2]:
# Test-time image generator: no augmentation arguments are passed, only the
# ResNet `preprocess_input` function, which is applied to every loaded image.
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

In [3]:
# Stream the test images from disk in shuffled batches of 32, resized to
# 150x150, with binary labels. The fixed seed makes the shuffle order
# repeatable across runs.
test_dataset = test_datagen.flow_from_directory(
    directory='./CNN_Transfer_Learning_for_Cats_versus_Dogs/test',
    target_size=(150, 150),
    batch_size=32,
    shuffle=True,
    class_mode='binary',
    seed=21
)

# Materialise one full pass over the generator as in-memory arrays.
# Batches are gathered in Python lists and concatenated once at the end:
# calling np.append inside the loop would copy the whole accumulated image
# array on every iteration (quadratic in the number of batches).
img, label = next(test_dataset)
image_batches = [img]
label_batches = [label]

# The first batch was already fetched above, hence len(test_dataset) - 1.
for _ in tqdm.tqdm(range(len(test_dataset) - 1)):
    img, label = next(test_dataset)
    image_batches.append(img)
    label_batches.append(label)

X_test = np.concatenate(image_batches, axis=0)
y_test = np.concatenate(label_batches, axis=0)

# Small fixed sample (the first 32 images) reused by the accuracy checks and
# the timing cells further down.
X_test_sample = X_test[:32]
y_test_sample = y_test[:32]

Found 1249 images belonging to 2 classes.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:45<00:00,  1.18s/it]


# Load TensorFlow Model

In [4]:
# Restore the baseline Keras model from the 'my_model' directory. It is the
# reference that the TensorRT-converted model is compared against below.
model = tf.keras.models.load_model(
    'my_model',
)

2022-10-17 11:09:36.976872: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:09:36.989114: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:09:36.990321: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:09:36.991652: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

# Test Inference with TensorFlow TensorRT FP16

In [5]:
# Ask TF-TRT to use half precision (FP16) for the converted model.
fp16_mode = trt.TrtPrecisionMode.FP16
conversion_params = trt.TrtConversionParams(precision_mode=fp16_mode)

# The converter reads the same 'my_model' SavedModel directory that the
# baseline Keras model is loaded from.
converter = trt.TrtGraphConverterV2(
    conversion_params=conversion_params,
    input_saved_model_dir='my_model',
)

# Kept as the last expression of the cell so the converted function is
# displayed as the cell output.
converter.convert()

INFO:tensorflow:Linked TensorRT version: (7, 2, 2)
INFO:tensorflow:Loaded TensorRT version: (7, 2, 2)


2022-10-17 11:09:44.570643: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:09:44.570698: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-10-17 11:09:44.570843: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-10-17 11:09:44.574047: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:09:44.575797: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:09:44.577377: I tensorflow/stream_executor/cuda/cuda_gpu_exec

<ConcreteFunction pruned(resnet50_input) at 0x7F626A5ABBB0>

In [6]:
# Write the converted model to disk and load it straight back.
# converter.build() is not called before saving, so (per the INFO log this
# cell emits) the TensorRT engines are built and cached at runtime instead.
fp16_export_dir = 'my_quantized_model_FP16'
converter.save(fp16_export_dir)

FP16_model = tf.saved_model.load(fp16_export_dir)

# Inference entry point of the reloaded model; called with keyword inputs.
func_FP16 = FP16_model.signatures["serving_default"]

INFO:tensorflow:Could not find TRTEngineOp_0_0 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.


2022-10-17 11:09:48.188531: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at trt_engine_resource_ops.cc:198 : NOT_FOUND: Container TF-TRT does not exist. (Could not find resource: TF-TRT/TRTEngineOp_0_0)


INFO:tensorflow:Assets written to: my_quantized_model_FP16/assets


In [7]:
# The signature takes its input by name and returns a mapping of outputs;
# the probabilities are read from the 'dense_3' entry and copied to NumPy.
outputs_FP16 = func_FP16(resnet50_input=X_test_sample)
predictions_proba_FP16 = np.array(outputs_FP16['dense_3'])

# Flatten to 1-D and threshold at 0.5 to obtain boolean class predictions.
y_predict_FP16 = predictions_proba_FP16.ravel() > 0.5

report_FP16 = classification_report(
    y_test_sample,
    y_predict_FP16,
    digits=4,
    target_names=['Cat', 'Dog'],
)
print(report_FP16)

2022-10-17 11:09:56.983968: I tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc:436] TRTEngineOp not using explicit QDQ
2022-10-17 11:09:56.989679: I tensorflow/compiler/tf2tensorrt/common/utils.cc:94] Linked TensorRT version: 7.2.2
2022-10-17 11:09:56.989948: I tensorflow/compiler/tf2tensorrt/common/utils.cc:96] Loaded TensorRT version: 7.2.2


              precision    recall  f1-score   support

         Cat     1.0000    0.9375    0.9677        16
         Dog     0.9412    1.0000    0.9697        16

    accuracy                         0.9688        32
   macro avg     0.9706    0.9688    0.9687        32
weighted avg     0.9706    0.9688    0.9687        32



In [8]:
# Baseline: run the unconverted Keras model on the same 32-image sample so
# its report can be compared line by line with the FP16 one.
predictions_proba = model.predict(X_test_sample)

# Flatten to 1-D and threshold at 0.5 to obtain boolean class predictions.
y_predict = predictions_proba.ravel() > 0.5

report_baseline = classification_report(
    y_test_sample,
    y_predict,
    digits=4,
    target_names=['Cat', 'Dog'],
)
print(report_baseline)

2022-10-17 11:10:38.292197: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8100
2022-10-17 11:10:38.602951: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


              precision    recall  f1-score   support

         Cat     1.0000    0.9375    0.9677        16
         Dog     0.9412    1.0000    0.9697        16

    accuracy                         0.9688        32
   macro avg     0.9706    0.9688    0.9687        32
weighted avg     0.9706    0.9688    0.9687        32



2022-10-17 11:10:39.408687: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


In [9]:
print("Original Model Probabilities | FP16 Model Probabilities")

# Show the first ten probabilities of each model side by side as
# (original, FP16) pairs; the list is the cell's displayed output.
original_head = predictions_proba.ravel()[:10]
fp16_head = predictions_proba_FP16.ravel()[:10]
list(zip(original_head, fp16_head))

Original Model Probabilities | FP16 Model Probabilities


[(0.94051033, 0.9404625),
 (0.042042427, 0.042009056),
 (0.7816651, 0.7821637),
 (0.9119552, 0.9119669),
 (0.4342294, 0.4324828),
 (0.07813772, 0.078925885),
 (0.5645249, 0.56481874),
 (0.90995723, 0.91006696),
 (0.024338817, 0.02433019),
 (0.07112902, 0.071073666)]

# Time Inference With TensorRT

In [10]:
%%timeit
# Times one call of the TF-TRT FP16 signature on the 32-image sample batch.
# NOTE(review): the returned outputs are not converted to NumPy here, while
# the baseline timing cell uses model.predict -- confirm that both cells are
# measuring comparable work before quoting the speed-up.
prediction = func_FP16(resnet50_input=X_test_sample)

12 ms ± 205 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Time Inference Without TensorRT

In [11]:
%%timeit
# Times one model.predict call of the unconverted Keras model on the same
# 32-image sample batch used for the TF-TRT timing.
# NOTE(review): model.predict presumably adds per-call overhead beyond the
# forward pass itself -- verify whether that is intended to be included in
# the comparison.
prediction = model.predict(X_test_sample)

80.4 ms ± 589 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
