# Import Libraries

In [1]:
import tqdm
import tensorflow as tf
import numpy as np

from sklearn.metrics import classification_report
from imutils import paths
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications.resnet  import preprocess_input
from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Prepare Test Data Set

In [2]:
# Image generator for the evaluation data. The only transform configured here is
# the ResNet `preprocess_input` function imported above; no augmentation
# arguments are passed.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [3]:
# Stream the test images from disk in batches of 32, resized to 150x150.
# Shuffling is enabled with a fixed seed, and labels are binary.
test_dataset = test_datagen.flow_from_directory(
    directory='./CNN_Transfer_Learning_for_Cats_versus_Dogs/test',
    target_size=(150, 150),
    batch_size=32,
    shuffle=True,
    class_mode='binary',
    seed=21
)

# Pull exactly one epoch (len(test_dataset) batches) out of the iterator.
# Batches are collected in lists and concatenated once at the end: calling
# np.append inside the loop re-copies the whole accumulated array on every
# iteration, which makes the loop quadratic in the number of images.
image_batches, label_batches = [], []
for _ in tqdm.tqdm(range(len(test_dataset))):
    img, label = next(test_dataset)
    image_batches.append(img)
    label_batches.append(label)

X_test = np.concatenate(image_batches, axis=0)
y_test = np.concatenate(label_batches, axis=0)

# First 32 images/labels of the shuffled stream, used as the evaluation and
# timing batch in the cells below.
X_test_sample = X_test[:32]
y_test_sample = y_test[:32]

Found 1249 images belonging to 2 classes.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:46<00:00,  1.18s/it]


# Load TensorFlow Model

In [4]:
# Load the Keras model stored under the relative path 'my_model'. It serves as
# the non-TensorRT baseline below; the same directory is also handed to the
# TF-TRT converter later in this notebook.
model = tf.keras.models.load_model('my_model')

2022-10-17 11:04:57.761638: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:04:57.783464: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:04:57.785288: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:04:57.786285: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

# Test Inference with TensorFlow TensorRT INT8

In [5]:
# Number of images in the INT8 calibration batch.
batch_size = 8

# Zero-filled float32 buffer with one 150x150 three-channel slot per image.
batched_input = np.zeros(shape=(batch_size, 150, 150, 3), dtype=np.float32)

In [6]:
# Training-set folders (one per class) from which the images for the INT8
# calibration batch are drawn in the next cell.
img_path_dog = './CNN_Transfer_Learning_for_Cats_versus_Dogs/train/Dog'
img_path_cat = './CNN_Transfer_Learning_for_Cats_versus_Dogs/train/Cat'

In [7]:
# Fix NumPy's global RNG so the same calibration files are picked on every run.
np.random.seed(42)

# Draw four image paths per class. np.random.choice samples with replacement by
# default, so the same file can appear more than once.
dog_candidates = list(paths.list_images(img_path_dog))
dog_images = np.random.choice(dog_candidates, size=4)
cat_candidates = list(paths.list_images(img_path_cat))
cat_images = np.random.choice(cat_candidates, size=4)

# Dogs first, then cats.
all_images = np.concatenate((dog_images, cat_images))

In [8]:
# Fill the calibration batch in a fresh local NumPy buffer rather than writing
# into `batched_input` directly. This cell rebinds `batched_input` to an
# immutable tf.Tensor, so writing into it in place would make a second run of
# the cell fail on item assignment. Building from a local buffer keeps the cell
# idempotent.
calibration_array = np.zeros((len(all_images), 150, 150, 3), dtype=np.float32)

for index, img_path in enumerate(all_images):
    img = load_img(img_path, target_size=(150, 150))
    x = img_to_array(img)
    x = np.expand_dims(x, axis=0)
    # Same ResNet preprocessing that the test generator applies.
    x = preprocess_input(x)
    calibration_array[index, :] = x

batched_input = tf.constant(calibration_array)

def calibration_input_fn():
    """Yield the calibration data for the TF-TRT INT8 conversion.

    Yields:
        A one-element tuple holding `batched_input`, the preprocessed
        calibration images as a single tensor. Exactly one batch is yielded.
    """
    yield (batched_input, )

In [9]:
# Ask TF-TRT for INT8 precision.
conversion_params = trt.TrtConversionParams(precision_mode=trt.TrtPrecisionMode.INT8)

# Convert the SavedModel stored in 'my_model'.
converter = trt.TrtGraphConverterV2(
    conversion_params=conversion_params,
    input_saved_model_dir='my_model',
)

# The calibration generator supplies the sample batch used during conversion.
converter.convert(calibration_input_fn=calibration_input_fn)

INFO:tensorflow:Linked TensorRT version: (7, 2, 2)
INFO:tensorflow:Loaded TensorRT version: (7, 2, 2)


2022-10-17 11:05:05.667273: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:05:05.667327: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-10-17 11:05:05.667872: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2022-10-17 11:05:05.671309: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:05:05.672771: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-17 11:05:05.674268: I tensorflow/stream_executor/cuda/cuda_gpu_exec

<ConcreteFunction pruned(resnet50_input) at 0x7F550BCD7820>

In [10]:
# Write the converted model to disk, then load it back from the same directory
# so that inference below runs on the saved artifact.
int8_export_dir = 'my_quantized_model_INT8'
converter.save(int8_export_dir)
INT8_model = tf.saved_model.load(int8_export_dir)

# Callable inference entry point of the reloaded model.
func_INT8 = INT8_model.signatures["serving_default"]

INFO:tensorflow:Assets written to: my_quantized_model_INT8/assets


In [11]:
# Run the INT8 signature on the sample batch; it returns a mapping from which
# the 'dense_3' entry is taken as the model output.
int8_outputs = func_INT8(resnet50_input=X_test_sample)
predictions_proba_INT8 = np.array(int8_outputs['dense_3'])

# Flatten to one probability per image and threshold at 0.5.
y_predict_INT8 = predictions_proba_INT8.ravel() > 0.5

report_INT8 = classification_report(
    y_test_sample,
    y_predict_INT8,
    digits=4,
    target_names=['Cat', 'Dog'],
)
print(report_INT8)

2022-10-17 11:06:34.668869: I tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc:436] TRTEngineOp not using explicit QDQ


              precision    recall  f1-score   support

         Cat     1.0000    0.9375    0.9677        16
         Dog     0.9412    1.0000    0.9697        16

    accuracy                         0.9688        32
   macro avg     0.9706    0.9688    0.9687        32
weighted avg     0.9706    0.9688    0.9687        32



In [12]:
# Baseline: score the same sample batch with the original Keras model.
predictions_proba = model.predict(X_test_sample)

# Flatten to one probability per image and threshold at 0.5.
y_predict = predictions_proba.ravel() > 0.5

report_baseline = classification_report(
    y_test_sample,
    y_predict,
    digits=4,
    target_names=['Cat', 'Dog'],
)
print(report_baseline)

              precision    recall  f1-score   support

         Cat     1.0000    0.9375    0.9677        16
         Dog     0.9412    1.0000    0.9697        16

    accuracy                         0.9688        32
   macro avg     0.9706    0.9688    0.9687        32
weighted avg     0.9706    0.9688    0.9687        32



In [13]:
print("Original Model Probabilities | INT8 Model Probabilities")

# Pair up the first ten probabilities of both models, in sample order.
original_head = predictions_proba.ravel()[:10]
int8_head = predictions_proba_INT8.ravel()[:10]
list(zip(original_head, int8_head))

Original Model Probabilities | INT8 Model Probabilities


[(0.94052213, 0.9364014),
 (0.042032767, 0.044680867),
 (0.78132147, 0.7791538),
 (0.91191137, 0.91949815),
 (0.43425876, 0.42214724),
 (0.07807078, 0.07935289),
 (0.56445783, 0.57606506),
 (0.90999925, 0.92093205),
 (0.024335448, 0.031084951),
 (0.07119049, 0.06828854)]

# Time Inference With TensorRT

In [14]:
%%timeit
# Times one call of the TF-TRT INT8 signature on the sample batch.
# NOTE(review): the returned tensors are never read back on the host here;
# confirm that this captures the full inference time and not just dispatch.
prediction = func_INT8(resnet50_input=X_test_sample)

8.09 ms ± 170 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Time Inference Without TensorRT

In [15]:
%%timeit
# Times one Keras predict() call on the same sample batch as the INT8 timing.
# NOTE(review): Keras documents predict() as intended for large, batched inputs
# and recommends calling the model directly for small inputs that fit in one
# batch. This number therefore likely includes per-call predict() overhead;
# consider also timing model(X_test_sample, training=False) for a
# like-for-like comparison with the direct signature call.
prediction = model.predict(X_test_sample)

81.5 ms ± 1.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
