In [1]:
#https://keras.io/api/applications/vgg/

# Imports

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg19 import preprocess_input, decode_predictions
import numpy as np
import tensorrt as trt
from tensorflow.python.compiler.tensorrt import trt_convert
import random
import multiprocessing
from tensorflow.python.saved_model import signature_constants, tag_constants
from tensorflow.python.framework import convert_to_constants
import subprocess as sp
import os


# Load VGG19 model

In [None]:
def load_model():

    model = tf.keras.applications.VGG19(
        include_top=True,
        weights="imagenet",
        input_tensor=None,
        input_shape=None,
        pooling=None,
        classes=1000,
        classifier_activation=None,
    )
    return model

In [None]:
model = load_model()
opt = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, metrics=['accuracy'])
model.summary()

In [None]:
print(f'Input Shape{model.input_shape}')

# Test base model

In [3]:


img_path = 'assets/test_image.jpg'
#There is an interpolation method to match the source size with the target size
#image loaded in PIL (Python Imaging Library)
img = image.load_img(img_path, target_size=(224, 224))

In [4]:
image_as_array = image.img_to_array(img)
#image_as_array = image_as_array.astype(np.int8)
image_as_array = image_as_array.reshape(((1, image_as_array.shape[0], image_as_array.shape[1], image_as_array.shape[2])))
test_image = tf.keras.applications.vgg19.preprocess_input(image_as_array)

In [5]:
print(type(image_as_array[0][0][0][0]))
print(test_image.shape)
image_as_array

<class 'numpy.float32'>
(1, 224, 224, 3)


array([[[[-74.68     , -22.779    ,  29.060997 ],
         [-74.68     , -19.779    ,  31.060997 ],
         [-72.68     , -20.779    ,  31.060997 ],
         ...,
         [-81.68     , -30.779    ,  21.060997 ],
         [-83.68     , -31.779    ,  20.060997 ],
         [-86.68     , -35.779    ,  14.060997 ]],

        [[-75.68     , -16.779    ,  36.060997 ],
         [-75.68     , -16.779    ,  36.060997 ],
         [-72.68     , -13.778999 ,  39.060997 ],
         ...,
         [-81.68     , -26.779    ,  26.060997 ],
         [-79.68     , -29.779    ,  25.060997 ],
         [-82.68     , -32.779    ,  22.060997 ]],

        [[-75.68     , -16.779    ,  36.060997 ],
         [-74.68     , -15.778999 ,  37.060997 ],
         [-73.68     , -13.778999 ,  41.060997 ],
         ...,
         [-80.68     , -26.779    ,  28.060997 ],
         [-79.68     , -29.779    ,  25.060997 ],
         [-82.68     , -32.779    ,  22.060997 ]],

        ...,

        [[-86.68     , -41.779    ,  -

In [None]:
pred = model.predict(test_image)

In [None]:

# convert the probabilities to class labels
label = decode_predictions(pred)
# retrieve the most likely result, e.g. highest probability
label = label[0][0]
# print the classification
print('%s (%.2f%%)' % (label[1], label[2]))

# Save Model

In [6]:
base_model_path = 'saved_model/base_model'

In [None]:
model.save(base_model_path)

# Convert model to TensorRT

In [None]:
test_image.shape

In [7]:

def optimize_model(precision_mode='FP32'):
    converter = trt_convert.TrtGraphConverterV2(input_saved_model_dir=base_model_path,
                                        conversion_params = tf.experimental.tensorrt.ConversionParams(
                                            precision_mode=precision_mode,
                                        ), max_workspace_size_bytes=1 << 28
                                       )


    def my_input_fn():
        # Input for a single inference call, for a network that has two input tensors:
        yield (np.asanyarray([test_image]),)


    converter.convert()
    converter.build(my_input_fn)
    model_path = './optimized' + '_' + precision_mode
    converter.save(model_path)

p = multiprocessing.Process(target=optimize_model)
p.start()
p.join()


#p = multiprocessing.Process(target=optimize_model('FP16'))
#p.start()
#p.join()
#optimize_model('FP16')

INFO:tensorflow:Linked TensorRT version: (8, 0, 3)
INFO:tensorflow:Loaded TensorRT version: (8, 0, 3)


2022-05-17 10:35:10.770147: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-17 10:35:10.814350: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-17 10:35:10.814774: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-17 10:35:10.815838: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-17 10:35:10.816287: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1050] successful NUMA node read f

# Evaluate model

In [None]:
model_path_32 = './optimized' + "_" + 'FP32'

In [None]:


def get_gpu_memory():
  _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]

  ACCEPTABLE_AVAILABLE_MEMORY = 1024
  COMMAND = "nvidia-smi --query-gpu=memory.free --format=csv"
  memory_free_info = _output_to_list(sp.check_output(COMMAND.split()))[1:]
  memory_free_values = [int(x.split()[0]) for i, x in enumerate(memory_free_info)]
  return memory_free_values

def get_func_from_saved_model(saved_model_dir):
  saved_model_loaded = tf.saved_model.load(
      saved_model_dir, tags=[tag_constants.SERVING])
  graph_func = saved_model_loaded.signatures[
      signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
  graph_func = convert_to_constants.convert_variables_to_constants_v2(graph_func)
  return graph_func

def evaluate_model(saved_model_dir):
    print("Model: " + saved_model_dir)
    mem_before = get_gpu_memory()[0]
    print("Available GPU Memory before loading: ", mem_before)

    model_func = get_func_from_saved_model(saved_model_dir)
    mem_after = get_gpu_memory()[0]
    print("Available GPU Memory after loading: ", mem_after)

    print("GPU Memory Usage: " + str(mem_before - mem_after) + " MiB")
    model_func(test_image)
    print('\nTest accuracy:', float(success) / num_of_iteration)

p = multiprocessing.Process(target=evaluate_model(model_path_32))
p.start()
p.join()