# Converting and Compiling the Model

In [4]:
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
# Directory the TensorRT-converted SavedModel is written to by converter.save()
# and later read back from by tf.saved_model.load().
output_saved_model_dir="ssd_mobilenet_v2_trt_optimized"

In [2]:
# Enable GPU memory growth
# Turns on memory growth for every physical GPU TensorFlow lists, then reports
# how many physical and logical GPUs there are. Nothing happens when no GPU is listed.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            print("Enabling Memory Growth on " + str(gpu))
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        # The error is printed rather than re-raised, so later cells still run.
        print(e)

Enabling Memory Growth on PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
1 Physical GPUs, 1 Logical GPUs


In [13]:
# Start from the TF-TRT default conversion parameters and override, in one step:
#   - the workspace size: 1 << 32 bytes (4 GiB)
#   - the precision mode: FP16
#   - the engine cache size: 100 cached engines
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    max_workspace_size_bytes=(1 << 32),
    precision_mode="FP16",
    maximum_cached_engines=100,
)

# Optimize with TRT: the converter reads the SavedModel stored in "ssd_mobilenet_v2"
converter = trt.TrtGraphConverterV2(
    conversion_params=conversion_params,
    input_saved_model_dir="ssd_mobilenet_v2",
)

INFO:tensorflow:Linked TensorRT version: (8, 0, 1)
INFO:tensorflow:Loaded TensorRT version: (8, 0, 1)


In [14]:
# Convert the model into TensorRT. This is gonna take a while...
# The call's return value is the last expression of the cell, so the notebook displays it.
converter.convert()

<ConcreteFunction pruned(input_tensor) at 0x7E45F1CE48>

In [18]:
import numpy as np
def my_input_fn():
    """Yield the sample input used by ``converter.build`` to pre-build TRT engines.

    Yields:
        A 1-tuple holding one ``uint8`` array of shape ``(1, 320, 320, 3)``:
        a single batch for a network that has exactly one input tensor.

    The TF-TRT documentation describes ``input_fn`` as yielding a list/tuple/dict
    of inputs. The previous version yielded a bare 5-D array ``(inp1)`` (which is
    not a tuple) and presumably only worked because the extra leading axis was
    unpacked away. It also cast standard-normal floats to ``uint8``, which
    collapses to a handful of values (negative floats have no well-defined
    ``uint8`` representation). Uniform random pixel values are used instead.
    """
    # Input for a single inference call, for a network that has one input tensor:
    inp1 = np.random.randint(0, 256, size=(1, 320, 320, 3), dtype=np.uint8)
    # Trailing comma makes this a real tuple with one element per model input.
    yield (inp1,)

# Do some kind of "pre-optimization" on the model. Assume a single (1, 320, 320, 3) input tensor
# build() is driven by the sample input(s) produced by my_input_fn.
converter.build(input_fn=my_input_fn)

# Same value as assigned in the first cell; repeated here so this cell does not
# depend on that assignment having been run.
output_saved_model_dir="ssd_mobilenet_v2_trt_optimized"

# Write initial TRT-optimized model to disk
converter.save(output_saved_model_dir)



INFO:tensorflow:Assets written to: ssd_mobilenet_v2_trt_optimized/assets


INFO:tensorflow:Assets written to: ssd_mobilenet_v2_trt_optimized/assets


In [5]:
# Reload the TRT-optimized SavedModel, pick its default serving signature and
# freeze that signature's variables into constants.
# Requires output_saved_model_dir to be defined by an earlier cell.
print("Loading model...")
# Load model into tensorflow
saved_model_loaded = tf.saved_model.load(output_saved_model_dir, tags=[tf.saved_model.SERVING])
print("Model loaded.")

# Load Deep Neural Network
# (the function registered under the default serving signature key)
print("Extracting DNN...")
graph_func = saved_model_loaded.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

# Optimize model by freezing in constants (model can no longer be trained)
print("Freezing DNN...")
frozen_func = convert_variables_to_constants_v2(graph_func)

In [6]:
# Print interesting info
# Lists the name of every operation in the frozen function's graph, followed by
# the function's input and output tensors.
layers = [op.name for op in frozen_func.graph.get_operations()]
print("-" * 50)
print("Frozen model layers: ")
for layer in layers:
    print(layer)
print("-" * 50)
print("Frozen model inputs: ")
print(frozen_func.inputs)
print("Frozen model outputs: ")
print(frozen_func.outputs)

--------------------------------------------------
Frozen model layers: 
input_tensor
StatefulPartitionedCall
Identity
Identity_1
Identity_2
Identity_3
Identity_4
Identity_5
Identity_6
Identity_7
--------------------------------------------------
Frozen model inputs: 
[<tf.Tensor 'input_tensor:0' shape=(1, None, None, 3) dtype=uint8>]
Frozen model outputs: 
[<tf.Tensor 'Identity:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_1:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_2:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_3:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_4:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_5:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_6:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_7:0' shape=<unknown> dtype=float32>]


In [7]:
# Save frozen graph from frozen ConcreteFunction to hard drive
# as_text=False writes the binary form; the notebook shows the returned path,
# ./frozen_trt_optimized_models/frozen_ssd_mobilenet_v2_attempt2.pb
tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
                  logdir="./frozen_trt_optimized_models",
                  name="frozen_ssd_mobilenet_v2_attempt2.pb",
                  as_text=False)

'./frozen_trt_optimized_models/frozen_ssd_mobilenet_v2_attempt2.pb'

# Running the Model

First, import TensorFlow and the standard-library `os` module.

In [1]:
import tensorflow as tf
import os

In [2]:
# Enable GPU memory growth
# Turns on memory growth for every physical GPU TensorFlow lists, then reports
# how many physical and logical GPUs there are. Nothing happens when no GPU is listed.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            print("Enabling Memory Growth on " + str(gpu))
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        # The error is printed rather than re-raised, so later cells still run.
        print(e)

Enabling Memory Growth on PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
1 Physical GPUs, 1 Logical GPUs


In [3]:
### USAGE ##
def wrap_frozen_graph(graph_def, inputs, outputs, print_graph=False):
    """Import a frozen ``GraphDef`` and return a callable pruned to the given tensors.

    Args:
        graph_def: The GraphDef to import. It is imported with an empty name
            prefix, so tensor names stay exactly as stored in the file.
        inputs: Name, or nested structure of names, of the graph elements to
            feed (e.g. ``["input_tensor:0"]``).
        outputs: Name, or nested structure of names, of the graph elements to
            fetch (e.g. ``["Identity:0", ...]``).
        print_graph: When truthy, print the name of every operation in the
            imported graph between two separator lines. When falsy, nothing
            is printed.

    Returns:
        The result of ``wrapped_import.prune(...)`` for the resolved input and
        output graph elements.
    """
    def _imports_graph_def():
        tf.compat.v1.import_graph_def(graph_def, name="")

    wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])
    import_graph = wrapped_import.graph

    # Only emit the listing (and its banner) when it was asked for; previously
    # the banner was printed even when no layer names followed it.
    if print_graph:
        print("-" * 50)
        print("Frozen model layers: ")
        for op in import_graph.get_operations():
            print(op.name)
        print("-" * 50)

    # Resolve the tensor names to graph elements, preserving the nesting of
    # `inputs` / `outputs`, and prune the wrapped function down to them.
    return wrapped_import.prune(
        tf.nest.map_structure(import_graph.as_graph_element, inputs),
        tf.nest.map_structure(import_graph.as_graph_element, outputs))

In [4]:
## Example Usage ###
# Load frozen graph using TensorFlow 1.x functions
# Reads the binary .pb file and parses its bytes into a fresh GraphDef.
with tf.io.gfile.GFile("./frozen_trt_optimized_models/frozen_ssd_mobilenet_v2_attempt2.pb", "rb") as f:
    graph_def = tf.compat.v1.GraphDef()
    print("Parsing model...")
    # `loaded` keeps ParseFromString's return value; the cells shown here never read it.
    loaded = graph_def.ParseFromString(f.read())
    print("Model parsed")

Parsing model...
Model parsed


In [5]:
# Wrap frozen graph to ConcreteFunctions
print("getting input function")

# The graph's eight outputs are named "Identity:0", "Identity_1:0", ..., "Identity_7:0".
output_tensor_names = ["Identity:0"] + ["Identity_{}:0".format(i) for i in range(1, 8)]

frozen_func = wrap_frozen_graph(
    graph_def=graph_def,
    inputs=["input_tensor:0"],
    outputs=output_tensor_names,
    print_graph=True,
)
print("Got frozen function")

getting input function
--------------------------------------------------
Frozen model layers: 
input_tensor
StatefulPartitionedCall
Identity
Identity_1
Identity_2
Identity_3
Identity_4
Identity_5
Identity_6
Identity_7
--------------------------------------------------
Got frozen function


In [6]:
# Show the wrapped function's input/output tensors, then load test.jpg and
# turn it into a batch of one image for the model.
print("Frozen model inputs: ")
print(frozen_func.inputs)

print("Frozen model outputs: ")
print(frozen_func.outputs)

# Load some test data
test_image = tf.io.read_file("test.jpg")

# Turn test data into tensor
# ratio=2 downscales while decoding: the recorded output shows (320, 240, 3)
# here, while the file itself is reported as 480x640 further down.
test_image_tensor = tf.image.decode_jpeg(test_image, ratio=2)

# NOTE: despite the label, this is the shape after the ratio=2 downscale.
print("Original Image Shape: ")
print(test_image_tensor.shape)

# Disabled experiment: pad/resize the image to 320x320 before inference.
#test_image_tensor = tf.image.resize_with_pad(test_image_tensor, 320, 320)

# Indicate batch-size == 1 by adding a dimension of size 1 in the first position of the tensor's shape
test_image_tensor_batch = tf.expand_dims(input=test_image_tensor, axis=0)

print("Model Input Shape: ")
print(test_image_tensor_batch.shape)

Frozen model inputs: 
[<tf.Tensor 'input_tensor:0' shape=(1, None, None, 3) dtype=uint8>]
Frozen model outputs: 
[<tf.Tensor 'Identity:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_1:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_2:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_3:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_4:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_5:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_6:0' shape=<unknown> dtype=float32>, <tf.Tensor 'Identity_7:0' shape=<unknown> dtype=float32>]
Original Image Shape: 
(320, 240, 3)
Model Input Shape: 
(1, 320, 240, 3)


In [7]:
# Get predictions for the single-image test batch
predictions = frozen_func(test_image_tensor_batch)
# Only a separator line is printed; dumping the raw predictions is disabled below.
print("-" * 50)
#print("Example prediction reference:")
#print(predictions)

--------------------------------------------------


In [22]:
# Report how many output tensors the model produced and, for each one,
# its shape and the device it lives on.
separator = "-" * 50
print("Output Tensors: {}".format(len(predictions)))
print(separator)
for i, tensor in enumerate(predictions):
    print("Tensor {} Shape: {}".format(i, tensor.shape))
    print("Tensor {} Produced By: {}".format(i, tensor.device))
    print(separator)

# Observed output layout (shapes from the run below; names are best guesses, TODO confirm):
#   predictions[0]  (1, 100)       detection_scores or detection_anchor_indices?
#   predictions[1]  (1, 100, 4)    detection_boxes
#   predictions[2]  (1, 100)       detection_classes
#   predictions[3]  (1, 100, 91)   detection_multiclass_scores?
#   predictions[4]  (1, 100)       detection_scores or detection_anchor_indices?
#   predictions[5]  (1,)           num_detections (100 for this image)
#   predictions[6]  (1, 1917, 4)   raw_detection_boxes
#   predictions[7]  (1, 1917, 91)  raw_detection_multiclass_scores

Output Tensors: 8
--------------------------------------------------
Tensor 0 Shape: (1, 100)
Tensor 0 Produced By: /job:localhost/replica:0/task:0/device:GPU:0
--------------------------------------------------
Tensor 1 Shape: (1, 100, 4)
Tensor 1 Produced By: /job:localhost/replica:0/task:0/device:GPU:0
--------------------------------------------------
Tensor 2 Shape: (1, 100)
Tensor 2 Produced By: /job:localhost/replica:0/task:0/device:GPU:0
--------------------------------------------------
Tensor 3 Shape: (1, 100, 91)
Tensor 3 Produced By: /job:localhost/replica:0/task:0/device:GPU:0
--------------------------------------------------
Tensor 4 Shape: (1, 100)
Tensor 4 Produced By: /job:localhost/replica:0/task:0/device:GPU:0
--------------------------------------------------
Tensor 5 Shape: (1,)
Tensor 5 Produced By: /job:localhost/replica:0/task:0/device:GPU:0
--------------------------------------------------
Tensor 6 Shape: (1, 1917, 4)
Tensor 6 Produced By: /job:localhost/repl

In [9]:
import json

## Returns a Dict of all object categories and their string representation, key'd by ID
def load_coco_object_categories(annotations_path='instances_val2017.json', verbose=True):
    """Build an ``id -> name`` lookup from a COCO-style annotations file.

    Args:
        annotations_path: Path to a JSON file whose top-level ``"categories"``
            entry is a list of objects with ``"id"`` and ``"name"`` keys.
            Defaults to ``'instances_val2017.json'`` in the working directory.
        verbose: When true (the default), print every ``id => name`` pair
            between two separator lines.

    Returns:
        dict mapping each category id to its name, in file order.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if ``"categories"``, ``"id"`` or ``"name"`` is missing.
    """
    # json.load parses straight from the file object; JSON is read as UTF-8.
    with open(annotations_path, 'r', encoding='utf-8') as f:
        annotations = json.load(f)

    categories = {}
    if verbose:
        print("-" * 50)
        print("All Object Categories:")
    for category in annotations['categories']:
        if verbose:
            print("{} => {}".format(category['id'], category['name']))
        categories[category['id']] = category['name']
    if verbose:
        print("-" * 50)

    return categories
    
# Notebook-wide id -> name table; parse_boxes reads it to name detected classes.
object_categories = load_coco_object_categories()

--------------------------------------------------
All Object Categories:
1 => person
2 => bicycle
3 => car
4 => motorcycle
5 => airplane
6 => bus
7 => train
8 => truck
9 => boat
10 => traffic light
11 => fire hydrant
13 => stop sign
14 => parking meter
15 => bench
16 => bird
17 => cat
18 => dog
19 => horse
20 => sheep
21 => cow
22 => elephant
23 => bear
24 => zebra
25 => giraffe
27 => backpack
28 => umbrella
31 => handbag
32 => tie
33 => suitcase
34 => frisbee
35 => skis
36 => snowboard
37 => sports ball
38 => kite
39 => baseball bat
40 => baseball glove
41 => skateboard
42 => surfboard
43 => tennis racket
44 => bottle
46 => wine glass
47 => cup
48 => fork
49 => knife
50 => spoon
51 => bowl
52 => banana
53 => apple
54 => sandwich
55 => orange
56 => broccoli
57 => carrot
58 => hot dog
59 => pizza
60 => donut
61 => cake
62 => chair
63 => couch
64 => potted plant
65 => bed
67 => dining table
70 => toilet
72 => tv
73 => laptop
74 => mouse
75 => remote
76 => keyboard
77 => cell phone
78 =>

In [10]:
# Sanity check: dump the id -> name mapping to make sure it was built correctly.
print(object_categories)

{1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'micro

In [71]:
# Positions of the tensors we need inside the model's output list
NUM_DETECTIONS_IDX = 5
BBOXES_IDX = 1
CLASS_IDX = 2
CONFIDENCE_IDX = 4

# These ID's are only relevant as they related to DNN output:
# each raw box is laid out as [y0, x0, y1, x1]
X0_IDX = 1
Y0_IDX = 0
X1_IDX = 3
Y1_IDX = 2

# Detections must score strictly above this value to be kept
CONFIDENCE_THRESH = 0.5

def parse_boxes(outputs, categories=None, confidence_thresh=None):
    """Turn the raw model outputs for one image into a list of detections.

    Args:
        outputs: Sequence of output tensors/arrays, indexed by the ``*_IDX``
            constants above. Only batch element 0 is read.
        categories: Optional ``id -> name`` mapping. Defaults to the
            notebook-level ``object_categories``.
        confidence_thresh: Optional score cut-off. Defaults to
            ``CONFIDENCE_THRESH``. A detection is kept when its score is
            strictly greater than the cut-off.

    Returns:
        list of dicts with keys ``class_raw`` (int), ``class_name`` (str),
        ``confidence`` (float) and ``bbox`` (``[x0, y0, x1, y1]`` floats, in
        the same units as the model output).
    """
    if categories is None:
        categories = object_categories
    if confidence_thresh is None:
        confidence_thresh = CONFIDENCE_THRESH

    num_detections = int(outputs[NUM_DETECTIONS_IDX][0])
    bboxes = outputs[BBOXES_IDX]
    classes = outputs[CLASS_IDX]
    scores = outputs[CONFIDENCE_IDX]

    print("Total Detections: {}".format(num_detections))
    print("-" * 50)

    # Only the first `num_detections` rows are reported as detections by the
    # model; never read past the tensor's own length either.
    valid_count = min(num_detections, bboxes.shape[1])

    detections = []
    # iterate through each bounding box
    for i in range(valid_count):
        score = float(scores[0][i])
        if score <= confidence_thresh:
            continue

        bbox = bboxes[0][i]
        class_raw = int(classes[0][i])
        # The category table has gaps (e.g. no id 12), so fall back to a
        # placeholder instead of raising KeyError on an unmapped id.
        class_name = categories.get(class_raw, "unknown")

        print("Detection {}:".format(i))
        print("Confidence: {}".format(score))
        print("Bounding Box: {}".format(bbox))
        print("Object Class (raw): {}".format(class_raw))
        print("Object Class: {}".format(class_name))
        print("-" * 50)

        detections.append(dict(
            class_raw=class_raw,
            class_name=class_name,
            confidence=score,
            # Reorder from the model's [y0, x0, y1, x1] to [x0, y0, x1, y1]
            bbox=[
                float(bbox[X0_IDX]),
                float(bbox[Y0_IDX]),
                float(bbox[X1_IDX]),
                float(bbox[Y1_IDX])
            ]
        ))

    # The filter above is strict, so report it as ">" (the message used to say ">=").
    print("Total Detections with Confidence > {}: {}".format(confidence_thresh, len(detections)))

    return detections

# Parse the outputs of the inference run above; the resulting list is what the
# visualisation cell draws.
detections = parse_boxes(predictions)

Total Detections: 100
--------------------------------------------------
Detection 0:
Confidence: 0.8661324977874756
Bounding Box: [0.29043815 0.4980969  0.5980177  0.92759645]
Object Class (raw): 13
Object Class: stop sign
--------------------------------------------------
Detection 1:
Confidence: 0.7836574912071228
Bounding Box: [0.20658568 0.2026435  0.9878688  0.49672437]
Object Class (raw): 1
Object Class: person
--------------------------------------------------
Total Detections with Confidence >= 0.5: 2


In [74]:
# Lets visualize the output
import ipywidgets.widgets as widgets
import cv2

# Size the widget from the decoded tensor, whose shape is (height, width, channels).
# (width and height used to be passed the wrong way round.)
display_height = test_image_tensor.shape[0]
display_width = test_image_tensor.shape[1]
image_widget = widgets.Image(format='jpeg', width=display_width, height=display_height)

# Draw on the full-resolution file rather than on the downscaled tensor.
cv2_test_image = cv2.imread("test.jpg")
if cv2_test_image is None:
    # cv2.imread signals failure by returning None instead of raising
    raise FileNotFoundError("cv2.imread could not read 'test.jpg'")
height, width = cv2_test_image.shape[:2]

for d in detections:
    # bbox is [x0, y0, x1, y1] as fractions of the image size; scale to pixels
    x0, y0, x1, y1 = d['bbox']
    cv2_test_image = cv2.rectangle(
        cv2_test_image,
        (int(width * x0), int(height * y0)),
        (int(width * x1), int(height * y1)),
        (255, 0, 0),  # OpenCV colours are BGR, so this is blue
        2)

result, img_buff = cv2.imencode('.jpg', cv2_test_image)
if not result:
    raise RuntimeError("cv2.imencode failed to encode the annotated image as JPEG")

image_widget.value = img_buff.tobytes()

# `display` is provided by the IPython/Jupyter environment
display(image_widget)

print("Image Resolution: {}x{}".format(width, height))

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

Image Resolution: 480x640
