# Object Detection Performance Check

# Imports

In [None]:
from __future__ import print_function

# from matplotlib import pyplot as plt
# from matplotlib.pyplot import imshow
import numpy as np
import os
import time
import tensorflow as tf
import xml.etree.ElementTree as ET
from PIL import Image
from PIL import ImageDraw
from PIL import ImageColor
from PIL import ImageFont
from scipy.stats import norm
from IPython.display import display

# `tf.__version__` is available on every TensorFlow release, whereas the
# `tf.VERSION` alias only exists in the 1.x API.
print("TensorFlow version:", tf.__version__)

# Model preparation

Select the exported (frozen) model to evaluate, define the class names and their display colors, and set the path to the test images.

In [None]:
# Directory of the exported model to evaluate.
# MODEL_NAME = 'training/model1/trained_model'
MODEL_NAME = 'training/model3/trained_model'

# Frozen detection graph: the actual model used for the object detection.
PATH_TO_FROZEN_GRAPH = "".join((MODEL_NAME, '/frozen_inference_graph.pb'))

# Label map file holding the strings used to label each box.
PATH_TO_LABELS = os.path.join('data/', 'label_map_sdc.pbtxt')

In [None]:
# Class names; the order should match the entries of label_map_sdc.pbtxt.
CLASSNAME_LIST = [
    'Green',
    'Red',
    'Yellow',
]

# Colors used for visual purposes below, index-aligned with CLASSNAME_LIST.
COLOR_LIST = [
    'lawngreen',
    'red',
    'yellow',
]

## Load Frozen TensorFlow Model into Memory

In [None]:
# Build a dedicated graph and import the frozen model into it.
# NOTE: the Detection section further down rebinds `detection_graph` to the
# result of load_graph(PATH_TO_FROZEN_GRAPH).
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the serialized GraphDef; the file is only needed for the read.
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()

    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)

    # name='' imports the nodes without adding a name prefix.
    tf.import_graph_def(od_graph_def, name='')

## Helper Code

In [None]:
def load_image_into_numpy_array(image):
    """Convert an image into a ``(height, width, 3)`` uint8 numpy array.

    `image` must expose `size` as ``(width, height)`` and a `getdata()`
    method returning the pixel values. The reshape assumes three values
    per pixel.
    """
    im_width, im_height = image.size
    pixels = np.array(image.getdata())
    target_shape = (im_height, im_width, 3)
    return pixels.reshape(target_shape).astype(np.uint8)

def filter_boxes(min_score, boxes, scores, classes):
    """Return boxes with a confidence >= `min_score`.

    Args:
        min_score: Minimum score a detection needs in order to be kept.
        boxes: Array-like with one entry (row) per detection.
        scores: Array-like with one score per detection.
        classes: Array-like with one class id per detection.

    `boxes`, `scores` and `classes` must all have the same length along
    their first axis.

    Returns:
        A tuple ``(filtered_boxes, filtered_scores, filtered_classes)`` of
        numpy arrays holding only the detections whose score passed the
        threshold, in their original order.
    """
    # np.asarray lets plain Python sequences be used as well as arrays.
    boxes = np.asarray(boxes)
    scores = np.asarray(scores)
    classes = np.asarray(classes)

    # A single boolean mask selects the same detections in all three arrays.
    keep = scores >= min_score
    return boxes[keep], scores[keep], classes[keep]

def to_image_coords(boxes, height, width):
    """Scale normalized box coordinates to image coordinates.

    The detector outputs coordinates normalized to [0, 1]. Columns 0 and 2
    of `boxes` are multiplied by `height`, columns 1 and 3 by `width`.

    Returns a new array with the shape and dtype of `boxes`; the input is
    left untouched.
    """
    scaled = np.zeros_like(boxes)
    # Column order alternates vertical / horizontal coordinates.
    for column, factor in enumerate((height, width, height, width)):
        scaled[:, column] = boxes[:, column] * factor
    return scaled

def draw_boxes(image, boxes, classes, scores, thickness=4):
    """Return a copy of `image` with a labelled outline drawn for each box.

    Each row of `boxes` is unpacked as ``(y0, x0, y1, x1)``. `classes`
    holds 1-based ids that select the name and color from CLASSNAME_LIST
    and COLOR_LIST. `scores` are shown as percentages in the label.
    The input image itself is not modified.
    """
    annotated = image.copy()
    canvas = ImageDraw.Draw(annotated)

    for index, box in enumerate(boxes):
        y0, x0, y1, x1 = box

        # Class ids start at 1, the lookup lists at 0.
        list_index = int(classes[index]) - 1
        color = COLOR_LIST[list_index]
        percent = str(round(scores[index] * 100, 1))
        label = "".join([CLASSNAME_LIST[list_index], ": ", percent, "%"])

        # Filled background for the label, placed just above the y0 edge.
        canvas.rectangle([(x0 - 2, y0 - 15), (x0 + 80, y0)], fill=color)

        # Closed outline around the box.
        outline = [(x0, y1), (x0, y0), (x1, y0), (x1, y1), (x0, y1)]
        canvas.line(outline, width=thickness, fill=color)

        canvas.text((x0, y0 - 15), label, fill="black")

    return annotated

def load_graph(graph_file):
    """Build a new `tf.Graph` from the frozen inference graph in `graph_file`.

    The file is read as a serialized GraphDef and its nodes are imported
    into a fresh graph without a name prefix. Returns that graph.
    """
    with tf.gfile.GFile(graph_file, 'rb') as frozen_file:
        frozen_bytes = frozen_file.read()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(frozen_bytes)

    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph

# Detection

In [None]:
# Load the frozen model into its own graph.
detection_graph = load_graph(PATH_TO_FROZEN_GRAPH)

# `get_tensor_by_name` returns the Tensor with the associated name in the
# Graph; the four tensors used for inference are fetched in one pass.
image_tensor, detection_boxes, detection_scores, detection_classes = [
    detection_graph.get_tensor_by_name(tensor_name)
    for tensor_name in (
        # The input placeholder for the image.
        'image_tensor:0',
        # Each box represents a part of the image where a particular object
        # was detected.
        'detection_boxes:0',
        # Each score represents the level of confidence for one detected
        # object; it is shown on the result image together with the label.
        'detection_scores:0',
        # The classification of the object (integer id).
        'detection_classes:0',
    )
]

In [None]:
# Path to the test image directory.
TEST_IMAGES_DIR = 'data/v2_sim_data_val'

# Directory for the annotated (detected) images; created if missing.
DETECTED_IMAGES_DIR = TEST_IMAGES_DIR + '/detected_images'
if not os.path.exists(DETECTED_IMAGES_DIR):
    os.makedirs(DETECTED_IMAGES_DIR)

# Read the image base names from filename_list.txt, one per line.
# The `with` block closes the file, and stripping each line drops every blank
# line (not just the first one) as well as stray '\r' from CRLF line endings.
with open(TEST_IMAGES_DIR + '/filename_list.txt', 'r') as list_file:
    test_files = [line.strip() for line in list_file if line.strip()]

print("Using {} test images found in {}/images".format(len(test_files), TEST_IMAGES_DIR))

In [None]:
# Evaluation counters and settings.
#
# Every image is compared against its annotation by counting the objects of
# each class. An image is a *major* error when some class appears in only one
# of ground truth / detections, and a *minor* error when all mismatching
# classes appear in both but with different counts. Each image is counted at
# most once, so both totals can be reported as a share of the image count.
minor_errors = 0
major_errors = 0
total_time = 0.0  # summed inference time of every image except the first
first_time = 0.0  # inference time of the first image, tracked separately
DISPLAYING_IMAGES_ENABLED = False  # show annotated images that have errors
SAVE_DETECTED_IMAGES = False  # write annotated images that have errors to disk
VERBOSE = False

# Detections with a score below this value are ignored.
confidence_cutoff = 0.8

# The context manager closes the session once all images are processed.
with tf.Session(graph=detection_graph) as sess:

    for i, filename in enumerate(test_files):

        image_path = TEST_IMAGES_DIR + '/images/' + filename + '.jpg'
        annotation_path = TEST_IMAGES_DIR + '/annotations/' + filename + '.xml'

        # Load the sample image and add a leading batch dimension.
        image = Image.open(image_path)
        image_np = np.expand_dims(np.asarray(image, dtype=np.uint8), 0)

        # Progress output: one line per image when verbose, otherwise a
        # compact ticker ('.' per image, '#' every 10th, count every 100th).
        if VERBOSE:
            print("[{}] Processing {}".format(i+1, image_path))
        elif (i+1) % 100 == 0:
            print("#  {}".format(i+1))
        elif (i+1) % 10 == 0:
            print("#", end='')
        else:
            print(".", end='')

        ground_truth = {
            'Red': 0,
            'Green': 0,
            'Yellow': 0
        }

        detection = {
            'Red': 0,
            'Green': 0,
            'Yellow': 0
        }

        # Count the annotated objects per class. A label other than the
        # three keys above raises KeyError.
        annotation = ET.parse(annotation_path).getroot()
        for tl_object in annotation.findall('object'):
            tl_name = tl_object.find('name')
            ground_truth[tl_name.text] += 1

        # Actual detection; only the sess.run call is timed.
        start_time = time.time()
        (boxes, scores, classes) = sess.run(
            [detection_boxes, detection_scores, detection_classes],
            feed_dict={image_tensor: image_np})
        time_elapsed = time.time() - start_time
        if i == 0:
            first_time = time_elapsed
        else:
            total_time += time_elapsed

        # Drop only the batch dimension. A bare np.squeeze would also
        # collapse the detection axis if the model returned one detection.
        boxes = np.squeeze(boxes, axis=0)
        scores = np.squeeze(scores, axis=0)
        classes = np.squeeze(classes, axis=0)

        # Filter boxes with a confidence score less than `confidence_cutoff`
        boxes, scores, classes = filter_boxes(confidence_cutoff, boxes, scores, classes)

        # Count the remaining detections per class (class ids are 1-based).
        for class_id in classes:
            detected_class = CLASSNAME_LIST[int(class_id) - 1]
            detection[detected_class] += 1

        # Compare against the ground truth; `suffix` ends up as the worst
        # severity found for this image ("major" beats "minor").
        suffix = None
        for light in ground_truth:

            if ground_truth[light] == detection[light]:
                continue

            if ground_truth[light] == 0 or detection[light] == 0:
                suffix = "major"
                if VERBOSE:
                    print("  *** Error: there's a significant difference for {} ".format(light) +
                          "(ground truth: {}, detected: {})".format(ground_truth[light], detection[light]))

            else:
                if suffix != "major":
                    suffix = "minor"

                if VERBOSE:
                    print("  * Warning: there's a difference for {} but it's still OK ".format(light) +
                          "(ground truth: {}, detected: {})".format(ground_truth[light], detection[light]))

        # Count the image once, under its worst severity.
        display_image = suffix is not None
        if suffix == "major":
            major_errors += 1
        elif suffix == "minor":
            minor_errors += 1

        # Annotate the image only if it will actually be shown or saved.
        # The two flags are independent of each other.
        if display_image and (DISPLAYING_IMAGES_ENABLED or SAVE_DETECTED_IMAGES):
            # The current box coordinates are normalized to a range between
            # 0 and 1. This converts them to actual locations on the image.
            width, height = image.size
            box_coords = to_image_coords(boxes, height, width)

            # Each class will be represented by a differently colored box.
            image_draw = draw_boxes(image, box_coords, classes, scores)

            if DISPLAYING_IMAGES_ENABLED:
                display(image_draw)

            if SAVE_DETECTED_IMAGES:
                save_image_path = DETECTED_IMAGES_DIR + '/' + filename + "_detected_" + suffix + ".jpg"
                image_draw.save(save_image_path)


In [None]:
# Displaying some statistics
all_count = len(test_files)
passed = all_count - major_errors

# Guard the percentage denominator so an empty test set prints 0.00%
# instead of raising ZeroDivisionError.
percent_base = float(all_count) if all_count else 1.0

print("Total image count:", all_count)
print("Minor errors: {} ({:.2f}%)".format(minor_errors, (100.0 * minor_errors / percent_base)))
print("Major errors: {} ({:.2f}%)".format(major_errors, (100.0 * major_errors / percent_base)))
print("Passed: {} ({:.2f}%)".format(passed, (100.0 * passed / percent_base)))
print("Total time spent on inferencing: {:.3f} seconds".format(total_time))
print("Inference time for the first image: {:.3f} seconds".format(first_time))

# `total_time` excludes the first image, so the average is taken over the
# remaining images; with fewer than two images there is nothing to average.
other_count = all_count - 1
if other_count > 0:
    print("Average inference time for all the other images: {:.3f} seconds".format(total_time / other_count))
else:
    print("Average inference time for all the other images: n/a")