In [1]:
# For running inference on the TF-Hub module.
import tensorflow as tf
import tensorflow_hub as hub

# For displaying the image.
import matplotlib.pyplot as plt

# For drawing onto the image.
import numpy as np
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time

import os
import warnings

# Print the TensorFlow version this notebook was run with.
print(tf.__version__)

# Report the GPU device name TensorFlow sees
# (NOTE(review): per the TF docs this is an empty string when no GPU is found).
print("The following GPU devices are available: %s" % tf.test.gpu_device_name())

2.6.0
The following GPU devices are available: /device:GPU:0


## Helper functions for visualization

In [2]:
def display_image(image):
    """Show ``image`` in a new 20x15-inch matplotlib figure with the grid off.

    Args:
        image: Anything ``plt.imshow`` accepts; the notebook passes either the
            decoded image tensor or the array returned by ``draw_boxes``.
    """
    # The figure handle is not needed afterwards, so it is not bound to a name.
    plt.figure(figsize=(20, 15))
    plt.grid(False)
    plt.imshow(image)
    plt.show()
    # Blank line after the figure (presumably to separate consecutive outputs).
    print()

def _text_size(font, text):
    """Return ``(width, height)`` of ``text`` when rendered with ``font``.

    ``font.getsize`` was removed in Pillow 10, so ``font.getbbox`` is used when
    the font object provides it; ``getsize`` remains as a fallback for older
    Pillow versions.
    """
    if hasattr(font, "getbbox"):
        # getbbox returns (left, top, right, bottom); right/bottom mirror what
        # getsize used to report.
        bbox = font.getbbox(text)
        return bbox[2], bbox[3]
    return font.getsize(text)


def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color,
                               font,
                               thickness=4,
                               display_str_list=()):
    """Draw one bounding box and its stacked text labels onto ``image`` in place.

    Args:
        image: PIL image to draw on (modified in place).
        ymin, xmin, ymax, xmax: Box corners as fractions of the image height /
            width; they are multiplied by ``image.size`` to get pixels.
        color: Fill colour for the box outline and the label background.
        font: PIL font used to measure and render the labels.
        thickness: Outline width in pixels.
        display_str_list: Label strings; the last one is drawn closest to the
            box edge and earlier ones are stacked above it.
    """
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    left, right = xmin * im_width, xmax * im_width
    top, bottom = ymin * im_height, ymax * im_height
    draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)],
              width=thickness,
              fill=color)

    # Measure every label once; the sizes are needed both for the total height
    # and for drawing.
    text_sizes = [_text_size(font, ds) for ds in display_str_list]
    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(h for _, h in text_sizes)

    # If the labels would not fit above the box (they would run past the top of
    # the image), place them just inside the top edge of the box instead.
    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = top + total_display_str_height

    # Reverse list and print from bottom to top.
    for display_str, (text_width, text_height) in zip(display_str_list[::-1],
                                                      text_sizes[::-1]):
        margin = np.ceil(0.05 * text_height)
        draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                        (left + text_width, text_bottom)],
                       fill=color)
        draw.text((left + margin, text_bottom - text_height - margin),
                  display_str,
                  fill="black",
                  font=font)
        # Move up by the full height of the rectangle just drawn (text plus both
        # margins) so the next label sits on top of it instead of overlapping.
        text_bottom -= text_height + 2 * margin


def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1,
               font_path="C:\\Users\\user\\fonts\\LiberationSansNarrow-Regular.ttf",
               font_size=25):
    """Overlay labeled boxes on an image with formatted scores and label names.

    Args:
        image: Image array; it is overwritten in place with the annotated
            pixels and also returned.
        boxes: 2-D array whose rows are ``(ymin, xmin, ymax, xmax)``.
        class_names: Sequence of ASCII ``bytes`` labels, one per box.
        scores: Sequence of detection scores, one per box.
        max_boxes: Only the first ``max_boxes`` rows of ``boxes`` are considered.
        min_score: Boxes scoring below this value are not drawn.
        font_path: TrueType font file for the labels. The default keeps the
            notebook's original location; PIL's built-in font is used when the
            file cannot be opened.
        font_size: Point size passed to ``ImageFont.truetype``.

    Returns:
        The same ``image`` object that was passed in.
    """
    colors = list(ImageColor.colormap.values())

    try:
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        print("Font not found, using default font.")
        font = ImageFont.load_default()

    # Converted lazily and only once: every box is drawn on the same PIL image,
    # and the pixels are copied back a single time after the loop.
    image_pil = None
    for i in range(min(boxes.shape[0], max_boxes)):
        if not scores[i] >= min_score:
            continue
        if image_pil is None:
            image_pil = Image.fromarray(np.uint8(image)).convert("RGB")

        ymin, xmin, ymax, xmax = tuple(boxes[i])
        display_str = "{}: {}%".format(class_names[i].decode("ascii"),
                                       int(100 * scores[i]))
        # hash() of bytes is salted per interpreter process, so it would give a
        # class a different colour on every kernel restart; derive the colour
        # index from the label bytes themselves instead.
        color_index = int.from_bytes(bytes(class_names[i]), "big") % len(colors)
        draw_bounding_box_on_image(
            image_pil,
            ymin,
            xmin,
            ymax,
            xmax,
            colors[color_index],
            font,
            display_str_list=[display_str])

    # Leave the input untouched when nothing passed the score threshold.
    if image_pil is not None:
        np.copyto(image, np.array(image_pil))
    return image

## TF-Hub module

In [3]:
# TF-Hub handle of the detector; per the handle path this is a Faster R-CNN with an
# Inception-ResNet v2 backbone for Open Images v4.
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"

# Load the module and keep its 'default' signature; run_detector calls it on a batched float image.
detector = hub.load(module_handle).signatures['default']

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


## Util functions

In [4]:
def selectCars(detection_class_entities):
    """
    Flag which detections are road vehicles.

    detection_class_entities - 1-d sequence (e.g. np.ndarray) of bytes class labels
    return bool np.ndarray of the same length, True where the label is one of
    b'Car', b'Bus', b'Van' or b'Truck'
    """
    # any of these classes are considered as car
    car_classes = (b'Car', b'Bus', b'Van', b'Truck')

    # dtype=bool makes the result match the documented contract (the float
    # zeros/ones returned previously could not be used as a mask) and keeps the
    # empty-input case a well-typed empty array.
    return np.array([label in car_classes for label in detection_class_entities],
                    dtype=bool)


def largestAreaIndex(detection_class_entities, detection_boxes, detection_scores, height, width,
                     min_score=0.1):
    """
    Find the vehicle detection whose bounding box covers the largest area.

    detection_class_entities - bytes np.ndarray of class of objects
    detection_boxes - 2d float np.ndarray. Each array is of size 4 (upper, left, lower, right);
                      values are scaled by height / width, so presumably normalised to [0, 1]
    detection_scores - per-detection confidence, aligned with the other arrays
    height - height of actual image
    width - width of actual image
    min_score - detections scoring below this are ignored (default 0.1)
    returns the index of the biggest box which is a car, or -1 when no detection qualifies
    """
    isCar = selectCars(detection_class_entities)

    maximumIndex = -1
    maximumArea = -1

    for index, car in enumerate(isCar):
        # skip low-confidence detections and anything that is not a vehicle
        if detection_scores[index] < min_score or not car:
            continue

        box = detection_boxes[index]
        upper = height*box[0]
        left = width*box[1]
        lower = height*box[2]
        right = width*box[3]

        area = abs(upper-lower)*abs(left-right)

        # strict '>' keeps the earliest detection when two areas tie
        if area > maximumArea:
            maximumArea = area
            maximumIndex = index

    return maximumIndex # index of car with the largest area


def image_shape(img_shape):
    """
    Extract the first two entries of a shape as plain Python ints.

    img_shape - indexable shape (e.g. the tensor returned by tf.shape)
    return: (height, width) taken from positions 0 and 1
    """
    rows, cols = img_shape[0], img_shape[1]
    return int(rows), int(cols)

## Running detector functions

In [8]:
def load_img(path):
    """Read the file at ``path`` and decode it as a 3-channel image tensor."""
    raw_bytes = tf.io.read_file(path)
    return tf.image.decode_jpeg(raw_bytes, channels=3)

def run_detector(detector, path):
    """Run ``detector`` on one image and show the largest detected vehicle.

    Args:
        detector: Callable taking a batched float32 image tensor and returning
            a dict of tensors with at least ``detection_boxes``,
            ``detection_class_entities`` and ``detection_scores``.
        path: Path of the image file to load.

    Returns:
        ``(detection_boxes, detection_class_entities, detection_scores,
        percentage, index)`` where ``index`` is the chosen detection and
        ``percentage`` is its score rounded to two decimals (a 0-1 fraction).
        When no vehicle is found, a warning is issued and all five values
        are -1.
    """
    print("image path:", path)
    img = load_img(path)

    # get height and width as integers
    height, width = image_shape(tf.shape(img))

    converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
    # perf_counter is the monotonic clock intended for measuring intervals.
    start_time = time.perf_counter()
    result = detector(converted_img)
    end_time = time.perf_counter()

    result = {key: value.numpy() for key, value in result.items()}

    print("Inference time: ", end_time-start_time)

    # select index of car with largest area
    # this car should have detection_score >= 0.1
    index = largestAreaIndex(result["detection_class_entities"],
                             result["detection_boxes"],
                             result["detection_scores"], height, width)

    if index == -1: # case where a car is not detected
        warnings.warn("Car was not found for this image. All returned values are -1")
        display_image(img)
        return -1, -1, -1, -1, -1

    # percentage is the detection_score of the car with the largest area.
    # Convert to a Python float before rounding: a rounded np.float32 such as
    # 0.9 is stored as 0.89999998 and, under NumPy 1.x promotion, compares as
    # "< 0.9", which would trigger the warning below spuriously.
    percentage = round(float(result["detection_scores"][index]), 2)
    # The score is a 0-1 fraction, so format it as a real percentage.
    print("percentage: ", "{:.0%}".format(percentage))
    if percentage < 0.9:
        warnings.warn("Percentage is "+ str(percentage) +" which is less than 0.9 - model is unsure of bounding box")

    # draw bounding box around the car at index
    image_with_boxes = draw_boxes(
        img.numpy(),
        result["detection_boxes"][index].reshape(1, 4),
        np.array([result["detection_class_entities"][index]]),
        np.array([result["detection_scores"][index]]))

    display_image(image_with_boxes)

    print()
    return result["detection_boxes"], result["detection_class_entities"], result["detection_scores"], percentage, index

In [None]:
parent_dir = r"test_images/"
# Sort for a reproducible processing order, and skip sub-directories and hidden
# entries (e.g. ".ipynb_checkpoints") that are not image files.
image_files = sorted(
    name for name in os.listdir(parent_dir)
    if not name.startswith(".") and os.path.isfile(os.path.join(parent_dir, name))
)

# holds information for cropping, keyed by image file name
image_files_dict = {}

# running the detector for each image
for img in image_files:
    detection_boxes, detection_class_entities, detection_scores, percentage, index = run_detector(
        detector, os.path.join(parent_dir, img))

    image_files_dict[img] = {
        "detection_boxes": detection_boxes,
        "detection_class_entities": detection_class_entities,
        "detection_scores": detection_scores,
        "percentage": percentage,
        "index": index,
    }

## Cropping the car image

In [None]:
# Show, for every processed image, the crop around the selected vehicle
# (or the untouched image with a red title when none was detected).
for img, detection in image_files_dict.items():
    detection_boxes = detection["detection_boxes"]
    index = detection["index"]

    with Image.open(parent_dir+img) as im:
        # index == -1 is the "no car found" sentinel stored by the detection loop
        if index == -1:
            plt.figure(figsize=(20, 15))
            plt.imshow(im)
            plt.title("NO CAR DETECTED", color="red")
            plt.show()
            print()
            continue

        width, height = im.size

        # box entries are scaled by the image size to get pixel coordinates
        box = detection_boxes[index]
        upper, lower = height*box[0], height*box[2]
        left, right = width*box[1], width*box[3]

        # fraction of the full image covered by the box; scales the figure size
        w_ratio = (right-left)/width
        h_ratio = (lower-upper)/height

        cropped = im.crop((left, upper, right, lower))
        plt.figure(figsize=(20*w_ratio, 15*h_ratio))
        plt.imshow(cropped)
        plt.title(detection["detection_class_entities"][index].decode("ascii"))
        plt.show()
        print()