# OpenVINO model setup

## Imports

In [12]:
from __future__ import print_function
import sys
import os
from argparse import ArgumentParser
import cv2
import time
import logging as log
from openvino.inference_engine import IENetwork, IEPlugin
import numpy as np
from matplotlib import pyplot as plt

## Class setup

In [13]:
class OpenVinoObjectDetectionModel(object):
    """Wrapper around a single-input, single-output OpenVINO detection network.

    The IR is read and loaded onto the requested device when the object is
    constructed. ``detect_objects`` and ``detect_objects_partition`` then run
    synchronous inference and return the detections whose confidence exceeds
    ``prob_threshold``.
    """

    def __init__(self, **kwargs):
        """
        Builds an OpenVINO model.

        Keyword arguments (in order):
        model_path: Path to an .xml file with a trained model.
        cpu_extension: MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the kernels impl.
        plugin_dir: Path to a plugin folder
        device: Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. CPU by default.
        labels_path: Labels mapping file (format .labels)
        prob_threshold: Probability threshold for detections filtering. Float between 0.0 and 1.0.
        """

        # Keep every supplied keyword available as an attribute of the same name
        self.__dict__.update(kwargs)
        log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
        self.generate(**kwargs)
        log.info("Model initialized and loaded.")

    def generate(self, model_path, cpu_extension = None, plugin_dir = None, device = "CPU",
                 labels_path = None, prob_threshold = 0.5):
        """
        Reads the IR, checks layer support and loads the network onto the device.

        Arguments are the ones documented on ``__init__``. ``labels_path`` is
        accepted for interface compatibility but is not used here.

        Raises:
        ValueError: if the CPU plugin does not support some layers of the network.
        AssertionError: if the network does not have exactly one input and one output.
        """

        # Store the threshold explicitly: when the caller relies on the default
        # it is not part of the kwargs copied onto the instance by __init__.
        self.prob_threshold = prob_threshold

        self.model_xml = model_path
        # The weights file is expected next to the .xml, with a .bin extension
        self.model_bin = os.path.splitext(self.model_xml)[0] + ".bin"

        # Plugin initialization for specified device and load extensions library if specified
        log.info("Initializing plugin for {} device...".format(device))
        self.plugin = IEPlugin(device=device, plugin_dirs=plugin_dir)
        if cpu_extension and 'CPU' in device:
            self.plugin.add_cpu_extension(cpu_extension)

        # Read IR
        log.info("Reading IR...")
        self.net = IENetwork.from_ir(model=self.model_xml, weights=self.model_bin)

        if "CPU" in self.plugin.device:
            supported_layers = self.plugin.get_supported_layers(self.net)
            not_supported_layers = [layer for layer in self.net.layers.keys() if layer not in supported_layers]
            if not_supported_layers:
                log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                          format(self.plugin.device, ', '.join(not_supported_layers)))
                log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                          "or --cpu_extension command line argument")
                raise ValueError("Some layers are not supported by the plugin for the specified device {}".format(device))

        assert len(self.net.inputs.keys()) == 1, "Sample supports only single input topologies"
        assert len(self.net.outputs) == 1, "Sample supports only single output topologies"
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))
        log.info("Loading IR to the plugin...")
        # Two infer requests are created; detect_objects alternates between them
        self.exec_net = self.plugin.load(network=self.net, num_requests=2)
        self.cur_request_id = 0
        self.next_request_id = 1

        # Remember the input dimensions so frames can be resized to match
        self.n, self.c, self.h, self.w = self.net.inputs[self.input_blob].shape
        # The network description is no longer needed once it has been loaded
        del self.net

    def detect_objects(self, image, resolution):
        """
        Runs inference on the supplied image.

        Keyword arguments:
        image: Image to be inferenced on
        resolution: Tuple of (width, height) of the image

        Returns a tuple (blob, bboxes): the preprocessed network input and a
        list of (class_id, confidence, xmin, ymin, xmax, ymax) tuples scaled to
        ``resolution``. The list is empty when the request does not complete
        successfully.
        """
        blob = self._preprocess(image)

        # Regular (synchronous) mode: start the CURRENT request and immediately wait for its completion
        bboxes = self._infer(self.cur_request_id, blob, resolution[0], resolution[1])

        # Alternate between the two infer requests
        self.next_request_id, self.cur_request_id = self.cur_request_id, self.next_request_id
        return blob, bboxes

    def detect_objects_partition(self, subimages, partition):
        """
        Runs inference on the supplied subimages. Returns bboxes with original image coordinates

        Keyword arguments:
        subimages: List of images to be inferenced on
        partition: Dict with keys ("xmins", "xmaxs", "ymins", "ymaxs"), each of which contains
                   a list of coordinates corresponding to the subimages
        """
        bboxes = []

        for i, image in enumerate(subimages):
            blob = self._preprocess(image)

            part_width = partition["xmaxs"][i] - partition["xmins"][i]
            part_height = partition["ymaxs"][i] - partition["ymins"][i]

            # Request 0 is used for every sub-image, and the results are read back
            # from that same request (not from whichever request detect_objects
            # last rotated to).
            bboxes.extend(self._infer(0, blob, part_width, part_height,
                                      partition["xmins"][i], partition["ymins"][i]))

        return bboxes

    def _preprocess(self, image):
        """Resizes an HWC image to the network input size and reshapes it to (n, c, h, w)."""
        resized = cv2.resize(image, (self.w, self.h))
        chw = resized.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        return chw.reshape((self.n, self.c, self.h, self.w))

    def _infer(self, request_id, blob, width, height, x_offset = 0, y_offset = 0):
        """Runs one blocking inference on ``request_id`` and returns its parsed detections."""
        self.exec_net.start_async(request_id=request_id, inputs={self.input_blob: blob})
        request = self.exec_net.requests[request_id]

        status = request.wait(-1)
        if status != 0:
            # Report the failure and return no detections instead of using stale output
            log.warning("Inference request {} finished with status {}".format(request_id, status))
            return []

        return self._parse_detections(request.outputs[self.out_blob], width, height, x_offset, y_offset)

    def _parse_detections(self, detections, width, height, x_offset = 0, y_offset = 0):
        """Converts raw network output into (class_id, confidence, xmin, ymin, xmax, ymax) tuples.

        Each row of ``detections[0][0]`` is read as: [1] class id, [2] confidence,
        [3..6] xmin, ymin, xmax, ymax. The coordinates are multiplied by
        ``width``/``height`` (presumably they are fractions of the image size -
        TODO confirm against the model) and shifted by the given offsets.
        """
        bboxes = []
        for obj in detections[0][0]:
            confidence = obj[2]
            # Keep only objects whose probability is above the specified threshold
            if confidence > self.prob_threshold:
                xmin = int(obj[3] * width) + x_offset
                ymin = int(obj[4] * height) + y_offset
                xmax = int(obj[5] * width) + x_offset
                ymax = int(obj[6] * height) + y_offset
                class_id = int(obj[1])
                bboxes.append((class_id, confidence, xmin, ymin, xmax, ymax))
        return bboxes


# Detection and tracking model setup

## Class setup

In [14]:
class SafeHomeDetector(object):
    """Runs an object detection model over a video, draws the results and shows/saves the frames."""

    def __init__(self, model, classes, drawer, resize_resolution = None):
        """
        Keyword arguments:
        model:              Detection model exposing detect_objects(image, resolution)
        classes:            List of class names, indexed by the class id returned by the model
        drawer:             Object exposing draw_overlay(image, fps) and
                            draw_bboxes_on_image(image, bboxes, classes)
        resize_resolution:  Tuple (width, height) every frame is resized to.
                            If None, frames keep the resolution of the video.
        """

        self.timer = Timer()
        self.timer.start_task("Setup")

        self.model = model
        self.classes = classes
        self.drawer = drawer

        # Always define both attributes: execute() reads resize_resolution for every frame,
        # also when no resizing was requested.
        self.resize_resolution = resize_resolution
        self.resolution = resize_resolution

        self.model_name = "OpenVINO Object Detection"

    def execute(self, vid_source, output_path = None):
        """
        Starts a session of detection on the provided video using provided data.

        Keyword arguments:
        vid_source:         String. Full path to video or video stream
        output_path:        String. Full path to file where the output will be saved.
                            If not provided, no video will be saved.
        """

        self._initialize_run(vid_source, output_path)

        # Timestamp of the previous frame, used to measure FPS
        time_prev = time.time()

        try:
            while True:

                self.timer.start_task("Video/image processing")

                # Get next frame from video; ret becomes False when all frames have been read
                ret, self.current_image = self.cap.read()
                if not ret:
                    break

                # Resize if needed
                if self.resize_resolution is not None:
                    self.current_image = cv2.resize(self.current_image, self.resolution)

                self.counters["frames"] += 1

                # Do detection
                self.timer.start_task("Detection")
                _, bboxes_detected = self.model.detect_objects(self.current_image, self.resolution)

                self.timer.start_task("Drawing on image")

                # Calculate FPS; the small constant avoids a division by zero
                time_this = time.time()
                fps = 1 / (time_this - time_prev + 0.00001)
                time_prev = time_this

                # Draw overlay
                self.current_image = self.drawer.draw_overlay(self.current_image, fps)

                # Draw bboxes on the image
                self.current_image = self.drawer.draw_bboxes_on_image(self.current_image,
                                                                      bboxes_detected, self.classes)

                self.timer.start_task("Video display")

                # Write frame to output if applicable
                if self.output_path is not None:
                    self.video_output.write(self.current_image)

                # Show output in window; pressing "q" ends the session
                cv2.imshow(self.model_name, self.current_image)
                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    break
        finally:
            # Release the capture/writer and close the window even if a frame fails
            self._finalize_run()

    def _initialize_run(self, vid_source, output_path):
        """Opens the video source, resets the counters and creates the optional video writer."""
        self.output_path = output_path

        self.counters = {
            "n_vehicles": 0,
            "lost_trackers": 0,
            "frames": 0,
        }

        self.cap = cv2.VideoCapture(vid_source)
        if not self.cap.isOpened():
            print("Warning: could not open video source: {}".format(vid_source))

        framerate = self.cap.get(cv2.CAP_PROP_FPS)

        # Without a requested resize, work at the resolution of THIS video
        # (recomputed on every run so a previous video's size is not reused).
        if self.resize_resolution is not None:
            self.resolution = self.resize_resolution
        else:
            self.resolution = (int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                               int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        # If an output path is specified, create a video output with the same
        # frame rate as the input video and the working resolution
        if self.output_path is not None:
            codec = cv2.VideoWriter_fourcc('m','p','4','v') # fourcc stands for four character code
            self.video_output = cv2.VideoWriter(self.output_path, codec, framerate, self.resolution)
        self.timer.stop()

    def _finalize_run(self):
        """Releases the video resources and prints the accumulated time per task."""
        cv2.destroyAllWindows()
        self.cap.release()
        if self.output_path is not None:
            self.video_output.release()

        self.timer.stop()
        timers = self.timer.get_timers()
        frames = self.counters["frames"]

        print("\n======== Timer summary ========")
        for key in timers:
            total = timers[key]
            # No per-frame figure exists when not a single frame was processed
            per_frame = total / frames if frames else float("nan")
            print("{}:{} {:0.4f} seconds \t({:0.4f} s per frame)".format(key, " " * (25-len(key)), total, per_frame))

    def _get_iou(self, box1, box2):
        """Returns the intersection over union (IoU) between box1 and box2

        Arguments:
        box1 -- first box, list object with coordinates (x1, y1, x2, y2)
        box2 -- second box, list object with coordinates (x1, y1, x2, y2)

        Returns a float between 0.0 and 1.0; 0.0 when the boxes do not overlap
        or when both boxes have no area.
        """

        # Calculate the (x1, y1, x2, y2) coordinates of the intersection of box1 and box2
        xi1 = max(box1[0], box2[0])
        yi1 = max(box1[1], box2[1])
        xi2 = min(box1[2], box2[2])
        yi2 = min(box1[3], box2[3])

        # Clamp each side at zero: for disjoint boxes the raw differences are negative,
        # and multiplying them would give a wrong (even positive) intersection area.
        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)

        # Calculate the Union area by using Formula: Union(A,B) = A + B - Inter(A,B)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = box1_area + box2_area - inter_area

        if union_area <= 0:
            return 0.0

        # float() keeps this a true division for integer coordinates under Python 2 as well
        return float(inter_area) / union_area

# Helper functions and classes

## Drawer object

In [15]:
class OverlayDrawer(object):
    """Draws bounding boxes, class labels and an FPS counter onto images using OpenCV."""

    def __init__(self, num_classes, colors = None):
        """
        Keyword arguments:
        num_classes: Number of classes; one color per class is needed
        colors:      Optional sequence with one color per class. If omitted, or if its
                     length differs from num_classes, a palette is generated instead.
        """
        self.font = cv2.FONT_HERSHEY_SIMPLEX
        self.font_scale_small = 0.5
        self.font_scale_large = 1.0
        self.thickness_small = 1
        self.thickness_medium = 2
        self.thickness_large = 4
        self.padding = 4

        # "is None" (not "== None") so array-like colors are not compared element-wise
        if colors is not None and len(colors) == num_classes:
            self.colors = colors
        else:
            if colors is not None:
                print("Number of colors is not equal to number of classes.")
            self.colors = self._default_colors(num_classes)

        # Some standard colors to use
        self.c_fps = (30, 255, 20) # Green
        self.c_white = (255, 255, 255) # White

    @staticmethod
    def _default_colors(num_classes):
        """Returns num_classes 3-tuples of 0-255 ints sampled evenly from matplotlib's hsv colormap."""
        palette = plt.cm.hsv(np.linspace(0, 1, num_classes)).tolist()
        # Only the first three channels of each colormap entry are used
        return [(int(entry[0] * 255), int(entry[1] * 255), int(entry[2] * 255)) for entry in palette]

    def _label_top(self, top, bottom, label_height):
        """Returns the y coordinate of the top edge of the label box.

        The label goes above the bounding box when the whole label (text height
        plus padding) fits inside the image there; otherwise it goes right below.
        """
        above = top - label_height - self.padding
        return above if above >= 0 else bottom

    def draw_bboxes_on_image(self, image, bboxes, classes):
        """
        Draws every bounding box with a "<class name> <confidence>" label and returns the image.

        Keyword arguments:
        image:   Image to draw on
        bboxes:  List of (class_id, confidence, left, top, right, bottom) tuples
        classes: List of class names indexed by class_id
        """
        for bbox in bboxes:
            classification = int(bbox[0])
            confidence = bbox[1]
            left, top, right, bottom = bbox[2], bbox[3], bbox[4], bbox[5]

            color = self.colors[classification]
            label = "{} {:.2f}".format(classes[classification], confidence)

            # Draw main box
            image = cv2.rectangle(image, (left, top), (right, bottom), color, 2)

            # Measure the label text; getTextSize returns ((width, height), baseline)
            label_size = cv2.getTextSize(label, self.font, self.font_scale_small, self.thickness_small)
            label_width = int(label_size[0][0])
            label_height = int(label_size[0][1])

            # Filled label box aligned with the left edge of the main box
            label_top = self._label_top(top, bottom, label_height)
            label_bottom = label_top + label_height + self.padding
            label_left = left
            label_right = left + label_width + self.padding
            image = cv2.rectangle(image, (label_left, label_top), (label_right, label_bottom), color, -1)
            image = cv2.putText(image, label,
                                (label_left + int(self.padding * 0.5), label_bottom - int(self.padding * 0.5)),
                                self.font, self.font_scale_small, self.c_white, self.thickness_small)

        return image

    def draw_overlay(self, image, fps):
        """Writes the FPS value in the top left corner of the image and returns the image."""
        cv2.putText(image, "FPS: {:.2f}".format(fps), (3, 25), self.font, self.font_scale_large,
                    self.c_fps, self.thickness_medium)

        return image

## Timer object

In [16]:
import time
class Timer(object):
    """Accumulates wall-clock time per named task.

    Every time start_task(task) is called, the time elapsed since the previous
    start is credited to the task that was running, and the new task becomes
    the running one. stop() credits the running task and leaves no task running.
    """

    def __init__(self):
        self._timers = {}
        self._time_now = time.time()
        self._time_prev = time.time()
        self._curr_task = None

    def _credit_running_task(self):
        # Take a fresh timestamp and add the elapsed time to the running task, if any
        self._time_now = time.time()
        running = self._curr_task
        if running is None:
            return
        elapsed = self._time_now - self._time_prev
        self._timers[running] = self._timers.get(running, 0.0) + elapsed

    def start_task(self, task):
        """Closes the running task (if any) and starts timing `task`."""
        self._credit_running_task()
        self._curr_task = task
        # The new task is measured from this moment on
        self._time_prev = time.time()

    def stop(self):
        """Closes the running task (if any) without starting a new one."""
        self._credit_running_task()
        self._curr_task = None

    def get_timers(self):
        """Returns the dict mapping task name to accumulated seconds."""
        return self._timers

# Execution

## Fixed parameters

In [28]:
### Fixed parameters. These normally do not need to be changed unless new models are added ###

# Root folder of the OpenVINO installation. Validate it on every new installation of OpenVINO
openvino_dir = "C:/Intel/computer_vision_sdk_2018.4.420"

# Class names; the position in the list is the class id
classes_mscoco = [
    'BG',
    'person',
    'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]
classes_intel = ["bg", "face", "unknown"]

# Device management: name of the model sub-folder (precision) used for each device
data_type = {
    "CPU": "FP32",
    "GPU": "FP16",
    "MYRIAD": "FP16",
}

# Names of the available models
models = [
    "Intel face detection",
    "TF SSD MobileNet v2",
    "TF SSD MobileNet FPN",
    "TF SSDlite MobileNet v2",
]

# Per model: (folder relative to openvino_dir, file name of the .xml)
model_paths = {
    "Intel face detection": (
        "deployment_tools/intel_models/face-detection-adas-0001",
        "face-detection-adas-0001.xml",
    ),
    "TF SSD MobileNet v2": (
        "deployment_tools/custom_models/tensorflow_ssd_mobilenet_v2_coco",
        "ssd_mobilenet_v2_coco.xml",
    ),
    "TF SSD MobileNet FPN": (
        "deployment_tools/custom_models/ssd_mobilenet_v1_fpn_640x640",
        "ssd_mobilenet_v1_fpn_640x640.xml",
    ),
    "TF SSDlite MobileNet v2": (
        "deployment_tools/custom_models/ssdlite_mobilenet_v2",
        "ssdlite_mobilenet_v2.xml",
    ),
}

# Per model: the class names it reports. All models use the MS COCO names
# except the Intel face detector.
classes = dict.fromkeys(models, classes_mscoco)
classes["Intel face detection"] = classes_intel

## Free parameters

In [29]:
# Model to run; must be one of the names listed in `models`
model_choice = models[0]
# Path to the CPU extension library, passed to the model as `cpu_extension`.
# NOTE(review): this path points into computer_vision_sdk_2018.3.343 while openvino_dir
# points to computer_vision_sdk_2018.4.420 - confirm that mixing the two versions is intended
cpu_extension = "C:/Intel/computer_vision_sdk_2018.3.343/deployment_tools/inference_engine/bin/intel64/Release/cpu_extension.dll"
# Plugin folder, passed to the model as `plugin_dir`
plugin_dir = None
device = "CPU" # Options are CPU, GPU (may require updated GPU drivers) and MYRIAD (requires Movidius NCS)
# Labels mapping file, passed to the model as `labels_path`
labels_path = None 
prob_threshold = 0.5 # Confidence threshold for detections - all detections with lower confidence will be discarded

video_resolution = (1280, 720) # (width, height) the video will be resized to. Set to None for no resizing
vid_source = "input/burglary.mp4" # Path to source video
# If a path is provided (including the file name), the output is written as a video file to that path.
# NOTE(review): the original hint suggested an .avi file name, while the video writer is created
# with the 'mp4v' codec - confirm which file extension works on the target machine
output_path = None

## Run program

In [30]:
# Initialize model
# The IR file is located at <openvino_dir>/<model folder>/<precision folder>/<xml file name>
model_path = "{root}/{folder}/{precision}/{xml}".format(
    root=openvino_dir,
    folder=model_paths[model_choice][0],
    precision=data_type[device],
    xml=model_paths[model_choice][1],
)
model = OpenVinoObjectDetectionModel(
    model_path=model_path,
    cpu_extension=cpu_extension,
    plugin_dir=plugin_dir,
    device=device,
    labels_path=labels_path,
    prob_threshold=prob_threshold,
)

[ INFO ] Initializing plugin for CPU device...
[ INFO ] Reading IR...
[ INFO ] Loading IR to the plugin...
[ INFO ] Model initialized and loaded.


In [31]:
# Initialize drawer object with one color per class of the chosen model
drawer = OverlayDrawer(num_classes=len(classes[model_choice]))

In [32]:
# Initialize controller class that ties the model, the class names and the drawer together
program = SafeHomeDetector(
    model=model,
    classes=classes[model_choice],
    drawer=drawer,
    resize_resolution=video_resolution,
)

In [33]:
# Execute program: process the source video and optionally save the annotated output
program.execute(vid_source, output_path=output_path)


Setup:                     0.5392 seconds 	(0.0009 s per frame)
Video/image processing:    5.1763 seconds 	(0.0087 s per frame)
Detection:                 21.1673 seconds 	(0.0354 s per frame)
Drawing on image:          0.1209 seconds 	(0.0002 s per frame)
Video display:             2.7487 seconds 	(0.0046 s per frame)
