### ISS VSE CA2 Part 1

#### By: Kenneth Goh (A0198544N), Raymond Ng (A0198543R), Tan Heng Han (A0198502B)

### Imports

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import cv2
from time import time
from math import exp as exp

import numpy as np
import sys
# Make the OpenVINO Python bindings importable. The path is written as a raw
# string because it contains backslashes: in a normal string literal sequences
# such as "\P", "\I" and "\p" are invalid escapes, which Python only tolerates
# with a deprecation warning.
sys.path.append(r'C:\Program Files (x86)\IntelSWTools\openvino_2019.2.275\python\python3.6')

try:
    from openvino import inference_engine as ie
    from openvino.inference_engine import IENetwork, IECore
except Exception as e:
    exception_type = type(e).__name__
    print("The following error happened while importing Python API module:\n[ {} ] {}".format(exception_type, e))

In [2]:
# Device used for inferencing. To switch device, replace the active
# assignment with one of the alternatives:
#   deviceType = 'CPU'     # inference on the CPU
#   deviceType = 'MYRIAD'  # inference on the NCS
deviceType = "GPU"  # inference on the GPU

### Support Methods for YOLOv3

In [3]:
# Class names indexed by the class id produced by the detector
# (the list position is the class id).
labels = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
    "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

In [4]:
class YoloV3Params:
    """Settings of one YOLOv3 output layer.

    Attributes set by the constructor:
        num:     number of anchor boxes handled by the layer.
        coords:  number of box coordinate entries per prediction.
        classes: number of class entries per prediction.
        anchors: flat list of floats, two consecutive values per anchor.
        side:    the ``side`` argument, stored unchanged.
    """

    def __init__(self, param, side):
        """Read the layer settings from ``param``, falling back to defaults.

        Args:
            param: mapping of layer parameters. The keys ``num``, ``coords``,
                ``classes``, ``anchors`` and ``mask`` are optional; ``anchors``
                and ``mask`` are comma-separated strings.
            side: value stored as ``self.side``.
        """
        self.side = side
        self.num = int(param['num']) if 'num' in param else 3
        self.coords = int(param['coords']) if 'coords' in param else 4
        self.classes = int(param['classes']) if 'classes' in param else 80

        if 'anchors' in param:
            self.anchors = [float(value) for value in param['anchors'].split(',')]
        else:
            self.anchors = [10.0, 13.0, 16.0, 30.0, 33.0, 23.0,
                            30.0, 61.0, 62.0, 45.0, 59.0, 119.0,
                            116.0, 90.0, 156.0, 198.0, 373.0, 326.0]

        if 'mask' in param:
            # The mask selects which anchor pairs this layer uses; it also
            # overrides ``num`` with the number of selected anchors.
            selected = [int(position) for position in param['mask'].split(',')]
            self.num = len(selected)
            self.anchors = [self.anchors[2 * position + offset]
                            for position in selected
                            for offset in (0, 1)]

In [5]:
def entry_index(side, coord, classes, location, entry):
    """Return the flat index of one entry of one prediction.

    ``location`` is split into an anchor number (``location // side**2``) and
    a cell offset (``location % side**2``). Each anchor owns
    ``coord + classes + 1`` consecutive planes of ``side**2`` values, and
    ``entry`` selects the plane within that anchor.
    """
    cells = side ** 2
    anchor, cell = divmod(location, cells)
    planes_per_anchor = coord + classes + 1
    return int(cells * (anchor * planes_per_anchor + entry) + cell)

In [6]:
def scale_bbox(x, y, h, w, class_id, confidence, h_scale, w_scale):
    """Convert a centre/size box into a scaled corner box.

    ``(x, y)`` is the box centre and ``(w, h)`` its size. The top-left corner
    is scaled by ``w_scale`` / ``h_scale`` and truncated to int; the
    bottom-right corner is that corner plus the scaled size, truncated again.

    Returns:
        dict with keys ``xmin``, ``xmax``, ``ymin``, ``ymax``, ``class_id``
        and ``confidence``.
    """
    left = int((x - w / 2) * w_scale)
    top = int((y - h / 2) * h_scale)
    return {
        'xmin': left,
        'xmax': int(left + w * w_scale),
        'ymin': top,
        'ymax': int(top + h * h_scale),
        'class_id': class_id,
        'confidence': confidence,
    }

In [7]:
def intersection_over_union(box_1, box_2):
    """Return the intersection-over-union ratio of two corner boxes.

    Both arguments are mappings with ``xmin``, ``xmax``, ``ymin`` and ``ymax``
    keys. Boxes that do not overlap give an overlap area of 0, and 0 is
    returned when the union area is zero.
    """
    def _area(box):
        return (box['ymax'] - box['ymin']) * (box['xmax'] - box['xmin'])

    overlap_w = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
    overlap_h = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
    # A negative extent on either axis means the boxes are disjoint.
    overlap = 0 if (overlap_w < 0 or overlap_h < 0) else overlap_w * overlap_h

    union = _area(box_1) + _area(box_2) - overlap
    if union == 0:
        return 0
    return overlap / union

In [8]:
def parse_yolo_region(blob, resized_image_shape, original_im_shape, params, threshold):
    """Decode one output blob into a list of detection dicts.

    Args:
        blob: 4-dimensional array whose last two dimensions must be equal.
        resized_image_shape: ``(height, width)`` of the network input image.
        original_im_shape: ``(height, width)`` of the original frame.
        params: object providing ``side``, ``num``, ``coords``, ``classes``
            and ``anchors`` (see ``YoloV3Params``).
        threshold: minimum value for both the per-box score and the
            per-class confidence.

    Returns:
        List of dicts built by ``scale_bbox``, with box corners scaled from
        the resized image to the original image.
    """
    _, _, grid_h, grid_w = blob.shape
    assert grid_w == grid_h, "Invalid size of output blob. It sould be in NCHW layout and height should " \
                             "be equal to width. Current height = {}, current width = {}" \
                             "".format(grid_h, grid_w)

    orig_im_h, orig_im_w = original_im_shape
    resized_image_h, resized_image_w = resized_image_shape
    side = params.side
    cells = side * side
    flat = blob.flatten()
    detections = []

    for cell in range(cells):
        row, col = divmod(cell, side)
        for anchor in range(params.num):
            location = anchor * cells + cell

            # The entry at offset ``coords`` is the per-box score; it also
            # scales every class value below.
            score = flat[entry_index(side, params.coords, params.classes, location, params.coords)]
            if score < threshold:
                continue

            box_index = entry_index(side, params.coords, params.classes, location, 0)
            x = (col + flat[box_index + 0 * cells]) / side * resized_image_w
            y = (row + flat[box_index + 1 * cells]) / side * resized_image_h

            # exp() can overflow on very large raw values; such boxes are skipped.
            try:
                w_exp = exp(flat[box_index + 2 * cells])
                h_exp = exp(flat[box_index + 3 * cells])
            except OverflowError:
                continue
            w = w_exp * params.anchors[2 * anchor]
            h = h_exp * params.anchors[2 * anchor + 1]

            for class_id in range(params.classes):
                class_index = entry_index(side, params.coords, params.classes, location,
                                          params.coords + 1 + class_id)
                confidence = score * flat[class_index]
                if confidence < threshold:
                    continue
                detections.append(
                    scale_bbox(x=x, y=y, h=h, w=w, class_id=class_id, confidence=confidence,
                               h_scale=orig_im_h / resized_image_h,
                               w_scale=orig_im_w / resized_image_w))
    return detections

### Convert YOLOv3 TensorFlow Model

In [9]:
# Model Optimizer inputs: the frozen graph and the custom operations
# configuration, plus the directory that receives the generated IR.
pb_file = "./model/Full_yolov3_model.pb"
json_file = "./model/yolo_v3.json"
output_dir = "./model"
# Extension library registered with the "CPU" device when deviceType is 'CPU'.
cpu_ext = r"C:\Users\raymo\Documents\Intel\OpenVINO\inference_engine_samples_build\intel64\Release\cpu_extension.dll"

In [10]:
# Path of the Model Optimizer script. It is a raw string so the backslashes of
# the Windows path are never read as escape sequences ("\P", "\I", "\d", "\m"
# are invalid escapes that Python only tolerates with a deprecation warning).
# The embedded double quotes are part of the value; presumably they keep the
# path, which contains spaces, as one argument when interpolated into %run.
mo_tf_path = r'"C:\Program Files (x86)\IntelSWTools\openvino\deployment_tools\model_optimizer\mo_tf.py"'

In [11]:
# Change data_type tuo FP32 if deviceType is CPU, else change data_type to FP16 if deviceType is GPU or MYRIAD (NCS)
if deviceType == 'CPU':
    %run -i {mo_tf_path} --input_model {pb_file} --output_dir {output_dir} --tensorflow_use_custom_operations_config {json_file} --batch 1 --data_type FP32
else:
    %run -i {mo_tf_path} --input_model {pb_file} --output_dir {output_dir} --tensorflow_use_custom_operations_config {json_file} --batch 1 --data_type FP16

Model Optimizer arguments:
Common parameters:
	- Path to the Input Model: 	D:\Workspace\GitHub\ISS-VSE-2019-09-23-IS1FT-CA2-Part1\./model/Full_yolov3_model.pb
	- Path for generated IR: 	D:\Workspace\GitHub\ISS-VSE-2019-09-23-IS1FT-CA2-Part1\./model
	- IR output name: 	Full_yolov3_model
	- Log level: 	ERROR
	- Batch: 	1
	- Input layers: 	Not specified, inherited from the model
	- Output layers: 	Not specified, inherited from the model
	- Input shapes: 	Not specified, inherited from the model
	- Mean values: 	Not specified
	- Scale values: 	Not specified
	- Scale factor: 	Not specified
	- Precision of IR: 	FP16
	- Enable fusing: 	True
	- Enable grouped convolutions fusing: 	True
	- Move mean values to preprocess section: 	False
	- Reverse input channels: 	False
TensorFlow specific parameters:
	- Input model in text protobuf format: 	False
	- Path to model dump for TensorBoard: 	None
	- List of shared libraries with TensorFlow custom layers implementation: 	None
	- Update the configuratio

















[ SUCCESS ] Generated IR model.
[ SUCCESS ] XML file: D:\Workspace\GitHub\ISS-VSE-2019-09-23-IS1FT-CA2-Part1\./model\Full_yolov3_model.xml
[ SUCCESS ] BIN file: D:\Workspace\GitHub\ISS-VSE-2019-09-23-IS1FT-CA2-Part1\./model\Full_yolov3_model.bin
[ SUCCESS ] Total execution time: 24.87 seconds. 


### Create OpenVINO network

In [12]:
# IR files of the converted model: network topology (.xml) and weights (.bin).
model_xml = "./model/Full_yolov3_model.xml"
model_bin = "./model/Full_yolov3_model.bin"

In [13]:
# Inference Engine core object. The extension library is registered only
# when running on the CPU device.
ie = IECore()
if deviceType == 'CPU':
    ie.add_extension(cpu_ext, "CPU")

# Load the IR (topology + weights) and process one image per request.
network = IENetwork(model=model_xml, weights=model_bin)
network.batch_size = 1

# Name of the first input and its shape, unpacked as (n, c, h, w).
iBlob = next(iter(network.inputs))
n, c, h, w = network.inputs[iBlob].shape

# Load the network onto the device selected at the top of the notebook.
exec_net = ie.load_network(network=network, num_requests=2, device_name=deviceType)

In [14]:
# Detections below this confidence are discarded.
probability_threshold = 0.5
# Boxes overlapping a higher-confidence box by more than this IoU are suppressed.
iou_threshold = 0.4
labels_map = None
# Duration of the previous loop iteration in seconds (shown on the frame in ms).
render_time = 0

### Start Video Inferencing

In [15]:
vid = cv2.VideoCapture(0)
try:
    # Start live video and inferencing
    while vid.isOpened():
        ret, frame = vid.read()
        if not ret:
            break

        # Resize to the network input size, move channels first and add the
        # batch dimension so the frame matches the (n, c, h, w) input shape.
        in_frame = cv2.resize(frame, (w, h))
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start inferencing on request 0 and block until it completes.
        infer_start = time()
        exec_net.start_async(
            request_id=0,  # ASync mode
            inputs={iBlob: in_frame})

        objects = []
        if exec_net.requests[0].wait(-1) == 0:
            output = exec_net.requests[0].outputs

            # Decode every output layer and collect the detections.
            for layer_name, oBlob in output.items():
                layers_params = YoloV3Params(
                    network.layers[layer_name].params,
                    oBlob.shape[2])
                objects += parse_yolo_region(
                    oBlob,
                    in_frame.shape[2:],
                    frame.shape[:-1],
                    layers_params,
                    probability_threshold)
            det_time = time() - infer_start

        # Suppress overlapping boxes: walk the detections from highest to
        # lowest confidence and zero the confidence of any later box whose
        # IoU with a kept box exceeds the threshold.
        objects = sorted(objects, key=lambda obj: obj['confidence'], reverse=True)
        for i in range(len(objects)):
            if objects[i]['confidence'] == 0:
                continue
            for j in range(i + 1, len(objects)):
                if intersection_over_union(objects[i], objects[j]) > iou_threshold:
                    objects[j]['confidence'] = 0

        objects = [obj for obj in objects if obj['confidence'] >= probability_threshold]

        org_img_size = frame.shape[:-1]

        for obj in objects:
            # Boxes that extend beyond the frame are not drawn.
            if obj['xmax'] > org_img_size[1] or obj['ymax'] > org_img_size[0] or obj['xmin'] < 0 or obj['ymin'] < 0:
                continue
            color = (int(min(obj['class_id'] * 10, 255)),
                     min(obj['class_id'] * 5, 255),
                     min(obj['class_id'] * 3, 255))
            det_label = obj['class_id']

            # Pass the two corners as separate points. A single 4-tuple is
            # interpreted by OpenCV as (x, y, width, height), which would use
            # xmax/ymax as the box size and draw the rectangle too large.
            cv2.rectangle(frame, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), color, 2)
            cv2.putText(frame, str(labels[int(det_label)]), (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX, 1, (212,175,55), 1)

        # render_time was measured on the previous iteration (0 on the first).
        det_msg = f"{deviceType} Detection Speed: {(render_time * 1000) :.3f}ms"
        cv2.putText(frame, det_msg, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255,223,1), 1)

        cv2.imshow("Results", frame)
        render_time = time() - infer_start

        # Esc (key code 27) stops the loop.
        key = cv2.waitKey(1)
        if key == 27:
            break
finally:
    # Always close the window and free the camera, even if the loop raised.
    cv2.destroyAllWindows()
    vid.release()