# YOLO Inference on PYNQ with FINN

In [None]:
import glob
import time
import cv2
import numpy as np

from matplotlib import pyplot as plt

from driver import io_shape_dict
from driver_base import FINNExampleOverlay

In [None]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Works on scalars and, elementwise, on NumPy arrays.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [None]:
def decode(pred, conf_thresh=0.3, prob_thresh=0.01):
    """Turn a YOLOv1 prediction grid into a flat list of candidate boxes.

    Adapted from https://github.com/motokimura/yolo_v1_pytorch.

    Args:
        pred: Array indexed as ``pred[row, col, channel]`` over a 7x7 grid.
            For each of the 2 boxes ``b`` in a cell, channels
            ``5*b .. 5*b+3`` are read as (x, y, w, h) and channel ``5*b+4``
            as the box confidence; channels from index 10 onward are read as
            per-class scores.
        conf_thresh: Accepted for interface compatibility only. The original
            implementation built a confidence mask from it but never used the
            mask, so it has no effect on the result.
        prob_thresh: A box is kept only if ``confidence * best class score``
            is at least this value.

    Returns:
        Tuple ``(boxes, labels, confidences, class_scores)`` of arrays with
        ``n_boxes`` rows each. ``boxes`` has shape ``[n_boxes, 4]`` holding
        (x1, y1, x2, y2) normalized to the image width/height. When nothing
        passes the threshold, empty arrays are returned (``boxes`` has shape
        ``[0, 4]``).
    """
    S, B = 7, 2  # grid size and number of boxes predicted per cell
    cell_size = 1.0 / float(S)
    boxes, labels, confidences, class_scores = [], [], [], []

    # TBM, further optimization may be possible by replacing the following
    # for-loops with tensor operations.
    for i in range(S):  # grid column (x direction)
        for j in range(S):  # grid row (y direction)
            cell_class_scores = pred[j, i, 5*B:]
            class_label = np.argmax(cell_class_scores, 0)
            class_score = cell_class_scores[class_label]

            for b in range(B):
                conf = pred[j, i, 5*b + 4]
                prob = conf * class_score
                if float(prob) < prob_thresh:
                    continue

                # Compute box corners (x1, y1, x2, y2) from the cell-relative prediction.
                box = pred[j, i, 5*b : 5*b + 4]
                # Left-top corner of the cell, normalized to [0, 1] w.r.t. image size.
                x0y0_normalized = np.array([i, j]) * cell_size
                # Box center: the offset inside the cell is scaled to image units.
                xy_normalized = box[:2] * cell_size + x0y0_normalized
                # Box width/height are used as-is, i.e. relative to the whole image.
                wh_normalized = box[2:]

                box_xyxy = np.zeros(4)
                box_xyxy[:2] = xy_normalized - 0.5 * wh_normalized  # (x1, y1)
                box_xyxy[2:] = xy_normalized + 0.5 * wh_normalized  # (x2, y2)

                boxes.append(box_xyxy)
                labels.append(class_label)
                confidences.append(conf)
                class_scores.append(class_score)

    if not boxes:
        # No box found: return empty arrays. The shape must be passed as a
        # tuple; `np.zeros(0, 4)` would treat 4 as the dtype and raise.
        return (
            np.zeros((0, 4)),
            np.zeros(0, dtype=np.intp),
            np.zeros(0),
            np.zeros(0),
        )

    return (
        np.stack(boxes, 0),         # [n_boxes, 4]
        np.stack(labels, 0),        # [n_boxes, ]
        np.stack(confidences, 0),   # [n_boxes, ]
        np.stack(class_scores, 0),  # [n_boxes, ]
    )

In [None]:
def nms(boxes, scores, nms_thresh=0.5):
    """Greedy non-maximum suppression.

    Adapted from https://github.com/motokimura/yolo_v1_pytorch.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it exceeds ``nms_thresh``.

    Args:
        boxes: Array of shape ``[n, 4]`` with rows (x1, y1, x2, y2).
        scores: Array of shape ``[n,]`` used to rank the boxes.
        nms_thresh: Boxes with IoU strictly greater than this are suppressed.

    Returns:
        1-D integer array of indices into ``boxes`` for the kept boxes, in
        descending score order. The array is empty (and still usable as an
        index array) when ``boxes`` is empty.
    """
    x1 = boxes[:, 0]  # [n,]
    y1 = boxes[:, 1]  # [n,]
    x2 = boxes[:, 2]  # [n,]
    y2 = boxes[:, 3]  # [n,]
    areas = (x2 - x1) * (y2 - y1)  # [n,]

    order = scores.argsort(0)[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(best)

        rest = order[1:]  # [m-1,] candidates still competing with `best`
        if rest.size == 0:
            break  # Only one box was left, nothing to suppress.

        # Intersection rectangle between `best` and each remaining box.
        inter_x1 = x1[rest].clip(min=x1[best])
        inter_y1 = y1[rest].clip(min=y1[best])
        inter_x2 = x2[rest].clip(max=x2[best])
        inter_y2 = y2[rest].clip(max=y2[best])
        inter_w = (inter_x2 - inter_x1).clip(min=0)
        inter_h = (inter_y2 - inter_y1).clip(min=0)

        inters = inter_w * inter_h
        unions = areas[best] + areas[rest] - inters
        # A zero union yields NaN, which fails the `<=` test below, so such
        # boxes are dropped just like overlapping ones.
        ious = inters / unions

        # Keep only boxes whose IoU with `best` does not exceed the threshold.
        survivors = (ious <= nms_thresh).nonzero()[0]
        order = rest[survivors]

    # Explicit integer dtype: without it an empty result would be float64
    # and could not be used to index `boxes`.
    return np.array(keep, dtype=np.intp)

In [None]:
def visualize_boxes(image_bgr, boxes, class_names, probs, name_bgr_dict=None, line_thickness=2):
    """Draw labelled detection boxes on a copy of an image.

    Adapted from https://github.com/motokimura/yolo_v1_pytorch.

    Args:
        image_bgr: Image to annotate; it is copied, not modified.
        boxes: Iterable of ``((x1, y1), (x2, y2))`` corner pairs; coordinates
            are truncated to ``int`` before drawing.
        class_names: Class name for each box, used as the label text and as
            the key into the color mapping.
        probs: Score for each box, printed with two decimals after the name.
        name_bgr_dict: Mapping from class name to color tuple. Defaults to
            ``VOC_CLASS_BGR`` when ``None``.
        line_thickness: Thickness of the box outline; also used as padding
            around the label text.

    Returns:
        The annotated copy of ``image_bgr``.
    """
    palette = VOC_CLASS_BGR if name_bgr_dict is None else name_bgr_dict
    canvas = image_bgr.copy()

    for (left_top, right_bottom), class_name, prob in zip(boxes, class_names, probs):
        left, top = int(left_top[0]), int(left_top[1])
        right, bottom = int(right_bottom[0]), int(right_bottom[1])
        bgr = palette[class_name]

        # Box outline.
        cv2.rectangle(canvas, (left, top), (right, bottom), bgr, thickness=line_thickness)

        # Filled label background anchored at the box's left-top corner.
        text = '%s %.2f' % (class_name, prob)
        (text_w, text_h), baseline = cv2.getTextSize(
            text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, thickness=2)
        label_far_corner = (
            left + text_w + line_thickness,
            top + text_h + line_thickness + baseline,
        )
        cv2.rectangle(canvas, (left, top), label_far_corner, bgr, -1)

        # White label text on top of the filled background.
        text_origin = (left + line_thickness, top + 2*baseline + line_thickness)
        cv2.putText(canvas, text, text_origin,
            cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.4, color=(255, 255, 255), thickness=1, lineType=8)

    return canvas

In [None]:
# Per-channel offset subtracted from the RGB input image during preprocessing.
mean = np.asarray(
    [122.67891434, 116.66876762, 104.00698793],
    dtype=np.float32,
)

# VOC class names mapped to the color used when drawing that class.
# The insertion order defines the class index used by the detector output.
VOC_CLASS_BGR = {
    'aeroplane':   (128, 0, 0),
    'bicycle':     (0, 128, 0),
    'bird':        (128, 128, 0),
    'boat':        (0, 0, 128),
    'bottle':      (128, 0, 128),
    'bus':         (0, 128, 128),
    'car':         (128, 128, 128),
    'cat':         (64, 0, 0),
    'chair':       (192, 0, 0),
    'cow':         (64, 128, 0),
    'diningtable': (192, 128, 0),
    'dog':         (64, 0, 128),
    'horse':       (192, 0, 128),
    'motorbike':   (64, 128, 128),
    'person':      (192, 128, 128),
    'pottedplant': (0, 64, 0),
    'sheep':       (128, 64, 0),
    'sofa':        (0, 192, 0),
    'train':       (128, 192, 0),
    'tvmonitor':   (0, 64, 128),
}

# Class names in index order (position i is the name for class label i).
class_name_list = list(VOC_CLASS_BGR)

In [None]:
# Create the FINN accelerator driver used by the inference loop.
# It is configured with the bitfile path below, the I/O shape description
# imported from the `driver` module, and a batch size of 1 (one image per
# `execute` call).
# NOTE(review): presumably constructing the overlay programs the FPGA with the
# bitfile and loads any runtime-writable weights from `runtime_weights/` --
# confirm against `driver_base.FINNExampleOverlay`.
driver = FINNExampleOverlay(
    bitfile_name="../bitfile/finn-accel.bit",
    platform="zynq-iodma",
    io_shape_dict=io_shape_dict,
    batch_size=1,
    runtime_weight_dir="runtime_weights/",
)

In [None]:
# Folder the annotated result images are written to. Nothing in this notebook
# creates it, so it has to exist before the inference loop runs.
output_folder = "../../test_outputs/"
# Folder scanned for the input `.jpg` images.
test_img_folder = "../../test_samples/"
# glob returns matches in arbitrary order; sort them so the index-based
# output names (`0.jpg`, `1.jpg`, ...) map to the same inputs on every run.
test_img_paths = sorted(glob.glob(test_img_folder + "*.jpg"))

In [None]:
# Run the detector on every test image: preprocess, execute on the accelerator,
# decode the output grid, apply per-class NMS, then draw and save the result.
for number, test_img_path in enumerate(test_img_paths):
    org_img = cv2.imread(test_img_path)
    if org_img is None:
        # cv2.imread returns None (instead of raising) for unreadable files.
        print(f"Could not read image {test_img_path}, skipping.")
        continue

    # Original size is needed later to scale normalized boxes back to pixels.
    h, w, _ = org_img.shape
    img = cv2.resize(org_img, (448, 448), interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # NOTE(review): after mean subtraction and division by 255 every value lies
    # strictly between -1 and 1, so the uint8 cast below truncates all pixels
    # to 0 and the accelerator receives an all-zero image. Confirm the input
    # format the exported model expects (e.g. raw uint8 pixels) and adjust the
    # preprocessing accordingly. Behaviour is left unchanged here.
    img = (img - mean) / 255.0
    img = img.astype(np.uint8)
    driver_in = img.reshape(driver.ishape_normal)
    print("Input buffer shape is %s and datatype is %s" % (str(driver_in.shape), str(driver_in.dtype)))

    t1 = time.perf_counter()
    output = driver.execute(driver_in)
    t2 = time.perf_counter()
    print(f"Time passed for driver execution: {t2-t1} sec")
    print(f"Output buffer shape is {output.shape} and datatype is {output.dtype}")

    # Squash raw outputs to (0, 1) and drop the batch dimension.
    output = sigmoid(output)[0]

    # Get detected boxes, labels, confidences, class-scores.
    boxes_normalized_all, class_labels_all, confidences_all, class_scores_all = decode(output)

    # Apply non maximum suppression for boxes of each class.
    boxes_normalized, class_labels, probs = [], [], []

    for class_label in range(len(class_name_list)):
        mask = (class_labels_all == class_label)
        if not np.any(mask):
            continue  # if no box found, skip that class.

        boxes_normalized_masked = boxes_normalized_all[mask]
        class_labels_masked = class_labels_all[mask]
        confidences_masked = confidences_all[mask]
        class_scores_masked = class_scores_all[mask]

        ids = nms(boxes_normalized_masked, confidences_masked)

        boxes_normalized.append(boxes_normalized_masked[ids])
        class_labels.append(class_labels_masked[ids])
        probs.append(confidences_masked[ids] * class_scores_masked[ids])

    if boxes_normalized:
        boxes_normalized = np.concatenate(boxes_normalized, 0)
        class_labels = np.concatenate(class_labels, 0)
        probs = np.concatenate(probs, 0)
    else:
        # np.concatenate raises on an empty list; an image without any
        # detection is still visualized and saved, just without boxes.
        boxes_normalized = np.zeros((0, 4))
        class_labels = np.zeros(0, dtype=int)
        probs = np.zeros(0)

    # Postprocess for box, labels, probs.
    boxes_detected, class_names_detected, probs_detected = [], [], []
    for box_normalized, class_label, prob in zip(boxes_normalized, class_labels, probs):
        x1, x2 = w * box_normalized[0], w * box_normalized[2]  # unnormalize x with image width.
        y1, y2 = h * box_normalized[1], h * box_normalized[3]  # unnormalize y with image height.
        boxes_detected.append(((x1, y1), (x2, y2)))

        class_names_detected.append(class_name_list[int(class_label)])
        probs_detected.append(float(prob))

    # Visualize.
    image_boxes = visualize_boxes(org_img, boxes_detected, class_names_detected, probs_detected)
    output_path = output_folder + f"{number}.jpg"
    if not cv2.imwrite(output_path, image_boxes):
        # cv2.imwrite reports failure through its return value only.
        print(f"Failed to write {output_path}; check that the output folder exists.")

    plt.imshow(cv2.cvtColor(image_boxes, cv2.COLOR_BGR2RGB))
    plt.show()

print(driver.throughput_test())