In [None]:
import cv2
import numpy as np
import onnx
import onnxruntime as ort
import time
import matplotlib.pyplot as plt

* Markdown

In [None]:
# letterbox procedure
def letterbox(src, dest_shape):
    """Fit ``src`` into a gray canvas of shape ``dest_shape``, keeping aspect ratio.

    The image is scaled by the smaller of the width/height ratios so that it
    fits entirely inside the canvas, then pasted into the center; the
    uncovered border keeps the gray fill value 128.

    Args:
        src: image array indexed as (rows, cols, chan).
        dest_shape: shape of the output canvas, (height, width, chan).

    Returns:
        A new uint8 array of shape ``dest_shape`` containing the resized image.
    """
    # img.shape is (rows, cols, chan): rows => height, cols => width
    src_height, src_width = src.shape[0], src.shape[1]

    # gray canvas; the uint8 fill value makes the whole array uint8
    dest = np.full(dest_shape, np.uint8(128))
    dest_height, dest_width = dest.shape[0], dest.shape[1]

    # float scale factors needed to match each destination dimension
    width_ratio = dest_width / src_width
    height_ratio = dest_height / src_height

    # the smaller ratio drives the resize; the other dimension is scaled by the
    # same factor (truncated to int) and clamped to >= 1 so that extreme aspect
    # ratios never request a zero-sized image from cv2.resize
    rsz_width = dest_width
    rsz_height = dest_height
    if width_ratio < height_ratio:
        rsz_height = max(1, int(src_height * width_ratio))
    else:
        rsz_width = max(1, int(src_width * height_ratio))

    # Python signature is resize(src, dsize[, dst[, fx[, fy[, interpolation]]]]),
    # so the interpolation flag must be passed by keyword (dsize is (width, height))
    rsz = cv2.resize(src, (rsz_width, rsz_height), interpolation=cv2.INTER_LINEAR)

    # paste the resized image into the center of the canvas
    dx = (dest_width - rsz_width) // 2
    dy = (dest_height - rsz_height) // 2
    dest[dy:dy+rsz_height, dx:dx+rsz_width, :] = rsz

    return dest

# pack_buffer procedure, ONNX model expects normalized float32 NCHW tensor
def pack_buffer(src):
    """Turn an (H, W, C) image into a normalized float32 tensor of shape (1, C, H, W).

    The input is copied, so ``src`` itself is never modified.
    """
    scaled = np.array(src, dtype='float32') / 255.0   # float32 copy, values / 255
    channels_first = scaled.transpose(2, 0, 1)        # (H, W, C) -> (C, H, W)
    return channels_first[np.newaxis, ...]            # prepend the batch axis

# proc_results procedure
def proc_results(res):
    """Gather the selected boxes, scores and class ids from a 3-output result.

    ``res`` must unpack into exactly (boxes, scores, indices). Each row of
    ``indices[0]`` is read as a 3-element index: the full row indexes
    ``scores``, elements 0 and 2 index ``boxes``, and element 1 is reported
    as the class id.

    Returns:
        A list of (box, score, class_id) tuples, one per row of ``indices[0]``.
    """
    boxes, scores, indices = res
    selected = []
    for sel in indices[0]:
        class_id = sel[1]
        score = scores[tuple(sel)]
        box = boxes[(sel[0], sel[2])]
        selected.append((box, score, class_id))
    return selected

# draw_annos procedure
def draw_annos(src, annos, coco_names):
    """Return a copy of ``src`` with a box and text label drawn per annotation.

    Args:
        src: image array to annotate; it is left unmodified.
        annos: iterable of annotations indexed as
            ``anno[0]`` = box [x_lo, y_lo, x_hi, y_hi],
            ``anno[1]`` = score (formatted with 4 decimals),
            ``anno[2]`` = index into ``coco_names``.
        coco_names: sequence of class names.

    Returns:
        The annotated copy of ``src``.
    """
    # C-ordered copy: all drawing happens on this array, never on the caller's
    dest = np.copy(src, order='C')
    green = (0, 255, 0)
    black = (0, 0, 0)
    face = cv2.FONT_HERSHEY_TRIPLEX
    scale = 0.5
    thickness = 1
    for anno in annos:
        box, score, class_idx = anno[0], anno[1], anno[2]
        pt1 = (int(box[0]), int(box[1]))
        pt2 = (int(box[2]), int(box[3]))
        text = f'{coco_names[class_idx]}: {score:6.4f}'
        (w, h), _ = cv2.getTextSize(text, face, scale, thickness)
        # filled label background sits directly above the box's first corner
        pt3 = (pt1[0], int(pt1[1] - h))
        pt4 = (int(pt1[0] + w), pt1[1])
        # draw on dest (the original drew the outline on src, which mutated the
        # caller's image and threw away the copy)
        dest = cv2.rectangle(dest, pt1, pt2, green)
        dest = cv2.rectangle(dest, pt3, pt4, green, cv2.FILLED)
        dest = cv2.putText(dest, text, pt1, face, scale, black, thickness)
    return dest


In [None]:
# Load and display test image

# open test image; OpenCV decodes into BGR channel order
arr1 = cv2.imread('../data/dog.jpg')
# cv2.imread signals failure by returning None instead of raising, so stop
# here with a clear message rather than failing later inside plt.imshow
if arr1 is None:
    raise FileNotFoundError("could not read image '../data/dog.jpg'")
# shown as-is (still BGR); the following cells reverse the channel order
plt.imshow(arr1)
plt.show()

In [None]:
# Use OpenCV to reverse the channel order (BGR -> RGB) before displaying.
# The converted array is only passed to imshow; arr1 itself is left unchanged.
plt.imshow(cv2.cvtColor(arr1, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
# Convert channels a different way: reverse the last (channel) axis by slicing.
# arr2 is the array that the letterbox cell below consumes.

arr2 = arr1[..., ::-1]                  # bgr -> rgb for inference
plt.imshow(arr2)
plt.show()

In [None]:
# Letterbox the RGB image to the NN input size (shape is (height, width, chan)).
# letterbox_img is reused below for inference and as the canvas for annotations.
letterbox_img = letterbox(arr2, (416, 416, 3))
plt.imshow(letterbox_img)
plt.show()

In [None]:
# Read the model classes
def read_model_classes(pathname = '../model/coco.names'):
    """Read class names from a text file, one name per line.

    Leading/trailing whitespace is stripped from every name. Reading stops at
    the first empty (or whitespace-only) line, or at end of file.

    Args:
        pathname: path of the names file.

    Returns:
        List of class names in file order.
    """
    classes = []
    # the context manager closes the file even if reading raises
    with open(pathname, 'r') as names_file:
        for line in names_file:
            class_name = line.strip()
            if not class_name:
                break
            classes.append(class_name)
    return classes

def run_inference(model, image_array):
    """Pack ``image_array`` with pack_buffer and run it through ``model``.

    The packed tensor is fed under the input name 'input_1' (the only input
    supplied), and passing None as the first argument of ``model.run``
    requests every output.

    Returns:
        Whatever ``model.run`` returns.
    """
    feed = {'input_1': pack_buffer(image_array)}
    return model.run(None, feed)

# Class names from the default names file; `classes` is also read as a global
# by print_detections further down.
classes = read_model_classes()
#print("Classes:",classes)

# Load the ONNX model and run it once on the letterboxed image.
model   = ort.InferenceSession('../model/modified_yolov3-tiny.onnx')
results = run_inference(model, letterbox_img)

# Quick look at the raw outputs (expected: two arrays, one per detection grid).
print(len(results))      # a list of 2 np.arrays
print(results[0].shape)  # 13x13
print(results[1].shape)  # 26x26

In [None]:
import math

def sigmoid(x):
    """Logistic function 1 / (1 + e^-x) for a scalar ``x``.

    Evaluated in a numerically stable way: the exponential is only ever taken
    of a non-positive number, so large-magnitude negative inputs return 0.0
    instead of raising OverflowError from math.exp.
    """
    if x >= 0:
        return 1.0/(1.0 + math.exp(-x))
    # x < 0: algebraically identical form whose exponent cannot overflow
    exp_x = math.exp(x)
    return exp_x/(1.0 + exp_x)

def iou():
    """Placeholder that takes no arguments and always returns None.

    A later cell in this notebook defines the two-argument ``iou(bbox1, bbox2)``
    under the same name; once that cell runs it replaces this stub.
    """
    return

def process_results(res, pobj_threshold = 0.1, pcls_threshold = 0.5, iou_threshold = 0.5,
                    orig_img_size = 416,
                    anchors = [[[81,82], [135,169], [344,319]],     # anchors for res[0]
                               [[10,14], [ 23, 27], [ 37, 58]]]):   # anchors for res[1]
    """Decode raw YOLO output blocks into a flat list of detections.

    Args:
        res: sequence of output arrays, one per YOLO block. Each is indexed
            with shape[1] as grid height and shape[2] as grid width and must
            reshape to (grid_h, grid_w, num_anchors, 5 + num_classes), where
            the last axis holds (x, y, w, h, objectness, class logits...).
        pobj_threshold: a cell/anchor is kept only if sigmoid(objectness)
            is strictly greater than this.
        pcls_threshold: a class is reported only if sigmoid(class logit) is
            strictly greater than this.
        iou_threshold: accepted for interface compatibility; not used here.
        orig_img_size: side length of the (square) network input, used to
            compute each block's stride.
        anchors: ``anchors[blk][a]`` is the (width, height) prior of anchor
            ``a`` in block ``blk``. The default list is never modified.

    Returns:
        List of tuples (pobj, pclass, class_index, x_lo, y_lo, x_hi, y_hi);
        one entry per (cell, anchor, class) that passes both thresholds.
    """
    detections = []

    # iterate over the blocks of the `res` argument (the original used the
    # notebook-global `results` here, ignoring what the caller passed in)
    for blk, raw in enumerate(res):
        grid_h, grid_w = raw.shape[1], raw.shape[2]
        stride = orig_img_size/grid_h  # ASSUMES square image
        blk_anchors = anchors[blk]
        # split the channel axis into (anchor, field); with the default
        # 3 anchors and 255 channels this is the original (h, w, 3, 85)
        det = np.reshape(raw, (grid_h, grid_w, len(blk_anchors), -1))
        num_classes = det.shape[3] - 5

        for hi in range(grid_h):
            for wi in range(grid_w):
                for anchor_idx in range(len(blk_anchors)):
                    detection = det[hi][wi][anchor_idx]
                    pobj = sigmoid(detection[4])
                    if not pobj > pobj_threshold:
                        continue

                    # box center is offset within the cell; size scales the anchor
                    x = stride*(wi + sigmoid(detection[0]))
                    y = stride*(hi + sigmoid(detection[1]))
                    w = math.exp(detection[2])*blk_anchors[anchor_idx][0]
                    h = math.exp(detection[3])*blk_anchors[anchor_idx][1]

                    # one detection per class whose probability passes the threshold
                    for ci in range(num_classes):
                        pclass = sigmoid(detection[5+ci])
                        if pclass > pcls_threshold:
                            detections.append((pobj, pclass, ci, x-w/2., y-h/2., x+w/2., y+h/2.))
    return detections

def print_detections(detections):
    """Print one summary line per detection.

    Each detection is indexed as (pobj, pclass, ci, xl, yl, xh, yh); the class
    name is looked up in the notebook-global ``classes`` list.
    """
    template = "p(obj)=%.4f, (x,y)_lo=(%3d, %3d), (x,y)_hi=(%3d x %3d), pclass=%.4f, %s"
    for entry in detections:
        fields = (entry[0], entry[3], entry[4], entry[5], entry[6], entry[1], classes[entry[2]])
        print(template % fields)

# detections : pobj, pclass, ci, xl, yl, xh, yh
# Decode the raw model outputs, then sort ascending (tuples compare field by
# field, so p(obj) is the primary key) before printing.
detections = process_results(results, pobj_threshold=0.2)
detections.sort()
print_detections(detections)

# Convert each detection into the (box, score, class index) layout that
# draw_annos reads; the score is p(obj) * p(class).
annos = []
for det in detections:
    annos.append(([det[3], det[4],
                   det[5], det[6]],
                  det[0]*det[1],
                  det[2]))

# Draw on a copy so letterbox_img stays clean for the later cells.
tmp_img = np.copy(letterbox_img)
anno_img = draw_annos(tmp_img, annos, classes)
plt.imshow(anno_img)
plt.show()

In [None]:
def overlap(lo1, hi1, lo2, hi2):
    """Signed overlap length of the 1-D intervals [lo1, hi1] and [lo2, hi2].

    The result is positive when the intervals overlap, zero when they only
    touch, and negative (the size of the gap) when they are disjoint.
    """
    return min(hi1, hi2) - max(lo1, lo2)


def iou(bbox1, bbox2):  # bbox : [xl, yl, xh, yh]
    """Intersection-over-union of two axis-aligned boxes.

    Args:
        bbox1, bbox2: boxes indexed as [x_lo, y_lo, x_hi, y_hi].

    Returns:
        intersection area / union area. Returns 0.0 when the union area is not
        positive (e.g. both boxes are degenerate), instead of dividing by zero.
    """
    area1 = (bbox1[2] - bbox1[0])*(bbox1[3] - bbox1[1])
    area2 = (bbox2[2] - bbox2[0])*(bbox2[3] - bbox2[1])

    # intersection: non-zero only if the boxes overlap along both axes
    wo = overlap(bbox1[0], bbox1[2], bbox2[0], bbox2[2])
    ho = overlap(bbox1[1], bbox1[3], bbox2[1], bbox2[3])
    i_area = wo*ho if (wo > 0. and ho > 0.) else 0.

    # union
    u_area = area1 + area2 - i_area
    if u_area <= 0.:
        # zero-area (or malformed) boxes: define the IoU as 0
        return 0.

    return i_area / u_area


# REMEMBER: detections : (pobj, pclass, ci, xl, yl, xh, yh)
def basic_nms(dets, iou_threshold = 0.5):
    """Greedy per-class non-maximum suppression.

    Detections are tuples (pobj, pclass, ci, xl, yl, xh, yh). They are
    processed from highest to lowest (tuple ordering, so p(obj) first); each
    kept detection suppresses every remaining detection of the same class
    whose box IoU with it is strictly greater than ``iou_threshold``.
    Progress is printed via print_detections.

    Args:
        dets: iterable of detection tuples; it is not modified (a sorted copy
            is used internally).
        iou_threshold: IoU above which a same-class detection is suppressed.

    Returns:
        List of the kept detections, highest first.
    """
    remaining = sorted(dets, reverse=True)
    print("Original detections (sorted hi->lo):")
    print_detections(remaining)

    filtered_detections = []
    while remaining:
        # take the best detection off the list explicitly so the loop always
        # makes progress, whatever IoU the box has with itself
        curr_det = remaining.pop(0)
        filtered_detections.append(curr_det)
        print("** Keeping and comparing to:")
        print_detections([curr_det])

        # iou() expects [xl, yl, xh, yh], i.e. fields 3..6 of a detection
        curr_box = curr_det[3:7]
        remaining = [d for d in remaining
                     if not (curr_det[2] == d[2] and iou(curr_box, d[3:7]) > iou_threshold)]
        print("remaining dets:")
        print_detections(remaining)

    print("Filtered detections:")
    print_detections(filtered_detections)
    return filtered_detections

# Run NMS on the decoded detections (basic_nms also prints its progress).
filtered_dets = basic_nms(detections)

# Convert the surviving detections into the (box, score, class index) layout
# that draw_annos reads; the score is p(obj) * p(class).
filtered_annos = []
for det in filtered_dets:
    filtered_annos.append(([det[3], det[4],
                             det[5], det[6]],
                             det[0]*det[1],
                             det[2]))

# Draw on a fresh copy of the letterboxed image and display the result.
tmp1_img = np.copy(letterbox_img)
anno_img = draw_annos(tmp1_img, filtered_annos, classes)
plt.imshow(anno_img)
plt.show()