In [1]:
import numpy as np
import sys
import math

In [2]:
import cv2
import io
import matplotlib.pyplot as plt

In [3]:
# Load the trained YOLO network from its weights and config files
yolo = cv2.dnn.readNet("model/trainedYolo.weights", "model/trainedYolo.cfg")

# Read the class names file: one class label per line, where the line position
# is the class id used to index objectClasses later on.
# The context manager closes the file handle once the names are read.
with open("model/cocoClasses.names", "r") as f:
    objectClasses = [line.strip() for line in f]

In [4]:
# Region names keyed by region index. Indices advance top-to-bottom within a
# column and then left-to-right across columns, which is the order in which
# disect() visits the region centres.
mapping = dict(enumerate((
    "Top left",
    "Center Left",
    "Bottom Left",
    "Center Top",
    "Center",
    "Center Bottom",
    "Top Right",
    "Center Right",
    "Bottom Right",
)))


# Locate which of the nine image regions a detected object belongs to
def disect(h, w, x1, y1, h1, w1):
    """Name the image region whose centre is nearest to a bounding box centre.

    The image is split into a 3x3 grid; the grid-cell centres lie at 1/6, 3/6
    and 5/6 of the image height and width (integer division).

    Args:
        h: Image height.
        w: Image width.
        x1: Left edge of the bounding box.
        y1: Top edge of the bounding box.
        h1: Bounding box height.
        w1: Bounding box width.

    Returns:
        The entry of ``mapping`` for the closest region centre (Euclidean
        distance). On a tie the region with the lowest index wins.
    """
    # Candidate centre coordinates along each axis
    rows = [(k * h) // 6 for k in (1, 3, 5)]
    cols = [(k * w) // 6 for k in (1, 3, 5)]

    # Column-major enumeration so the position in this list is the mapping key
    centres = [(cx, cy) for cx in cols for cy in rows]

    best_dist = sys.maxsize
    best_region = 0
    for region, (cx, cy) in enumerate(centres):
        dist = math.sqrt((cx - x1 - w1 / 2) ** 2 + (cy - y1 - h1 / 2) ** 2)
        if dist < best_dist:
            best_dist = dist
            best_region = region

    return mapping[best_region]

In [5]:
def predict(img):
    """Detect objects in an image with the module-level ``yolo`` network.

    The image is scaled to 90% of its size and passed through the network.
    Detections whose best class score exceeds 0.5 are kept, overlapping boxes
    are pruned with non-maximum suppression, and each surviving box is drawn on
    the scaled copy and described in text. The array passed in is not drawn on.

    Args:
        img: Image array, e.g. as returned by ``cv2.imread``.

    Returns:
        A two-element list. When at least one object survives it is
        ``[descriptions, annotated_image]``, where ``descriptions`` is a list
        of strings such as ``"1: car1 is in Center Right, "``. Otherwise it is
        ``["No objects found", None]``.
    """
    layers = yolo.getLayerNames()
    # Depending on the OpenCV build, getUnconnectedOutLayers() yields either an
    # Nx1 array or a flat array of 1-based layer indices; flattening handles both.
    outputLayerIndices = np.asarray(yolo.getUnconnectedOutLayers()).flatten()
    outputLayers = [layers[int(i) - 1] for i in outputLayerIndices]

    # Reshaping image
    img = cv2.resize(img, None, fx=0.9, fy=0.9)
    # Only the first two dimensions are needed, so single-channel input works too
    height, width = img.shape[:2]
    blobs = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

    # Forward pass the image
    yolo.setInput(blobs)
    outputs = yolo.forward(outputLayers)

    object_ids = []
    possibilities = []
    boxes = []

    for layer_output in outputs:
        for detection in layer_output:
            # Entries from index 5 onwards are the per-class scores
            scores = detection[5:]
            object_id = int(np.argmax(scores))
            possibility = scores[object_id]

            # Keeping those BB with confidence scores more than 50%
            if possibility > 0.5:
                # Box centre and size are scaled by the image dimensions
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)

                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Convert centre-based coordinates to the top-left corner
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                object_ids.append(object_id)
                possibilities.append(float(possibility))
                boxes.append([x, y, w, h])

    if not boxes:
        return ["No objects found", None]

    # Discarding multiple overlapping BB and considering the best fit ones
    uniqueIndices = cv2.dnn.NMSBoxes(boxes, possibilities, 0.4, 0.6)
    # Normalise the result (Nx1 array, flat array or empty tuple) to a list of
    # ints, sorted so that objects are reported in detection order.
    kept = sorted(int(i) for i in np.asarray(uniqueIndices).flatten())

    font = cv2.FONT_HERSHEY_SIMPLEX
    output = []
    # Running count per class id; a dict avoids assuming a maximum class count
    objCounts = {}

    # Processing bounding boxes of detected objects and generating verbose output from image
    for counter, i in enumerate(kept, start=1):
        x, y, w1, h1 = boxes[i]
        class_id = object_ids[i]
        label = str(objectClasses[class_id])
        cv2.rectangle(img, (x, y), (x + w1, y + h1), (0, 255, 255), 2)
        cv2.putText(img, label, (x, y - 10), font, 1, (255, 255, 255), 2)

        # Finding the block location in which the current object belongs
        loc = disect(height, width, x, y, h1, w1)

        # Verbose info of the detected object; the suffix is the 1-based
        # occurrence number of this class within the image
        occurrence = objCounts.get(class_id, 0) + 1
        objCounts[class_id] = occurrence
        output.append(f'{counter}: {label}{occurrence} is in {loc}, ')

    if not output:
        return ["No objects found", None]
    return [output, img]

In [None]:
# Sample image from which objects are to be detected
img = cv2.imread("images/sample2.jpg")
# cv2.imread reports failure by returning None rather than raising
if img is None:
    raise FileNotFoundError("Could not read image: images/sample2.jpg")

# res = [detected objects as list, processed image]
res = predict(img)

# Detected objects as verbose output
print('Objects detected:')
if res[1] is None:
    # With no detections predict() returns a single message string and no
    # image, so print the message as-is and skip the display step.
    print(res[0])
else:
    for obj in res[0]:
        print(obj)

    # Processed image after applying bounding boxes
    cv2.imshow("image", res[1])
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Objects detected:
1: car1 is in Center Right, 
2: traffic light1 is in Top left, 
3: car2 is in Center Left, 
4: car3 is in Center, 
5: car4 is in Center Right, 
6: car5 is in Center Left, 
7: person1 is in Center, 
8: traffic light2 is in Top Right, 
9: traffic light3 is in Top Right, 
10: traffic light4 is in Top left, 
11: traffic light5 is in Center Left, 
12: car6 is in Center Left, 
