## OpenCV-YOLO workflow
**image/images -> cv2.dnn.blobFromImage [pre-processing images] -> blob -> yolo -> output predictions**

In [1]:
## importing important packages

import cv2         # computer vision
import numpy as np # for array structure computing
import matplotlib.pyplot as plt

In [2]:
## showing an image

img = cv2.imread('image3.jpg')
cv2.imshow('IMAGE', img) # showing image on 'IMAGE' window
cv2.waitKey(0)      # waits to press any key 
cv2.destroyAllWindows() # closing all open windows 
#plt.figure(figsize=(15, 15))
#plt.imshow(img)
#plt.title('IMAGE')
#plt.show()

In [7]:
img.shape

(800, 1200, 3)

In [3]:
height, width, _ = img.shape

In [4]:
## Reading yolov3 pre-trained weights

net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg') 

# getting all 80 class names from coco.names file
classes = []
with open('coco.names', 'r') as f:
      classes = f.read().splitlines()
print(len(classes))   
classes

80


['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [6]:
## getting blob image of 'img'; cv2.dnn.blobFromImage function returns a blob which is our input image after 
## mean subtraction, normalizing, and channel swapping

blob = cv2.dnn.blobFromImage(img, 1/225, (416, 416), (0, 0, 0), swapRB=True, crop=False)
for b in blob:
    for n, img_blob in enumerate(b):
        print(n,' ', img_blob.shape)
        cv2.imshow(str(n), img_blob)

cv2.waitKey(0)
cv2.destroyAllWindows()

0   (416, 416)
1   (416, 416)
2   (416, 416)


In [7]:
## feeding blob image into network

net.setInput(blob)
output_layers_names = net.getUnconnectedOutLayersNames() # Returns names of layers with unconnected outputs
layerOutputs = net.forward(output_layers_names) # forward() - Runs a forward pass to compute the net output. i.e will give
# Numpy ndarray as output which you can use it to plot box on the given input image

In [8]:
output_layers_names

['yolo_82', 'yolo_94', 'yolo_106']

In [11]:
len(layerOutputs)

3

In [13]:
## start to get all required cordinates, class, confidences of predicted classes

boxes = []
confidences = []
class_ids = []

for output in layerOutputs:
    #print('each outputlayer has', len(output), 'outputs')
    for detection in output:
        #print('each output has', len(detection), 'decection')
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0]*width)
            center_y = int(detection[1]*height)
            w = int(detection[2]*width)
            h = int(detection[3]*height)

            x = int(center_x - w/2)
            y = int(center_y - h/2)

            boxes.append([x, y, w, h])
            confidences.append((float(confidence)))
            class_ids.append(class_id)

In [19]:
print(len(boxes))
print(len(class_ids))
print(class_ids)
print(confidences)

25
25
[2, 2, 3, 3, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 0, 0]
[0.7869579792022705, 0.768101155757904, 0.976364254951477, 0.8574442863464355, 0.5770198106765747, 0.7667111754417419, 0.7915807366371155, 0.7881016731262207, 0.8498850464820862, 0.826312780380249, 0.8652386665344238, 0.9609807133674622, 0.7774482369422913, 0.9725245833396912, 0.8578454256057739, 0.5049381256103516, 0.6707596778869629, 0.6279140710830688, 0.646953284740448, 0.8589062094688416, 0.5109004974365234, 0.7580952644348145, 0.814677357673645, 0.6933577656745911, 0.5432136654853821]


In [20]:
## using NonMaxSupperssion to eliminate redundent bounding boxes on a single class based on maximum confidence.

indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
print(len(indexes))
indexes.flatten()

12


array([ 2, 13, 11, 10, 14,  3,  8, 12,  1, 16, 18, 24], dtype=int32)

In [28]:
# plotting bounding boxes, confidence and labels on images

font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(boxes), 3))

for i in indexes.flatten():
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i], 2))
    color = colors[i]
    cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
    cv2.putText(img, label + ' ' + confidence, (x, y+20), font, 2, (255, 255, 255), 2)
    
cv2.imshow('Image', img)
key = cv2.waitKey(0)
cv2.destroyAllWindows()

### Putting thogether all

In [29]:
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

classes = []
with open('coco.names', 'r') as f:
      classes = f.read().splitlines()

img = cv2.imread('image3.jpg')
height, width, _ = img.shape

blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
net.setInput(blob)
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)

boxes = []
confidences = []
class_ids = []

for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0]*width)
            center_y = int(detection[1]*height)
            w = int(detection[2]*width)
            h = int(detection[3]*height)

            x = int(center_x - w/2)
            y = int(center_y - h/2)

            boxes.append([x, y, w, h])
            confidences.append((float(confidence)))
            class_ids.append(class_id)

indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

# plotting
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(boxes), 3))

for i in indexes.flatten():
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i], 2))
    color = colors[i]
    cv2.rectangle(img, (x, y), (x+w, y+h), color, 1)
    cv2.putText(img, label + ' ' + confidence, (x, y+20), font, 1, (255, 255, 255), 2)

cv2.imshow('Image', img)
key = cv2.waitKey()
cv2.destroyAllWindows()