In [1]:
#Importing libraries
import cv2
import numpy as np
print(cv2.__version__)

4.5.1


In [2]:
# Load the YOLO network: first argument is the weights file, second the
# configuration file. Both paths are relative to the current working directory.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

In [3]:
# Read the object names from the coco file, one name per line, into a list.
# The position of a name in the list is later used as its class id.
classes = []
with open('coco.names', 'r') as names_file:
    classes.extend(names_file.read().splitlines())

In [4]:
# Sanity check: show the class names that were read from coco.names
print(classes)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [5]:
# Read the input image and record its size in pixels.
# (No scaling happens here; resizing is done when the blob is built.)
img = cv2.imread('image.jpg')
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail early with a clear message instead of an AttributeError on `.shape`.
if img is None:
    raise FileNotFoundError("Could not read 'image.jpg' (missing file or unsupported format)")
# shape is (rows, cols, channels); only rows/cols are needed, and slicing
# avoids binding `_`, which IPython uses for the last cell output.
height, width = img.shape[:2]

In [6]:
#Reading video frames (disabled draft: it never reads a frame from `cap`, so `img` below would still be the still image)
#while True:
#cap = cv2.VideoCapture('test.mp4')
#height, width, _ = img.shape

# What is Mean Subtraction?
Mean subtraction is used to help combat illumination changes in the input images in our dataset. We can therefore view mean subtraction as a technique used to aid our Convolutional Neural Networks.
## SwapRB parameter 
OpenCV assumes images are in BGR channel order; however, the `mean` value assumes we are using RGB order. To resolve this discrepancy we can swap the R and B channels in the image by setting `swapRB` to `True`. Note that `swapRB` defaults to `False` in `cv2.dnn.blobFromImage`, so the swap only happens when we pass `swapRB=True` explicitly.

In [7]:
# Turn the image into the 4-D input blob expected by the network:
#   scalefactor=1/255  -> divide every pixel value by 255
#   size=(416, 416)    -> spatial size of the network input
#   mean=(0, 0, 0)     -> mean to subtract; all zeros, so nothing is subtracted
#   swapRB=True        -> swap the R and B channels of the image
#   crop=False         -> do not crop after resizing
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1/255,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

In [8]:
# Set the preprocessed blob as the network's input for the next forward pass
net.setInput(blob)

In [9]:
# Ask the network for the names of its unconnected output layers, then run a
# forward pass that returns the output of each of those layers.
output_layers_names = net.getUnconnectedOutLayersNames()
# layerOutputs is iterated per layer, then per detection row, when decoding boxes
layerOutputs = net.forward(output_layers_names)

In [10]:
# Accumulators filled while decoding the detections, kept index-aligned:
#   boxes       -> bounding boxes
#   confidences -> confidence of each kept detection
#   class_ids   -> class id of each kept detection
boxes, confidences, class_ids = [], [], []

In [11]:
# Decode the raw network outputs into pixel-space boxes, then apply
# non-maximum suppression (NMS).
#   - outer loop: one iteration per output layer
#   - inner loop: one iteration per detection row of that layer
# A detection row is read here as: [0]=centre x, [1]=centre y, [2]=width,
# [3]=height (all multiplied by the image size below), [5:]=per-class scores.
CONF_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4   # NMS threshold passed to cv2.dnn.NMSBoxes

# Start from empty lists so that re-running this cell does not append the
# same detections a second time.
boxes = []
confidences = []
class_ids = []

for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > CONF_THRESHOLD:
            # Centre and size of the object, scaled to image pixels
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert centre coordinates to the top-left corner
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            # Store a plain Python int rather than a NumPy scalar
            class_ids.append(int(class_id))

# Indices (into `boxes`) of the detections that survive NMS
indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)


In [12]:
# Font used for the labels drawn on the image
font = cv2.FONT_HERSHEY_PLAIN
# One random colour (3 values in [0, 255)) per detected box. A seeded local
# generator keeps the colours identical on every run without touching
# NumPy's global random state.
colors = np.random.default_rng(42).uniform(0, 255, size=(len(boxes), 3))

In [13]:
# Draw a rectangle and a "<label> <confidence>" caption for every detection
# kept by non-maximum suppression.
# When nothing is kept, NMSBoxes returns an empty tuple, which has no
# .flatten(); converting to a flat integer array handles that case as well
# as both the N x 1 and the flat index layouts.
for i in np.array(indexes, dtype=int).reshape(-1):
    # Location of this box: top-left corner plus width/height
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    caption = label + " " + str(round(confidences[i], 2))
    # Plain Python floats instead of a NumPy row for the colour argument
    color = colors[i].tolist()
    cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
    # Caption is anchored 20 px below the top-left corner, in white
    cv2.putText(img, caption, (x, y + 20), font, 2, (255, 255, 255), 2)

In [14]:
# Show the annotated image in a window titled 'Image'
cv2.imshow('Image', img)
# waitKey(0): wait for a key press with no timeout (per the OpenCV docs) before continuing
cv2.waitKey(0)
# Close every OpenCV window that was opened
cv2.destroyAllWindows()