In [1]:
import os
import cv2
import numpy as np
import requests

In [2]:
def download_if_missing(path, url, timeout=60):
    """Download ``url`` to ``path`` unless a file already exists there.

    The body is streamed to a temporary ``.part`` file and renamed into place
    only once the transfer has finished, so an interrupted download is not
    mistaken for a complete file on the next run.

    Args:
        path: Destination file path.
        url: Address to fetch when ``path`` does not exist yet.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (instead of silently saving the error page as the asset).
    """
    if os.path.isfile(path):
        return
    partial_path = path + '.part'
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(partial_path, 'wb') as f:
            # Stream in 1 MiB chunks so the large weights file is never held
            # in memory all at once.
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial_path, path)


# Network architecture description
yolo_config = './yolov3.cfg'
download_if_missing(
    yolo_config,
    'https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg')

# Pre-trained network weights
yolo_weights = './yolov3.weights'
download_if_missing(
    yolo_weights,
    'https://pjreddie.com/media/files/yolov3.weights')

# Class names file: one name per line for every class the network can detect
classes_file = './coco.names'
download_if_missing(
    classes_file,
    'https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names')

# Load class names (done once, whether or not the file was just downloaded)
with open(classes_file, 'r') as f:
    classes = [line.strip() for line in f]

# Local image to run detection on (it is read from disk, not downloaded)
#image_file = './Abbey_Road_Zebra_crossing.jpg'
image_file = './test.png'

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside imshow.
img = cv2.imread(image_file)
if img is None:
    raise FileNotFoundError("Could not read image file: %s" % image_file)

# Show the input image until a key is pressed
cv2.imshow("img", img)
cv2.waitKey()
cv2.destroyAllWindows()

# Scale pixel values by 1/255, resize to 416x416, swap the R and B channels,
# no mean subtraction, no cropping.
blob = cv2.dnn.blobFromImage(img, scalefactor=1 / 255, size=(416, 416),
                             mean=(0, 0, 0), swapRB=True, crop=False)


In [3]:
# Sanity check: number of class names loaded, followed by the names themselves.
print(len(classes),classes)

80 ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [4]:
# Build the detection network from the weights file and its matching config.
net = cv2.dnn.readNet(model=yolo_weights, config=yolo_config)

In [5]:
# Inspect the preprocessed input blob (values were scaled by 1/255 when it was built).
blob

array([[[[0.38431376, 0.4666667 , 0.6156863 , ..., 0.9058824 ,
          0.9058824 , 0.9058824 ],
         [0.3921569 , 0.40784317, 0.4156863 , ..., 0.9058824 ,
          0.9058824 , 0.9058824 ],
         [0.3921569 , 0.3803922 , 0.38823533, ..., 0.9058824 ,
          0.9058824 , 0.9058824 ],
         ...,
         [0.2509804 , 0.40784317, 0.3647059 , ..., 0.18431373,
          0.19215688, 0.19607845],
         [0.2901961 , 0.42352945, 0.38823533, ..., 0.18431373,
          0.19215688, 0.19607845],
         [0.36078432, 0.41960788, 0.41960788, ..., 0.18823531,
          0.19215688, 0.19607845]],

        [[0.35686275, 0.4039216 , 0.5529412 , ..., 0.9450981 ,
          0.9450981 , 0.9450981 ],
         [0.36078432, 0.35686275, 0.36078432, ..., 0.9450981 ,
          0.9450981 , 0.9450981 ],
         [0.3647059 , 0.3647059 , 0.3647059 , ..., 0.9450981 ,
          0.9450981 , 0.9450981 ],
         ...,
         [0.27058825, 0.43529415, 0.3921569 , ..., 0.20784315,
          0.20392159, 0.2

In [6]:
# Feed the preprocessed image to the network.
net.setInput(blob)
layers_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Older OpenCV builds
# wrap every index in its own array (shape (N, 1)) while newer builds return a
# flat array, so flatten first instead of indexing each entry with [0].
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
output_layers = [layers_names[int(i) - 1] for i in out_layer_ids]
# Run the forward pass and collect the result of every output layer.
outs = net.forward(output_layers)

In [7]:
# Names of the network's output layers that were passed to net.forward().
output_layers

['yolo_82', 'yolo_94', 'yolo_106']

In [8]:
# Raw network output: the value returned by net.forward(output_layers).
outs

[array([[0.03542386, 0.05464605, 0.3524644 , ..., 0.        , 0.        ,
         0.        ],
        [0.04990491, 0.03535479, 0.29963252, ..., 0.        , 0.        ,
         0.        ],
        [0.05038604, 0.03981064, 0.7385493 , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.9616435 , 0.9505885 , 0.38219255, ..., 0.        , 0.        ,
         0.        ],
        [0.96440166, 0.965454  , 0.3120007 , ..., 0.        , 0.        ,
         0.        ],
        [0.968645  , 0.9603485 , 0.81717557, ..., 0.        , 0.        ,
         0.        ]], dtype=float32),
 array([[0.01749736, 0.0236836 , 0.03824591, ..., 0.        , 0.        ,
         0.        ],
        [0.01580204, 0.01844252, 0.37996742, ..., 0.        , 0.        ,
         0.        ],
        [0.02190317, 0.014988  , 0.06639002, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.97220683, 0.9799352 , 0.04520325, ..., 0.        , 0.        ,
         0.        

In [9]:
# Turn every raw detection row into a pixel-space box, a class id and a
# confidence. Nothing is filtered here; thresholding is left to the NMS step.
class_ids = []
confidences = []
boxes = []
img_height, img_width = img.shape[:2]
# outer loop: one array per network output layer
for layer_out in outs:
    # inner loop: one row per candidate detection in that layer
    for detection in layer_out:
        # columns 0-3 are treated as centre x, centre y, width and height,
        # expressed as fractions of the image size
        box_w = int(detection[2] * img_width)
        box_h = int(detection[3] * img_height)
        left = int(detection[0] * img_width) - box_w // 2
        top = int(detection[1] * img_height) - box_h // 2
        boxes.append([left, top, box_w, box_h])
        # column 4 is stored as the detection's confidence
        confidences.append(float(detection[4]))
        # columns 5+ are scanned for the highest value; its index is the class id
        class_ids.append(np.argmax(detection[5:]))

In [10]:
# Inspect the confidence collected for every candidate box (one float per detection row).
confidences

[2.8683185604450046e-08,
 2.0403808420610403e-08,
 3.9016455843920994e-08,
 7.962862014210259e-07,
 3.1563658353661594e-08,
 1.5539786346607798e-08,
 2.5819417714956217e-06,
 3.914645674285566e-08,
 4.785367480053537e-08,
 1.1106779993497184e-06,
 4.083955573719322e-08,
 2.2109587050067603e-08,
 4.854287567468418e-07,
 1.5593201396768563e-08,
 1.2960001960493628e-08,
 5.539267533549719e-08,
 2.2643951158585196e-09,
 8.292783126861991e-10,
 1.494637658083775e-08,
 3.0305272180619625e-10,
 2.431820300685672e-10,
 7.927505762950204e-09,
 3.985686780616504e-11,
 7.397842755052508e-11,
 2.2923390474716143e-07,
 2.827230949353776e-10,
 4.375801110345634e-11,
 9.178597224490659e-07,
 5.4428336837553104e-11,
 5.444606571147759e-12,
 5.393382593865681e-07,
 1.4057555119961762e-10,
 1.9829036416313395e-12,
 1.7934237916961138e-07,
 2.712610865096554e-11,
 1.421052267828668e-13,
 6.542594377378919e-08,
 7.550665648281552e-10,
 8.767879478011054e-11,
 1.1722625714583046e-07,
 5.8030316552049044e-0

In [11]:
# Inspect the candidate boxes as [x, y, w, h] in pixels; x and y can be negative
# because they are computed as centre minus half the box size.
boxes

[[-192, -12, 481, 106],
 [-136, -323, 409, 700],
 [-436, -297, 1008, 654],
 [-7, -4, 378, 106],
 [-24, -247, 384, 553],
 [-316, -277, 979, 621],
 [39, 0, 437, 94],
 [40, -240, 455, 539],
 [-239, -266, 1045, 595],
 [71, -1, 602, 83],
 [104, -213, 539, 489],
 [-140, -251, 1028, 560],
 [181, 0, 596, 75],
 [197, -194, 567, 452],
 [-53, -261, 1079, 580],
 [168, 2, 841, 66],
 [270, -163, 640, 390],
 [73, -233, 1059, 531],
 [168, 3, 1018, 57],
 [337, -155, 665, 375],
 [103, -218, 1162, 497],
 [348, -4, 892, 55],
 [486, -155, 603, 378],
 [164, -207, 1278, 484],
 [467, -8, 866, 67],
 [572, -149, 630, 354],
 [280, -198, 1179, 448],
 [639, -5, 707, 69],
 [711, -139, 543, 324],
 [382, -201, 1235, 446],
 [817, -5, 538, 81],
 [852, -169, 462, 377],
 [521, -215, 1126, 457],
 [985, -2, 417, 89],
 [999, -152, 398, 339],
 [689, -211, 1032, 450],
 [1066, -11, 474, 98],
 [1133, -229, 359, 503],
 [783, -233, 1082, 506],
 [-113, 22, 336, 131],
 [-140, -183, 430, 559],
 [-431, -173, 1004, 538],
 [2, 7, 340, 

In [12]:
# Non-max suppression: candidates are filtered by SCORE_THRESHOLD and
# overlapping boxes are pruned according to NMS_THRESHOLD. `ids` holds the
# indices (into `boxes`) of the boxes that are kept.
SCORE_THRESHOLD = 0.3
NMS_THRESHOLD = 0.5
ids = cv2.dnn.NMSBoxes(
    boxes,
    confidences,
    score_threshold=SCORE_THRESHOLD,
    nms_threshold=NMS_THRESHOLD,
)


In [13]:
# Draw the boxes kept by non-max suppression on a copy of the image.
# One random colour per class.
colors = np.random.uniform(0, 255, size=(len(classes), 3))
img_cpy = img.copy()
# NMSBoxes returns the kept indices as an (N, 1) array on older OpenCV builds,
# as a flat array on newer ones, and as an empty tuple when nothing is kept.
# Flattening normalises all three, so no per-item [0] indexing is needed.
for i in np.array(ids, dtype=int).reshape(-1):
    x, y, w, h = boxes[i]
    class_id = class_ids[i]
    # Hand OpenCV a plain list of Python floats rather than a numpy row.
    color = colors[class_id].tolist()
    # x, y, w, h are already ints, so no rounding is required.
    cv2.rectangle(img_cpy, (x, y), (x + w, y + h), color, 2)
    label = "%s: %.2f" % (classes[class_id], confidences[i])
    cv2.putText(img_cpy, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
    # Refresh the window after each box and pause 500 ms, so the boxes
    # appear one at a time.
    cv2.imshow("Object detection", img_cpy)
    cv2.waitKey(500)
cv2.destroyAllWindows()

In [14]:
### For a video
# The complete single-image pipeline gathered into one cell: preprocess,
# forward pass, box extraction, non-max suppression and drawing.

# Scale pixels by 1/255, resize to 416x416, no mean subtraction, swap R and B.
blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), True, crop=False)

net = cv2.dnn.readNet(yolo_weights, yolo_config)

net.setInput(blob)
layers_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based indices, nested as (N, 1) on older
# OpenCV builds and flat on newer ones; flatten so both layouts work.
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
output_layers = [layers_names[int(i) - 1] for i in out_layer_ids]
outs = net.forward(output_layers)

# extract bounding boxes
class_ids = list()
confidences = list()
boxes = list()
# iterate over the arrays returned for each output layer
for out in outs:
    # iterate over the candidate detections of that layer
    for detection in out:
        # bounding box: columns 0-3 are used as centre x, centre y, width
        # and height relative to the image size
        center_x = int(detection[0] * img.shape[1])
        center_y = int(detection[1] * img.shape[0])
        w = int(detection[2] * img.shape[1])
        h = int(detection[3] * img.shape[0])
        x = center_x - w // 2
        y = center_y - h // 2
        boxes.append([x, y, w, h])
        # class: index of the highest value among columns 5+
        class_id = np.argmax(detection[5:])
        class_ids.append(class_id)
        # confidence: column 4
        confidence = detection[4]
        confidences.append(float(confidence))

# non-max suppression
ids = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=0.3, nms_threshold=0.5)

# one random colour per class
colors = np.random.uniform(0, 255, size=(len(classes), 3))
img_cpy = img.copy()
# NMSBoxes returns (N, 1) indices on older OpenCV, flat indices on newer
# builds and an empty tuple when nothing is kept; flattening covers all cases.
for i in np.array(ids, dtype=int).reshape(-1):
    x, y, w, h = boxes[i]
    class_id = class_ids[i]
    color = colors[class_id].tolist()
    cv2.rectangle(img_cpy, (x, y), (x + w, y + h), color, 2)
    label = "%s: %.2f" % (classes[class_id], confidences[i])
    cv2.putText(img_cpy, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
# Show the annotated image once, after every box has been drawn.
cv2.imshow("Object detection", img_cpy)
cv2.waitKey(1)
cv2.destroyAllWindows()

In [None]:
def detect_objects(frame, net, output_layers, score_threshold=0.3, nms_threshold=0.5):
    """Run the network on one frame and return the detections kept by NMS.

    Args:
        frame: Image array as returned by ``cv2.VideoCapture.read``.
        net: Network loaded with ``cv2.dnn.readNet``.
        output_layers: Names of the layers whose outputs are collected.
        score_threshold: Minimum confidence passed to ``cv2.dnn.NMSBoxes``.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        A list of ``(box, class_id, confidence)`` tuples, where ``box`` is
        ``[x, y, w, h]`` in pixel coordinates of ``frame``.
    """
    blob = cv2.dnn.blobFromImage(frame, 1 / 255, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    frame_h, frame_w = frame.shape[:2]
    boxes, class_ids, confidences = [], [], []
    for out in outs:
        # Rows below the score threshold are discarded by NMSBoxes anyway;
        # dropping them up front keeps the Python loop short for every frame.
        for detection in out[out[:, 4] >= score_threshold]:
            # columns 0-3: centre x, centre y, width, height relative to the frame
            w = int(detection[2] * frame_w)
            h = int(detection[3] * frame_h)
            x = int(detection[0] * frame_w) - w // 2
            y = int(detection[1] * frame_h) - h // 2
            boxes.append([x, y, w, h])
            # columns 5+: index of the highest value is the class id
            class_ids.append(int(np.argmax(detection[5:])))
            # column 4: confidence
            confidences.append(float(detection[4]))

    ids = cv2.dnn.NMSBoxes(boxes, confidences,
                           score_threshold=score_threshold,
                           nms_threshold=nms_threshold)
    # NMSBoxes returns (N, 1) indices on older OpenCV, flat indices on newer
    # builds and an empty tuple when nothing is kept; flattening covers all.
    kept = np.array(ids, dtype=int).reshape(-1)
    return [(boxes[i], class_ids[i], confidences[i]) for i in kept]


def draw_detections(frame, detections, classes, colors):
    """Return a copy of ``frame`` with a box and a label drawn per detection."""
    annotated = frame.copy()
    for (x, y, w, h), class_id, confidence in detections:
        color = colors[class_id].tolist()
        cv2.rectangle(annotated, (x, y), (x + w, y + h), color, 2)
        label = "%s: %.2f" % (classes[class_id], confidence)
        cv2.putText(annotated, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
    return annotated


# One-time setup. Loading the network and picking colours does not depend on
# the frame, so it is done before the loop instead of once per frame.
net = cv2.dnn.readNet(yolo_weights, yolo_config)
layers_names = net.getLayerNames()
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
output_layers = [layers_names[int(i) - 1] for i in out_layer_ids]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

cap = cv2.VideoCapture('./try.mp4')
if not cap.isOpened():
    raise IOError("Could not open video file: ./try.mp4")
try:
    while True:
        ok, img = cap.read()
        # read() reports failure once the video is exhausted; stop cleanly
        # instead of passing an empty frame to the network.
        if not ok:
            break
        detections = detect_objects(img, net, output_layers)
        img_cpy = draw_detections(img, detections, classes, colors)
        cv2.imshow("Object detection", img_cpy)
        # Esc (key code 27) stops playback
        k = cv2.waitKey(1) & 0xFF
        if k == 27:
            break
finally:
    # Release the capture and close the window even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()