In [31]:
import cv2
import numpy as np

In [32]:
# Load the input picture from disk.
image = cv2.imread("image1.jpeg")
# cv2.imread does not raise when the file is missing or unreadable; it returns None.
# Fail here with a clear message instead of a cryptic error inside cv2.resize.
if image is None:
    raise FileNotFoundError("Could not read image file 'image1.jpeg'")
# Force a fixed 800x800 working size (the original aspect ratio is not preserved).
image = cv2.resize(image, (800, 800))
# Last expression of the cell: shows the array shape as the cell output.
image.shape

(800, 800, 3)

In [33]:
# Image dimensions used later to scale the detector's box coordinates to pixels.
# Slice instead of unpacking into `_`: in IPython/Jupyter `_` holds the previous
# cell output, and assigning to it shadows that built-in convenience variable.
height, width = image.shape[:2]

In [34]:
# Show the resized input image in an OpenCV window titled "image".
cv2.imshow("image",image)
# Block until a key is pressed (a delay of 0 means wait without timeout).
cv2.waitKey(0)
# Close every OpenCV window so the notebook can continue.
cv2.destroyAllWindows()

In [35]:
## Set up the neural network
# Build the detector from the weights file and its matching network
# configuration; both files are looked up relative to the working directory.
net = cv2.dnn.readNet(
    model="yolov3.weights",
    config="yolov3.cfg",
)

In [36]:
# Read the class labels, one per line. The position of a label in this list is
# the class id used to index it when drawing detections, so lines are kept
# exactly as they appear in the file (no filtering or reordering).
# The encoding is given explicitly so the result does not depend on the platform default.
with open('coco.names', 'r', encoding='utf-8') as f:
    classes = f.read().splitlines()

In [37]:
# Display the loaded label list (a bare last expression is rendered as the cell output).
classes

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [38]:
### Preprocessing
# Convert the image into the network's input blob:
#   scalefactor=1/255 -> pixel values are multiplied by 1/255
#   size=(416, 416)   -> spatial size of the blob fed to the network
#   mean=(0, 0, 0)    -> no mean subtraction
#   swapRB=True       -> swap the first and last colour channels
#   crop=False        -> resize without cropping
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1/255,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

In [39]:
# Visual sanity check of the preprocessed blob: walk its first two axes
# (presumably batch and channel -- confirm against the blobFromImage docs) and
# show each 2-D slice in its own window, named after its index.
for batch_item in blob:
    for channel_index, channel in enumerate(batch_item):
        window_name = str(channel_index)
        cv2.imshow(window_name, channel)
        # Keep the window up for up to 1000 ms (a key press ends the wait early).
        cv2.waitKey(1000)
        cv2.destroyAllWindows()

In [40]:
# Feed the preprocessed blob to the network.
net.setInput(blob)
# Names of the network's unconnected output layers; these are the layers to read results from.
outputLayerNames = net.getUnconnectedOutLayersNames()
# Run the forward pass and collect the output of each requested layer.
layerOutputs = net.forward(outputLayerNames)

In [41]:
# Dump the raw forward-pass result for inspection.
# NOTE(review): this prints whole arrays; printing only each array's shape would
# probably be a more compact sanity check.
print(layerOutputs)

[array([[0.0420658 , 0.04935352, 0.47844043, ..., 0.        , 0.        ,
        0.        ],
       [0.04183805, 0.03146551, 0.3179572 , ..., 0.        , 0.        ,
        0.        ],
       [0.04990423, 0.03824469, 0.7919381 , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.9592936 , 0.9544945 , 0.44492567, ..., 0.        , 0.        ,
        0.        ],
       [0.9669001 , 0.96307755, 0.30746004, ..., 0.        , 0.        ,
        0.        ],
       [0.9671124 , 0.9638399 , 0.8108865 , ..., 0.        , 0.        ,
        0.        ]], dtype=float32), array([[0.02958412, 0.02393023, 0.04992761, ..., 0.        , 0.        ,
        0.        ],
       [0.02240303, 0.02390786, 0.32753637, ..., 0.        , 0.        ,
        0.        ],
       [0.02205037, 0.01756134, 0.07928602, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.9694878 , 0.9724175 , 0.06430112, ..., 0.        , 0.        ,
        0.        ],
       [0.9761178 

In [42]:
# Decode the raw network output into pixel-space boxes, suppress overlapping
# duplicates with non-maximum suppression (NMS), and draw what survives.
CONF_THRESHOLD = 0.5  # minimum best-class score for a detection to be kept
NMS_THRESHOLD = 0.4   # overlap threshold handed to cv2.dnn.NMSBoxes

boxes = []        # [x, y, w, h] in pixels, (x, y) being the top-left corner
confidences = []  # best-class score for each kept box
class_ids = []    # index into `classes` for each kept box

for each in layerOutputs:
    for detection in each:
        # Row layout as used here: values 0-3 are the box centre x/y and its
        # width/height as fractions of the image; values from index 5 onwards
        # are per-class scores. Index 4 is not used (presumably YOLO's
        # objectness score -- confirm against the model documentation).
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = float(scores[class_id])
        if confidence > CONF_THRESHOLD:
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert from centre-based to top-left-based coordinates.
            x = int(center_x - (w / 2))
            y = int(center_y - (h / 2))

            boxes.append([x, y, w, h])
            confidences.append(confidence)
            class_ids.append(class_id)
print(boxes)
print(confidences)

indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
# NMSBoxes does not always return an ndarray: with nothing to keep it can return
# an empty tuple, on which .flatten() raises AttributeError. Normalising through
# np.array gives a flat integer index array in every case, so an image without
# detections simply draws nothing.
kept_indexes = np.array(indexes, dtype=int).flatten()
print(kept_indexes)

font = cv2.FONT_HERSHEY_PLAIN
# One random colour per candidate box (unseeded, so colours differ between runs).
colors = np.random.uniform(0, 255, size=(len(boxes), 3))

# NOTE: drawing modifies `image` in place, so re-running this cell draws on top
# of the previous annotations; re-run the image-loading cell first for a clean frame.
for i in kept_indexes:
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i], 2))
    # Pass the colour to OpenCV as a plain list of Python floats rather than a numpy row.
    color = colors[i].tolist()
    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
    cv2.putText(image, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255))
    

[[431, 72, 173, 403], [229, 92, 188, 514], [535, 73, 208, 699], [28, 206, 252, 514], [357, 262, 286, 504], [285, 550, 200, 186], [2, 532, 94, 209], [297, 538, 157, 200], [720, 501, 78, 257]]
[0.9173274636268616, 0.9921201467514038, 0.998635470867157, 0.9987161755561829, 0.9882053136825562, 0.9983173608779907, 0.9279079437255859, 0.6637046933174133, 0.7286281585693359]
[3 2 5 1 4 6 0 8]


In [43]:
# Show `image` again; by now the detection cell has drawn boxes and labels onto it.
cv2.imshow("image",image)
# Block until a key is pressed (a delay of 0 means wait without timeout).
cv2.waitKey(0)
# Close every OpenCV window.
cv2.destroyAllWindows()