In [43]:
import cv2
import numpy as np

In [44]:
# Build the network from the weights file and its matching config file
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Class labels: one name per line of the names file
with open('coco.names', 'r') as names_file:
    classes = names_file.read().splitlines()

# Peek at the first five labels
classes[:5]

['person', 'bicycle', 'car', 'motorbike', 'aeroplane']

In [45]:
# Load the image to run detection on
img = cv2.imread('images.jpeg')

# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an AttributeError on img.shape
if img is None:
    raise FileNotFoundError("Could not read image file 'images.jpeg'")

# Only the first two dimensions (rows, columns) are needed later to rescale
# the boxes; slicing avoids assigning to `_`, which IPython uses for the
# last cell output
height, width = img.shape[:2]

# Show the image and wait for a key press before closing the window
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [46]:
# Convert the image into the 4D blob the network takes as input
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1/255,   # scale pixel values by 1/255
    size=(448, 448),     # spatial size of the blob
    mean=(0, 0, 0),      # no mean subtraction
    swapRB=True,         # convert BGR to RGB
    crop=False,          # resize without cropping
)

# Dimensions of the resulting blob
blob.shape

(1, 3, 448, 448)

In [47]:
# Visualise the blob: one window per channel of each image in the blob,
# titled with the channel index
for sample in blob:
    for idx, plane in enumerate(sample):
        cv2.imshow(str(idx), plane)

# Keep the windows open until a key is pressed, then close them all
cv2.waitKey(0)
cv2.destroyAllWindows()

In [48]:
# Feed the blob to the network
net.setInput(blob)

# Names of the unconnected output layers; they are handed to forward()
# below so that it returns the outputs of exactly these layers
output_layers_names = net.getUnconnectedOutLayersNames()

# Run the forward pass and collect the outputs of those layers
layerOutputs = net.forward(output_layers_names)

In [49]:
# Inspect the first row of the first output layer
layerOutputs[0][0]

array([3.1048585e-02, 3.6705948e-02, 4.0877530e-01, 1.0960779e-01,
       2.4772417e-09, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e

In [50]:
# Containers filled while parsing the network output:
#   boxes       - bounding boxes
#   confidences - confidence of each box
#   class_ids   - class id of each box
boxes, confidences, class_ids = [], [], []

In [61]:
# Extract boxes, confidences and class ids from the output of the yolo network (layerOutputs)

# Start from empty lists so that re-running this cell does not append the
# same detections a second time
boxes = []
confidences = []
class_ids = []

for output in layerOutputs:
    for detection in output:
        # Class scores: everything after the first five values of the row
        scores = detection[5:]

        # Position of the highest class score
        class_id = np.argmax(scores)

        # Score of that best class, used as the confidence of the detection
        confidence = scores[class_id]

        # Keep only detections whose best class score is above 0.75
        if confidence > 0.75:

            # The first four values are multiplied by the image width/height
            # to rescale them to the size of the original image
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Position of the upper-left corner
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            # Store bounding box coordinates, confidence and class id
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            # Plain Python int (np.argmax returns a NumPy integer)
            class_ids.append(int(class_id))

            
            

In [62]:
# Checking how many boxes were detected
print(len(boxes))

68


In [63]:
# Suppress overlapping boxes with non-maximum suppression
# (0.75 is the score threshold, 0.65 the NMS overlap threshold)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.75, 0.65)

# Indices (into boxes) of the boxes that were kept.
# Going through np.asarray makes this work whatever container NMSBoxes
# returns, including an empty result when no box survives, where calling
# .flatten() directly on the return value would fail.
detected_obj = np.asarray(indexes, dtype=np.int32).flatten()
detected_obj

array([ 7,  2,  1, 14,  9, 39, 35, 33, 24, 22, 18, 36, 23,  4],
      dtype=int32)

In [64]:
font = cv2.FONT_HERSHEY_PLAIN

# One random colour per stored box (3 is the number of channels).
# A seeded generator keeps the colours the same on every run.
colors = np.random.default_rng(42).uniform(0, 255, size=(len(boxes), 3))

# Draw every box kept by NMS, together with its label and confidence
for i in detected_obj:
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])

    # Separate name for the text so the numeric `confidence` from the
    # parsing cell is not rebound to a string
    confidence_text = str(round(confidences[i], 2))

    # Plain Python floats instead of a NumPy row for the colour argument
    color = colors[i].tolist()

    # NOTE: this draws directly onto img
    cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
    cv2.putText(img, label + " " + confidence_text, (x, y + 20), font, 2, color, 2)
    print(label, confidence_text)


person 1.0
person 1.0
person 1.0
person 0.96
person 0.96
mouse 0.93
mouse 0.88
cup 0.85
laptop 0.85
laptop 0.84
tvmonitor 0.79
pottedplant 0.78
laptop 0.78
person 0.77


In [66]:
# Display the annotated image; block until any key is pressed, then
# close the window
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

