In [1]:
import numpy as np
import cv2
import time

In [2]:
#reading images
# Both images are loaded as 3-channel colour images.
img1=cv2.imread('i1.jpg', cv2.IMREAD_COLOR)
img2=cv2.imread('i2.jpg', cv2.IMREAD_COLOR)

# cv2.imread signals failure by returning None rather than raising, so check
# here and fail with a clear message instead of erroring later on a None image.
for _path, _img in (('i1.jpg', img1), ('i2.jpg', img2)):
    if _img is None:
        raise FileNotFoundError('Could not read image: ' + _path)

In [3]:
#loading classes and model
# Class labels: one entry per line of the names file, with surrounding
# whitespace (including the trailing newline) removed.
classesFile = 'coco.names'
with open(classesFile) as f:
    classes = list(map(str.strip, f))

# Darknet network description and the matching weights file.
modelConfig, modelWeights = 'yolov3.cfg', 'yolov3.weights'

# Build the network from the two files above.
net = cv2.dnn.readNetFromDarknet(modelConfig, modelWeights)

In [4]:
#initializing parameters
confThreshold = 0.5  # detections whose best class score is not above this are dropped
nmsThreshold = 0.3   # threshold handed to non-maximum suppression
# One 3-channel uint8 colour per class. The generator is seeded so the colours
# are the same on every run, and the upper bound is 256 because randint's
# `high` is exclusive (255 would never be produced otherwise).
colours = np.random.RandomState(42).randint(0, 256, size=(len(classes), 3), dtype='uint8')

In [5]:
#getting ending layers
def getOutputsName(net):
    """Return the names of the network's unconnected output layers.

    Parameters
    ----------
    net : object exposing ``getLayerNames()`` and ``getUnconnectedOutLayers()``
        (the notebook passes the network created with ``cv2.dnn``).

    Returns
    -------
    list
        Layer names, in the order reported by ``getUnconnectedOutLayers()``.

    Notes
    -----
    ``getUnconnectedOutLayers()`` yields 1-based layer indices. Depending on
    the OpenCV version they arrive either as an Nx1 array or as a flat 1-D
    array, so the result is flattened first to support both shapes.
    """
    layerNames = net.getLayerNames()
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()
    # indices are 1-based, the name list is 0-based
    return [layerNames[int(layerId) - 1] for layerId in outLayerIds]

In [6]:
#converting image to an input blob
# Both images go through the same preprocessing call: pixel values scaled by
# 1/255, resized to 416x416 without cropping, red and blue channels swapped.
blob1, blob2 = (
    cv2.dnn.blobFromImage(frame, 1/255.0, (416,416), swapRB = True, crop=False)
    for frame in (img1, img2)
)

In [12]:
#running detection on both images
# `objects` collects one list of detected class names per image, in image order.
# An entry is appended even when an image has no detections, so later cells can
# rely on there being exactly one entry per processed image.
objects=[]
outputLayerNames = getOutputsName(net)  # same for every image, so look it up once
for img, blob in ((img1, blob1), (img2, blob2)):
    #implementing forward pass
    net.setInput(blob)
    start = time.time()
    outputFromNet = net.forward(outputLayerNames)
    end = time.time()
    print('Current frame took {:.5f} seconds'.format(end - start))

    imgHeight, imgWidth = img.shape[:2]

    #getting bounding boxes
    bounding_boxes = []  # [x_min, y_min, width, height] in pixels (top-left layout for NMSBoxes)
    box_centers = []     # (x_center, y_center) in pixels, used when drawing
    confidences = []
    class_numbers = []

    for result in outputFromNet:
        for detectedObjects in result:
            # values 0-3 are scaled to pixels below; values from index 5 on are the per-class scores
            scores = detectedObjects[5:]
            classCurrent = np.argmax(scores)
            confidenceCurrent = scores[classCurrent]

            #removing boxes with confidence score less than confThreshold
            if confidenceCurrent > confThreshold:
                x_center = int(detectedObjects[0] * imgWidth)
                y_center = int(detectedObjects[1] * imgHeight)
                box_width = int(detectedObjects[2] * imgWidth)
                box_height = int(detectedObjects[3] * imgHeight)

                class_numbers.append(int(classCurrent))
                confidences.append(float(confidenceCurrent))
                box_centers.append((x_center, y_center))
                # NMSBoxes reads the first two values as the top-left corner,
                # so convert from the centre point before storing the box.
                bounding_boxes.append([x_center - box_width // 2,
                                       y_center - box_height // 2,
                                       box_width, box_height])

    #performing non maximum suppression
    results = cv2.dnn.NMSBoxes(bounding_boxes, confidences, confThreshold, nmsThreshold)

    #drawing predicted bounding boxes
    detected_names = []
    # np.array(...).flatten() copes with an Nx1 array, a flat array, or an empty
    # tuple, whichever shape NMSBoxes returns for the kept indices.
    for i in np.array(results, dtype=int).flatten():
        class_name = classes[class_numbers[i]]
        detected_names.append(class_name)

        x_min, y_min, box_width, box_height = bounding_boxes[i]
        x_center, y_center = box_centers[i]
        colour_box_current = colours[class_numbers[i]].tolist()
        # each detection is marked with a circle centred on the box
        cv2.circle(img, (x_center, y_center),
                   max(box_width, box_height)//2,
                   colour_box_current, 2)
        text_box_current = '{}: {:.4f}'.format(class_name, confidences[i])
        # label sits just above the box's top-left corner
        cv2.putText(img, text_box_current, (x_min, y_min - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour_box_current, 2)
    objects.append(detected_names)


Current frame took 0.46019 seconds
Current frame took 0.40705 seconds


In [13]:
#printing output
# Merge the per-image name lists into one de-duplicated list. Unpacking
# `objects` works for any number of per-image lists (including none), and
# sorting makes the printed order stable between runs.
final_list = sorted(set().union(*objects))
print('Objects detected are: ' + ', '.join(map(str,final_list)))

cv2.imshow('img1', img1)
cv2.imshow('img2', img2)
# Block until a key is pressed; pressing 'q' closes the windows.
if cv2.waitKey(0) & 0xFF == ord('q'):
    cv2.destroyAllWindows()  # must be called; the bare attribute reference did nothing

Objects detected are: chair, teddy bear
