In [1]:
# Using OpenCV-DNN for detection using a caffe model
# https://github.com/djmv/MobilNet_SSD_opencv

In [2]:
import numpy as np
import cv2 

In [3]:
# Labels of Network.
classNames = { 0: 'background',
    1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat',
    5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair',
    10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse',
    14: 'motorbike', 15: 'person', 16: 'pottedplant',
    17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor' }

In [4]:
img_path = "/home/jayasimha/Projects/OpenCV/MobilNet_SSD_opencv-master/img.jpeg"
prototxt_path = "/home/jayasimha/Projects/OpenCV/MobilNet_SSD_opencv-master/MobileNetSSD_deploy.prototxt"
weights_path = "/home/jayasimha/Projects/OpenCV/MobilNet_SSD_opencv-master/MobileNetSSD_deploy.caffemodel"

threshold = 0.6

In [8]:
#Load the Caffe model 
net = cv2.dnn.readNetFromCaffe(prototxt_path, weights_path)

In [9]:
# Load image from the path
frame = cv2.imread(img_path)
frame_resized = cv2.resize(frame,(300,300)) # resize frame for prediction
heightFactor = frame.shape[0]/300.0
widthFactor = frame.shape[1]/300.0 
# MobileNet requires fixed dimensions for input image(s)
# so we have to ensure that it is resized to 300x300 pixels.
# set a scale factor to image because network the objects has differents size. 
# We perform a mean subtraction (127.5, 127.5, 127.5) to normalize the input;
# after executing this command our "blob" now has the shape:
# (1, 3, 300, 300)
blob = cv2.dnn.blobFromImage(frame_resized, 0.007843, (300, 300), (127.5, 127.5, 127.5), False)
#Set to network the input blob 
net.setInput(blob)
#Prediction of network
detections = net.forward()

In [10]:
frame_copy = frame.copy()
frame_copy2 = frame.copy()
#Size of frame resize (300x300)
cols = frame_resized.shape[1] 
rows = frame_resized.shape[0]

In [11]:
#For get the class and location of object detected, 
# There is a fix index for class, location and confidence
# value in @detections array .
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2] #Confidence of prediction 
    if confidence > threshold: # Filter prediction 
        class_id = int(detections[0, 0, i, 1]) # Class label

        # Object location 
        xLeftBottom = int(detections[0, 0, i, 3] * cols) 
        yLeftBottom = int(detections[0, 0, i, 4] * rows)
        xRightTop   = int(detections[0, 0, i, 5] * cols)
        yRightTop   = int(detections[0, 0, i, 6] * rows)

        xLeftBottom_ = int(widthFactor * xLeftBottom) 
        yLeftBottom_ = int(heightFactor* yLeftBottom)
        xRightTop_   = int(widthFactor * xRightTop)
        yRightTop_   = int(heightFactor * yRightTop)
        # Draw location of object  
        cv2.rectangle(frame_resized, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                      (0, 255, 0))

        cv2.rectangle(frame_copy, (xLeftBottom_, yLeftBottom_), (xRightTop_, yRightTop_),
                      (0, 255, 0),-1)
opacity = 0.3
cv2.addWeighted(frame_copy, opacity, frame, 1 - opacity, 0, frame)

array([[[ 93, 100, 119],
        [109, 116, 135],
        [113, 123, 141],
        ...,
        [233, 229, 240],
        [234, 230, 241],
        [234, 230, 241]],

       [[ 92,  99, 118],
        [105, 115, 133],
        [112, 122, 140],
        ...,
        [236, 232, 243],
        [236, 232, 243],
        [236, 232, 243]],

       [[ 87,  97, 114],
        [100, 110, 127],
        [110, 120, 137],
        ...,
        [238, 231, 244],
        [238, 231, 244],
        [238, 231, 244]],

       ...,

       [[ 21,  20,   0],
        [ 21,  20,   0],
        [ 21,  20,   0],
        ...,
        [128, 114,  86],
        [114, 100,  72],
        [110,  96,  68]],

       [[ 21,  20,   0],
        [ 21,  20,   0],
        [ 21,  20,   0],
        ...,
        [134, 118,  89],
        [127, 111,  82],
        [116, 100,  71]],

       [[ 21,  20,   0],
        [ 21,  20,   0],
        [ 21,  20,   0],
        ...,
        [137, 121,  92],
        [131, 115,  86],
        [122, 106,  77]]

In [None]:
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2] #Confidence of prediction
    if confidence > 0.7 :# Threshold 
        class_id = int(detections[0, 0, i, 1]) # Class label

        # Object location 
        xLeftBottom = int(detections[0, 0, i, 3] * cols) 
        yLeftBottom = int(detections[0, 0, i, 4] * rows)
        xRightTop   = int(detections[0, 0, i, 5] * cols)
        yRightTop   = int(detections[0, 0, i, 6] * rows)

        xLeftBottom_ = int(widthFactor * xLeftBottom) 
        yLeftBottom_ = int(heightFactor* yLeftBottom)
        xRightTop_   = int(widthFactor * xRightTop)
        yRightTop_   = int(heightFactor * yRightTop)
        cv2.rectangle(frame, (xLeftBottom_, yLeftBottom_), (xRightTop_, yRightTop_),
          (0, 0, 0),2)
        # Draw label and confidence of prediction in frame resized
        if class_id in classNames:
            label = classNames[class_id] + ": " + str(confidence)
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_TRIPLEX, 0.8, 1)

            yLeftBottom_ = max(yLeftBottom_, labelSize[1])
            cv2.rectangle(frame, (xLeftBottom_, yLeftBottom_ - labelSize[1]),
                                 (xLeftBottom_ + labelSize[0], yLeftBottom_ + baseLine),
                                 (255, 255, 255), cv2.FILLED)
            cv2.putText(frame, label, (xLeftBottom_, yLeftBottom_),
                        cv2.FONT_HERSHEY_TRIPLEX, 0.8, (0, 0, 0))
            print(label) #print class and confidence 
cv2.namedWindow("frame", cv2.WINDOW_NORMAL)
cv2.imshow("frame", frame)
cv2.waitKey(0)
cv2.destroyAllWindows()

person: 0.84372514
person: 0.7661178
