In [1]:
import numpy as np
import cv2

In [7]:
# Paths to the MobileNet SSD Caffe files: the network definition (prototxt)
# and the trained weights (caffemodel).
proto, model = (
    'object-detection-deep-learning/MobileNetSSD_deploy.prototxt.txt',
    'object-detection-deep-learning/MobileNetSSD_deploy.caffemodel',
)

# Build the detection network from the two files above.
net = cv2.dnn.readNetFromCaffe(proto, model)

In [3]:
# Class labels for the MobileNet SSD detector. The position of a label in
# this list is the class index used to look it up, so the order must not
# change.
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair",
    "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train",
    "tvmonitor",
]

In [4]:
# Labels that should be drawn; detections of any other class are skipped.
selected_class = {"person"}

# One random 3-component colour per class label, each component drawn
# uniformly from [0, 255).
COLORS = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))

In [5]:
# Minimum score a detection must exceed (strict `>`) to be treated as valid.
# NOTE: this is the global threshold; the per-detection score inside the
# video loop is held in `conf` -- do not confuse the two.
confidence = 0.75

In [16]:
# Run the detector over every frame of 'video.mp4' and show each frame in a
# window named 'Video', with a labelled box around every detection whose
# class is in `selected_class` and whose score exceeds `confidence`.
# Press 'q' in the window to stop early.
video = cv2.VideoCapture('video.mp4')

try:
    # loop over the frames from the video stream
    while video.isOpened():
        ret, frame = video.read()

        # stop as soon as a frame cannot be read (e.g. end of the video)
        if not ret:
            break

        # resize frame for display
        frame = cv2.resize(frame, (400, 360))

        # frame dimensions, used to scale the detector's box coordinates
        # back to pixel positions in the displayed frame
        (h, w) = frame.shape[:2]
        box_scale = np.array([w, h, w, h])

        # create a 300x300 input blob; 0.007843 (~1/127.5) is the scale
        # factor and 127.5 the mean value handed to blobFromImage
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
                                     0.007843, (300, 300), 127.5)

        # pass the blob through the network to obtain detections
        net.setInput(blob)
        detections = net.forward()

        # loop over the detections; for detection i, index 1 holds the class
        # index, index 2 the score and 3:7 the box corners (scaled by the
        # frame width/height below)
        for i in range(detections.shape[2]):
            conf = detections[0, 0, i, 2]

            # only consider detections above the threshold confidence
            if not conf > confidence:
                continue

            # keep only the selected classes
            idx = int(detections[0, 0, i, 1])
            if CLASSES[idx] not in selected_class:
                continue

            # compute the pixel coordinates of the bounding box
            box = detections[0, 0, i, 3:7] * box_scale
            (startX, startY, endX, endY) = box.astype("int")

            # label shows this detection's own score (`conf`), not the
            # global threshold
            label = "{}: {:.2f}%".format(CLASSES[idx], conf * 100)
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                          COLORS[idx], 2)

            # put the label above the box, or just inside its top edge when
            # there is not enough room above
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(frame, label, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

        # show the output frame; 'q' quits
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the capture and close the window, whichever way the loop
    # ended (end of video, 'q' pressed, or an exception)
    video.release()
    cv2.destroyAllWindows()