In [1]:
import cv2
import numpy as np

In [2]:
# MobileNet-SSD (Caffe) network definition and trained weights, obtained from
# https://github.com/chuanqi305/MobileNet-SSD
# Both files are looked up relative to the current working directory.
configFile, modelFile = 'deploy.prototxt', 'mobilenet_iter_73000.caffemodel'

# Build the OpenCV DNN network once; the detection cells reuse `net`.
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)

In [3]:
def detectHumans(frame, network, confidence_threshold=0.7, person_class_id=15):
    """Run the SSD network on one frame and return the detected people.

    Args:
        frame: Image array whose first two dimensions are (height, width),
            e.g. a frame returned by ``cv2.VideoCapture.read``.
        network: Object exposing ``setInput(blob)`` and ``forward()``, such as
            the network returned by ``cv2.dnn.readNetFromCaffe``.
        confidence_threshold: Detections must score strictly above this value
            to be kept. Defaults to 0.7.
        person_class_id: Class id to keep. Defaults to 15, the "person" label
            in https://github.com/chuanqi305/MobileNet-SSD/blob/master/demo.py#L21

    Returns:
        A list with one ``(confidence, box, (center_x, center_y))`` tuple per
        kept detection, in network output order. ``box`` is an integer array
        ``[start_x, start_y, end_x, end_y]`` in pixel coordinates, clipped to
        the frame. The list is empty when ``frame`` is ``None`` or has no
        pixels.
    """
    # Nothing to look at: report "no detections" instead of failing inside OpenCV.
    if frame is None or frame.size == 0:
        return []

    results = []
    h, w = frame.shape[:2]

    # Pre-processing: mean subtraction and scaling to match the model's
    # training set (0.007843 is 1 / 127.5), resized to the 300x300 input.
    blob = cv2.dnn.blobFromImage(frame, 0.007843, (300, 300), [127.5, 127.5, 127.5])
    network.setInput(blob)

    # Run an inference of the model, passing the blob through the network.
    network_output = network.forward()

    # Loop-invariant helpers: scale factors for the normalised box corners and
    # the largest valid pixel index on each axis.
    scale = np.array([w, h, w, h])
    upper_bound = np.array([w - 1, h - 1, w - 1, h - 1])

    # Each row is indexed as [_, class_id, confidence, x1, y1, x2, y2].
    for detection in network_output[0, 0]:
        class_id = int(detection[1])
        confidence = detection[2]

        # Keep only confident detections of the requested class.
        if confidence > confidence_threshold and class_id == person_class_id:
            # Remap the normalised corners to pixels. The network may predict
            # corners slightly outside the image, so clip them to the frame.
            box = np.clip(detection[3:7] * scale, 0, upper_bound).astype('int')

            # Calculate the person center from the bounding box.
            center_x = int((box[0] + box[2]) / 2)
            center_y = int((box[1] + box[3]) / 2)

            results.append((confidence, box, (center_x, center_y)))
    return results

In [6]:
def exportDetection(input_path, network=None, show_preview=True):
    """Annotate every frame of a video with person detections and save it.

    Each frame is passed to ``detectHumans``; a white rectangle is drawn for
    every detection and the network's inference time is written in the
    bottom-left corner. The annotated frames are written to a new video whose
    file name is the input file name prefixed with ``output_``, placed in the
    same directory as the input.

    Args:
        input_path: Path of the video to process.
        network: Detection network to use. Defaults to the notebook-level
            ``net``.
        show_preview: When True (default) each annotated frame is shown in a
            window and pressing ``q`` stops processing early. Set to False to
            run without opening a window.

    Raises:
        OSError: If the input video cannot be opened, or the output video
            writer cannot be created.
    """
    import os  # local import keeps this cell runnable on its own

    if network is None:
        network = net  # fall back to the model loaded at notebook level

    fontface = cv2.FONT_HERSHEY_DUPLEX
    white = (255, 255, 255)

    # Prefix only the file name so an input such as 'videos/a.mp4' becomes
    # 'videos/output_a.mp4' rather than the non-existent 'output_videos/a.mp4'.
    directory, filename = os.path.split(str(input_path))
    output_path = os.path.join(directory, 'output_' + filename)

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f'Could not open video: {input_path}')

    # Keep the source frame rate so the output plays at the original speed;
    # fall back to 25 when the container does not report a usable value.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not (fps and fps > 0):
        fps = 25

    writer = None

    print("Processing frames please wait ...")

    try:
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                break

            results = detectHumans(frame, network=network)

            # Time spent in the last forward pass, converted from ticks to ms.
            t, _ = network.getPerfProfile()
            label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())

            for _prob, bounding_box, _centroid in results:
                start_x, start_y, end_x, end_y = (int(v) for v in bounding_box)
                cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), white, 2)

            # The label describes the whole frame, so draw it exactly once,
            # including on frames without any detection.
            cv2.putText(frame, label, (2, frame.shape[0] - 4), fontface, 0.4, white)

            if show_preview:
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            # The writer is created lazily because it needs the frame size.
            if writer is None:
                height, width = frame.shape[:2]
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height), True)
                if not writer.isOpened():
                    raise OSError(f'Could not create output video: {output_path}')

            writer.write(frame)
    finally:
        # Always free the capture, the writer and any window, even on errors.
        cap.release()
        if writer is not None:
            writer.release()
        if show_preview:
            cv2.destroyAllWindows()

    if writer is None:
        print(f'No frames were read from {input_path}; nothing was saved.')
    else:
        print(f'Video saved to {output_path}')

In [7]:
# Run the detector over the sample clip; the path is relative to the
# current working directory.
input_path = 'pharcyde_sm.mp4'
exportDetection(input_path)

Processing frames please wait ...
Video saved to output_pharcyde_sm.mp4
