In [0]:
# Mount Google Drive (Colab only; skip this cell when running locally).
# The drive is mounted at the relative path "drive/", which is the prefix
# the Colab value of BASE_PATH starts with.

from google.colab import drive
drive.mount("drive/")

In [0]:
# Install Face detection module from PyPI

!pip install face-detection

In [0]:
# Import Required Packages

import os

import cv2
import face_detection
import numpy as np

In [0]:
# Root folder that the model files, the input video and the crop folders
# are resolved against. It is prepended by plain string concatenation,
# so a non-empty value must end with "/".

# Google Colab: a folder inside the Drive mounted at "drive/"
BASE_PATH = "drive/My Drive/Social_Distancing/"

# Local environment: leave empty to use the current working directory
# BASE_PATH  = ""

In [0]:
# Build the DSFD face detector

# Raise confidence_threshold to keep only clearly visible faces
detector = face_detection.build_detector(
    "DSFDDetector",
    confidence_threshold=0.5,
    nms_iou_threshold=0.3,
)

In [0]:
# Load the YOLOv3 network (weights + config) from BASE_PATH
net = cv2.dnn.readNet(BASE_PATH+"yolov3.weights", BASE_PATH+"yolov3.cfg")

# COCO class labels, one per line; a label's position in the list is its class id
with open(BASE_PATH+"coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Names of the network's output layers, passed to net.forward() for every frame.
# getUnconnectedOutLayers() returns an (N, 1) array in older OpenCV releases and
# a flat array in newer ones, so indexing each item with [0] breaks on newer
# releases; getUnconnectedOutLayersNames() gives the names directly on both.
layer_names = net.getLayerNames()
output_layers = list(net.getUnconnectedOutLayersNames())

# The crops saved in the main loop go into these folders; cv2.imwrite does not
# create missing folders, so make sure they exist up front.
for crop_dir in ("extracted_persons/", "extracted_faces/"):
    os.makedirs(BASE_PATH + crop_dir, exist_ok=True)

# Initialize output video stream (XVID, 25 fps, 1920x1080)
# NOTE(review): the frame size is hard-coded; confirm that the frames written
# further down match 1920x1080.
out_stream = cv2.VideoWriter(
    'output.mkv',
    cv2.VideoWriter_fourcc(*'XVID'),
    25.,
    (1920,1080))

# Path to input video file in the BASE_PATH
FILE_PATH = "test.mkv"
cap = cv2.VideoCapture(BASE_PATH + FILE_PATH )

# Running counters used by the main loop
frame_count = 0
person_count = 0
face_count = 0

while cap.isOpened():

    # Grab the next frame; ret is False when no frame could be read
    ret, img = cap.read()

    # Stop at end of file
    if not ret:
        break

    height, width, channels = img.shape

    # Build the network input: pixel values scaled by 0.00392, frame resized
    # to 416x416, R and B channels swapped, no cropping
    blob = cv2.dnn.blobFromImage(
        img,
        scalefactor=0.00392,
        size=(416, 416),
        mean=(0, 0, 0),
        swapRB=True,
        crop=False,
    )

    # Forward pass through the YOLO output layers
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Per-frame accumulators, kept index-aligned with each other
    class_ids, confidences, boxes = [], [], []

    # Walk every candidate produced by every output layer
    for layer_out in outs:
        for detection in layer_out:
            # Entries from index 5 onwards are treated as per-class scores
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            # Keep only candidates whose best class score is above 0.5
            if not confidence > 0.5:
                continue

            # Scale the first four entries by the frame size to get the
            # box centre and box size in pixels
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Top-left corner of the box
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

    # Non-maximum suppression over the candidate boxes
    # (score threshold 0.5, NMS threshold 0.4); returns the indices to keep
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN

    # Per-frame results: person boxes as [x, y, w, h],
    # face boxes as [x1, y1, x2, y2] in full-frame coordinates
    persons = []
    faces = []

    # Work on detected Persons in the video
    for i in range(len(boxes)):
        # Only boxes that survived non-maximum suppression
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            # Boxes whose left edge is off-frame (x < 0) are skipped.
            # NOTE(review): y is not checked; a negative y reaches the slices
            # below, where a negative start index counts from the end of the
            # axis. Consider clamping the box to the frame.
            if label=='person' and (x<0)==False:

                # person_count is not reset per frame in the code shown here,
                # so it numbers persons across the whole video
                person_count += 1
                persons.append([x,y,w,h])

                # Save cropped person as "<frame_count>_<person_count>.png"
                cv2.imwrite(BASE_PATH + "extracted_persons/"+str(frame_count)+"_"+str(person_count)+".png",img[y:y+h,x:x+w])

                # Detect face in the person
                # BGR to RGB (the channel axis of the crop is reversed)
                detections = detector.detect(img[y:y+h,x:x+w,::-1])

                # If a face is detected
                if detections.shape[0]>0:

                  # face_count is cumulative across frames, like person_count
                  face_count += 1

                  # Calculating coordinates of detected face: only the first
                  # detection is used; its crop-relative corners are shifted
                  # by the person box origin to get full-frame coordinates
                  x1 = x + int(detections[0][0])
                  x2 = x + int(detections[0][2])
                  y1 = y + int(detections[0][1])
                  y2 = y + int(detections[0][3])

                  faces.append([x1,y1,x2,y2])

                  # Save cropped face as "<frame_count>_<face_count>.png"
                  cv2.imwrite(BASE_PATH + "extracted_faces/"+str(frame_count)+"_"+str(face_count)+".png",img[y1:y2,x1:x2])