In [None]:
%pip install retina-face imutils opencv-python

In [None]:
# Download test images from unsplash, see README for attributions
!curl -L https://images.unsplash.com/photo-1625990637351-ee0e5e9ba5e5 -o test1.jpeg
!curl -L https://images.unsplash.com/photo-1542304074-9c8ce93b52fd -o test2.jpeg
!curl -L https://images.unsplash.com/photo-1523151401668-fff9e38a48e4 -o test3.jpeg

In [None]:
# Download test video from pexels, see README for attributions
!curl -L https://download.pexels.com/vimeo/374178490/pexels-fauxels-3254006.mp4?width=3840 -o test.mp4

In [None]:
import os
from pathlib import Path

import cv2 as cv
import imutils
import numpy as np
from IPython.display import Image, Video
from retinaface import RetinaFace
from tqdm import tqdm

In [None]:
# Run the detector directly on an image file path to inspect its raw output.
img_path = "test3.jpeg"
faces = RetinaFace.detect_faces(img_path)

In [None]:
# Show the raw detection result; later cells read the "facial_area" and "score"
# entries of each detection from this structure.
faces

In [None]:
def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255)):
    """Draw detection boxes and confidence scores on a copy of ``image``.

    Args:
        image: Image to annotate; must provide ``.copy()``. It is not modified.
        results: Detection mapping whose values each provide ``"facial_area"``
            (two corner points as ``x1, y1, x2, y2``) and ``"score"``.
            Anything that is not a dict is treated as "no detections",
            because the detector result is not always a dict (the video
            loop in this notebook guards for that case as well).
        box_color: Color passed to ``cv.rectangle`` for the bounding box.
        text_color: Color passed to ``cv.putText`` for the score label.

    Returns:
        The annotated copy of ``image``.
    """
    output = image.copy()

    # Without detections there is nothing to draw; return the untouched copy
    # instead of failing on ``results.values()``.
    if not isinstance(results, dict):
        return output

    for det in results.values():
        # OpenCV expects plain Python ints for pixel coordinates.
        x1, y1, x2, y2 = (int(v) for v in det["facial_area"][:4])
        cv.rectangle(output, (x1, y1), (x2, y2), box_color, 2)

        conf = det["score"]
        cv.putText(
            output,
            "{:.4f}".format(conf),
            (x1, y1 + 12),  # place the label just inside the top-left corner
            cv.FONT_HERSHEY_DUPLEX,
            0.5,
            text_color,
        )

    return output

In [None]:
def detect_face(input_image):
    """Detect faces in an image file, save an annotated copy and display it.

    The image is resized to a width of 1000 px before detection. The annotated
    image is written next to the input as ``<stem>_result.jpeg`` and then
    displayed inline.

    Args:
        input_image: Path (``str`` or path-like) of the image to process.

    Raises:
        FileNotFoundError: If OpenCV could not read ``input_image``.
    """
    input_image = Path(input_image)
    image = cv.imread(os.fspath(input_image))
    if image is None:
        # cv.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError("Could not read image: {}".format(input_image))

    image = imutils.resize(image, width=1000)  # just to make the image smaller
    # np.array() copies, so the detector works on its own buffer.
    results = RetinaFace.detect_faces(np.array(image))
    if not isinstance(results, dict):
        # The detector result is not always a dict (the video loop in this
        # notebook checks for that too); treat that case as "no faces".
        results = {}

    # Print results
    print("{} faces detected.".format(len(results)))

    # Draw results on the input image
    image = visualize(image, results)

    out_image = input_image.parent / (input_image.stem + "_result.jpeg")
    cv.imwrite(os.fspath(out_image), image)
    display(Image(filename=os.fspath(out_image)))

In [None]:
# Annotate the first test image; the result is saved as test1_result.jpeg and displayed.
detect_face("test1.jpeg")

In [None]:
# Annotate the second test image; the result is saved as test2_result.jpeg and displayed.
detect_face("test2.jpeg")

In [None]:
# Annotate the third test image; the result is saved as test3_result.jpeg and displayed.
detect_face("test3.jpeg")

In [None]:
# trim video to 4 seconds
!ffmpeg -ss 0 -i test.mp4 -c copy -t 4 test-trim.mp4 -y

In [None]:
cap = cv.VideoCapture("test-trim.mp4")
# The frame loop below only runs while cap.isOpened(), so a failed open would
# otherwise be silent and leave an empty output video. Fail loudly here instead.
if not cap.isOpened():
    raise IOError("Could not open video: test-trim.mp4")

In [None]:
# fourcc = cv.VideoWriter_fourcc(*'mp4v')
fourcc = cv.VideoWriter_fourcc(*"VP90")

# Reuse the source frame rate so the output plays at the same speed as the
# input; fall back to 30 fps when the capture does not report a usable value.
source_fps = cap.get(cv.CAP_PROP_FPS)
output_fps = source_fps if source_fps > 0 else 30.0

# Named property constants instead of the bare ids 3 and 4.
frame_size = (
    int(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)),
)
out = cv.VideoWriter("output.mp4", fourcc, output_fps, frame_size)

In [None]:
# Annotate every frame of the capture and append it to the output video.
pos_frame = int(cap.get(cv.CAP_PROP_POS_FRAMES))
num_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))

# Show real progress with tqdm. If the capture reports no usable frame count,
# total=None gives an open-ended counter instead of a misleading bar.
with tqdm(
    total=num_frames if num_frames > 0 else None,
    initial=pos_frame,
    desc="Detecting faces",
    unit="frame",
) as progress:
    # Loop through each frame of the video
    while cap.isOpened():
        # Read the frame
        ret, frame = cap.read()
        if not ret:
            break
        pos_frame = int(cap.get(cv.CAP_PROP_POS_FRAMES))

        results = RetinaFace.detect_faces(np.array(frame))
        # Only a dict result carries detections to draw; for any other result
        # (no face detected) the frame is written unchanged.
        if isinstance(results, dict):
            frame = visualize(frame, results)

        # Write the frame to the output video file
        out.write(frame)
        progress.update(1)

        if pos_frame == num_frames:
            # If the number of captured frames is equal to the total number of
            # frames, we stop
            break

In [None]:
# Release the video capture, release the output video, and close the window
cap.release()
out.release()

In [None]:
# display the video
Video("output.mp4", embed=True, width=800)