In [1]:
import cv2
import numpy as np
from skimage import io
from batch_face import RetinaFace, LandmarkPredictor, SixDRep, draw_landmarks, Timer
import time

In [2]:
def get_landmarks(frame, faces):
    """Predict facial landmarks for the given faces in ``frame``.

    Delegates to the module-level ``predictor`` object, which must already
    exist when this function is called.

    Args:
        frame: Image the faces were found in.
        faces: Face entries to predict landmarks for. They are forwarded with
            ``from_fd=True`` (presumably meaning they come straight from the
            face detector - confirm against the batch_face docs).

    Returns:
        Whatever ``predictor`` returns for these inputs; the caller indexes
        it per face.
    """
    return predictor(faces, frame, from_fd=True)

In [3]:
def draw_landmarks_cv(frame, faces, landmarks):
    """Draw the landmarks of the first face onto ``frame``.

    Only the first face is drawn because a single face per frame is
    expected; looping over ``faces`` / ``landmarks`` would draw all of them.

    Args:
        frame: Image to draw on.
        faces: Face entries; ``faces[0][0]`` is passed on as the face box.
        landmarks: Per-face landmarks; only ``landmarks[0]`` is used.

    Returns:
        The value returned by ``draw_landmarks`` (the annotated frame).
    """
    first_box = faces[0][0]
    first_landmarks = landmarks[0]
    return draw_landmarks(frame, first_box, first_landmarks)

In [4]:
def get_head_pose(frame, faces_pose):
    """Estimate head pose for the given faces in ``frame``.

    Delegates to the module-level ``head_pose_estimator``, which must already
    exist when this function is called.

    Args:
        frame: Image the faces were found in.
        faces_pose: Face entries, forwarded with ``input_face_type='tuple'``
            and ``update_dict=True``.

    Returns:
        Whatever ``head_pose_estimator`` returns; the caller reads
        ``'pitch'``, ``'yaw'`` and ``'roll'`` from its first element.
    """
    estimator_options = {"input_face_type": "tuple", "update_dict": True}
    return head_pose_estimator(faces_pose, frame, **estimator_options)

In [5]:
def draw_head_pose_cube_cv(frame, faces, pose):
    """Plot the head-pose cube for the first face via ``head_pose_estimator``.

    Args:
        frame: Image handed to ``plot_pose_cube``.
        faces: Face entries; ``faces[0][0]`` is passed on as the face box.
        pose: Mapping expanded into keyword arguments of ``plot_pose_cube``.

    Returns:
        None. The result of ``plot_pose_cube`` is discarded, so the drawing
        is presumably done in place on ``frame`` - confirm against batch_face.
    """
    plot_cube = head_pose_estimator.plot_pose_cube
    plot_cube(frame, faces[0][0], **pose)

In [6]:
def updated_bbox(landmarks, margin=10):
    """Derive a padded face box from the first face's landmarks.

    Used to track a face between full detections: the box is the axis-aligned
    bounding rectangle of the landmark points, grown by ``margin`` on every
    side. It cannot discover new faces or notice that the face has left.

    Args:
        landmarks: Sequence of per-face landmark sets. Only ``landmarks[0]``
            is used; it must be convertible to an ``(N, 2)`` array of
            ``(x, y)`` points.
        margin: Padding added around the landmark extent, in the same units
            as the landmark coordinates. Defaults to 10, the value this
            function previously hard-coded. It is applied in place, so it
            must be castable to the landmark dtype (e.g. do not pass a
            fractional margin for integer landmarks).

    Returns:
        A one-element list ``[[box, None, None]]`` where ``box`` is
        ``np.array([x1, y1, x2, y2])`` with the dtype of the landmarks. The
        two ``None`` slots presumably stand in for the detector's other
        per-face fields - confirm against batch_face.

    Raises:
        IndexError: if ``landmarks`` is empty.
        ValueError: if the points do not have exactly two columns.
    """
    points = np.asarray(landmarks[0])
    (x1, y1), (x2, y2) = points.min(0), points.max(0)
    # np.array() builds a fresh array, so the caller's landmarks are untouched.
    box_new = np.array([x1, y1, x2, y2])
    box_new[:2] -= margin  # move the top-left corner outwards
    box_new[2:] += margin  # move the bottom-right corner outwards
    return [[box_new, None, None]]

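Open the camera and pass each captured frame to the functions defined above. Comment or uncomment the function calls inside the loop to choose which tracking results (landmarks and/or head pose) are computed and drawn.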

In [8]:
### Live tracking loop: camera -> face box -> landmarks + head pose -> display ###

# Seconds between full face detections. The shorter the interval, the more
# detections are done, but also the lower the FPS and efficiency.
REDETECT_INTERVAL_S = 1

### Load the models first so a model-loading failure cannot leave the camera open ###
# `predictor` and `head_pose_estimator` are read as globals by the helper functions.
detector = RetinaFace(0)
predictor = LandmarkPredictor(0)
head_pose_estimator = SixDRep(0)

### Open camera ###
cap = cv2.VideoCapture(0)
detect_time = time.perf_counter()
faces = None
landmarks = None

try:
    while True:
        # Capture frame-by-frame; stop cleanly if the camera yields nothing
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Could not read a frame from the camera - stopping.")
            break
        loop_time = time.perf_counter()

        ### NOTE: RGB values are normalized within RetinaFace ###
        # Time since the last full detection
        elapsed_time = loop_time - detect_time

        if faces is None or landmarks is None or elapsed_time >= REDETECT_INTERVAL_S:
            ### Full detection: first frame, after losing the face, or every REDETECT_INTERVAL_S ###
            faces = detector(frame, cv=True, threshold=0.5)
            detect_time = time.perf_counter()
        else:
            ### Efficiency method for live camera: reuse the min/max of the previous landmarks as the face box ###
            ### However, it will not detect new faces, or when a face has gone ###
            faces = updated_bbox(landmarks)

        if len(faces) == 0:
            # Drop stale state so the next frame runs a real detection instead
            # of deriving a box from landmarks of a face that is gone.
            faces = None
            landmarks = None
            cv2.putText(frame, "NO face is detected!", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
        else:
            ### Predict landmarks from face ###
            landmarks = get_landmarks(frame, faces)

            ### Estimate head pose from face ###
            pose = get_head_pose(frame, faces)

            ### Draw landmarks (AND/OR) pose cube ###
            frame = draw_landmarks_cv(frame, faces, landmarks)
            draw_head_pose_cube_cv(frame, faces, pose[0])

            # Calculate and display FPS, Pitch, Yaw and Roll
            # (the floor avoids a ZeroDivisionError on a coarse clock)
            fps = 1 / max(time.perf_counter() - loop_time, 1e-6)
            cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.putText(frame, f"Pitch: {pose[0]['pitch']:.2f}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(frame, f"Yaw: {pose[0]['yaw']:.2f}", (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(frame, f"Roll: {pose[0]['roll']:.2f}", (10, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        ### Display the resulting frame (also when no face was found, so the window stays live) ###
        cv2.imshow('', frame)

        ### Press 'q' to exit the video window ###
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    ### Release the capture when done, even after an error or a kernel interrupt ###
    cap.release()
    cv2.destroyAllWindows()