In [1]:
# importing numpy, cv2, mediapipe, and time
import numpy as np
import cv2
import mediapipe as mp
import time

In [2]:
# Head-pose ("head gaze") demo: grabs webcam frames, runs MediaPipe Face Mesh,
# estimates the head rotation from six landmarks with cv2.solvePnP, and
# overlays the result on the frame until the 'x' key is pressed.

# face mesh objects needed to detect and display landmarks
mpMesh = mp.solutions.face_mesh
faceMesh = mpMesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
drawing = mp.solutions.drawing_utils
specs = drawing.DrawingSpec(thickness=2, circle_radius=1)

# landmark indices used as solvePnP correspondences; kept in ascending order so
# the points reach solvePnP in the same order a full enumerate() scan produces
POSE_LANDMARKS = (1, 33, 61, 199, 263, 291)
# landmark that anchors the direction line drawn on the frame
NOSE_LANDMARK = 1
# x / y readings strictly beyond +/- this value trigger a "Tilt head ..." hint
CENTER_TOLERANCE = 1.5


def _gaze_hint(x_reading, y_reading, tolerance=CENTER_TOLERANCE):
    """Return the (text, BGR color) hint for the rounded x / y readings.

    The x reading is checked before the y reading. A reading that sits exactly
    on +/- tolerance counts as centered rather than falling through to the
    last branch.
    """
    warn = (0, 0, 255)
    if x_reading < -tolerance:
        return "Tilt head UP", warn
    if x_reading > tolerance:
        return "Tilt head DOWN", warn
    if y_reading > tolerance:
        return "Tilt head LEFT", warn
    if y_reading < -tolerance:
        return "Tilt head RIGHT", warn
    return "CENTERED", (255, 0, 0)


# turns video on and stores the capture handle
cap = cv2.VideoCapture(0)

try:
    # processes frames while the capture device is open
    while cap.isOpened():
        # reads the next frame; stop if none was delivered so that None is
        # never handed to cv2.flip
        grabbed, image = cap.read()
        if not grabbed:
            break
        startTime = time.time()

        # mirrors the frame and converts BGR -> RGB for MediaPipe
        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)

        # runs the face mesh on the RGB frame
        result = faceMesh.process(image)

        # converts back to BGR for OpenCV drawing / display
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        h, w, _ = image.shape

        # camera matrix: focal length approximated by the frame width and the
        # principal point at the frame center (cx = w / 2, cy = h / 2)
        focalLen = 1 * w
        matrixCam = np.array(
            [[focalLen, 0, w / 2], [0, focalLen, h / 2], [0, 0, 1]],
            dtype=np.float64,
        )

        # distortion parameters (all zero)
        matrixDist = np.zeros((4, 1), dtype=np.float64)

        # multi_face_landmarks is falsy when nothing was detected
        for face_landmarks in result.multi_face_landmarks or []:
            # fresh per-face lists, so a second face never appends to an ndarray
            coords2d = []
            coords3d = []
            for idx in POSE_LANDMARKS:
                lm = face_landmarks.landmark[idx]
                px, py = int(lm.x * w), int(lm.y * h)

                # stores x and y into the 2d list
                coords2d.append([px, py])

                # stores x, y, z into the 3d list
                coords3d.append([px, py, lm.z])

                if idx == NOSE_LANDMARK:
                    # unrounded pixel position of the nose landmark
                    nose_2d = (lm.x * w, lm.y * h)

            # converting to numpy arrays for solvePnP
            points2d = np.array(coords2d, dtype=np.float64)
            points3d = np.array(coords3d, dtype=np.float64)

            # solves the PnP; the overlay is skipped if no pose was found
            solved, rotVec, transVec = cv2.solvePnP(points3d, points2d, matrixCam, matrixDist)
            if solved:
                # rotation matrix from the rotation vector
                matrixRot, _ = cv2.Rodrigues(rotVec)

                # decomposes the rotation matrix into per-axis angles
                angles, _, _, _, _, _ = cv2.RQDecomp3x3(matrixRot)

                # scaled x, y, z readings
                x = angles[0] * 360
                y = angles[1] * 360
                z = angles[2] * 360

                # draws a line from the nose in the direction of the readings
                p1 = (int(nose_2d[0]), int(nose_2d[1]))
                p2 = (int(nose_2d[0] + y * 10), int(nose_2d[1] - x * 10))
                cv2.line(image, p1, p2, (255, 0, 0), 3)

                # displays the x, y, and z readings
                for label, value, row in (("x: ", x, 50), ("y: ", y, 100), ("z: ", z, 150)):
                    cv2.putText(image, label + str(np.round(value, 2)), (500, row),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                currX = np.round(x, 2)
                currY = np.round(y, 2)

                # displays either CENTERED or the correction hint
                hint, hintColor = _gaze_hint(currX, currY)
                cv2.putText(image, hint, (1000, 100), cv2.FONT_HERSHEY_COMPLEX, 1, hintColor, 4)

            # draws the mesh contours for this face
            drawing.draw_landmarks(image=image, landmark_list=face_landmarks, connections=mpMesh.FACEMESH_CONTOURS, landmark_drawing_spec=specs, connection_drawing_spec=specs)

        # displays fps on every frame, whether or not a face was found;
        # guards against a zero interval on coarse clocks
        end = time.time()
        totalTime = end - startTime
        fps = 1 / totalTime if totalTime > 0 else 0
        cv2.putText(image, f'FPS: {int(fps)}', (20, 450), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

        # shows the frame in the named window
        cv2.imshow('Head Gaze Model', image)

        # sets the exit key to 'x'
        if cv2.waitKey(1) & 0xFF == ord('x'):
            break
finally:
    # always frees the camera and closes the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
