In [2]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp

In [39]:
# Load image
image_path = './images/4.jpg'  # Replace with your image path
frame = cv2.imread(image_path)
if frame is None:
    print("Error: Cannot load image.")
    exit()

# Load YOLOv8 model
model = YOLO("yolov8n-face-lindevs.pt")

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True,
                                  max_num_faces=1,
                                  refine_landmarks=True,
                                  min_detection_confidence=0.5)

def get_head_pose(image, landmarks):
    image_points = np.array([
        landmarks[1],    # Nose tip
        landmarks[152],  # Chin
        landmarks[263],  # Right eye right corner
        landmarks[33],   # Left eye left corner
        landmarks[287],  # Right mouth corner
        landmarks[57]    # Left mouth corner
    ], dtype="double")

    model_points = np.array([
        (0.0, 0.0, 0.0),
        (0.0, -330.0, -65.0),
        (225.0, 170.0, -135.0),
        (-225.0, 170.0, -135.0),
        (150.0, -150.0, -125.0),
        (-150.0, -150.0, -125.0)
    ])

    focal_length = image.shape[1]
    center = (image.shape[1] / 2, image.shape[0] / 2)
    camera_matrix = np.array([
        [focal_length, 0, center[0]],
        [0, focal_length, center[1]],
        [0, 0, 1]
    ], dtype="double")

    dist_coeffs = np.zeros((4, 1))
    success, rvec, _, _ = cv2.solvePnPRansac(model_points, image_points, camera_matrix, dist_coeffs)

    rmat, _ = cv2.Rodrigues(rvec)
    proj_matrix = np.hstack((rmat, np.zeros((3, 1))))
    _, _, _, _, _, _, angles = cv2.decomposeProjectionMatrix(proj_matrix)

    return angles  # pitch, yaw, roll

def get_eye_direction(landmarks, iw):
    left_eye = landmarks[33]
    right_eye = landmarks[263]
    eye_mid_x = (left_eye[0] + right_eye[0]) / 2
    eye_mid_x_norm = eye_mid_x / iw

    if eye_mid_x_norm < 0.4:
        return "Looking Left"
    elif eye_mid_x_norm > 0.6:
        return "Looking Right"
    else:
        return "Looking Center"

# Run YOLO face detection
results = model(frame)

for box in results[0].boxes.xyxy:
    x1, y1, x2, y2 = map(int, box)
    face = frame[y1:y2, x1:x2]

    if face.shape[0] == 0 or face.shape[1] == 0:
        continue

    face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    result_mesh = face_mesh.process(face_rgb)

    if result_mesh.multi_face_landmarks:
        for landmarks in result_mesh.multi_face_landmarks:
            ih, iw, _ = face.shape
            coords = [(int(p.x * iw), int(p.y * ih)) for p in landmarks.landmark]

            # Visualize key landmarks for debugging (relative to face bounding box)
            for i, landmark in enumerate(coords):
                x, y = landmark
                cv2.circle(frame, (x1 + x, y1 + y), 5, (0, 0, 255), -1)  # Map to original frame coordinates

            try:
                # Get head pose angles
                angles = get_head_pose(face, coords)
                pitch, yaw, roll = [a[0] for a in angles]
                print(f"Head Pose Angles - Pitch: {pitch}, Yaw: {yaw}, Roll: {roll}")  # Debugging angles

                # Adjust thresholds for pitch and yaw if needed
                if abs(yaw) > 20:
                    head_status = "Head turned"
                elif abs(pitch) > 20:
                    head_status = "Looking up/down"
                else:
                    head_status = "Head OK"
            except Exception as e:
                print(f"Error in head pose estimation: {e}")
                head_status = "Head Pose Error"

            # Eye gaze check
            eye_dir = get_eye_direction(coords, iw)

            # Display all status info
            print(f"Head: {head_status} | Gaze: {eye_dir}")

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Draw head status above the face box
            cv2.putText(frame, head_status, (x1, y1 - 20), cv2.FONT_HERSHEY_SIMPLEX,
                        0.6, (0, 255, 255), 2)

            # Draw eye direction below the face box
            cv2.putText(frame, eye_dir, (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX,
                        0.6, (255, 255, 0), 2)

# Save the final image
cv2.imwrite("output_with_boxes.jpg", frame)

# Display result
cv2.imshow("Image Pose & Gaze Detection", frame)
cv2.waitKey(0)
cv2.destroyAllWindows()



0: 640x480 1 face, 176.7ms
Speed: 8.4ms preprocess, 176.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 480)
Head Pose Angles - Pitch: -179.68621664002237, Yaw: -4.327508036987097, Roll: 2.739224906538276
Head: Looking up/down | Gaze: Looking Center
