In [1]:
import cv2
import numpy as np
# import torch
from ultralytics import YOLO
import mediapipe as mp
from math import atan2, degrees



In [18]:
import cv2
import numpy as np
import torch
import mediapipe as mp
from math import atan2, degrees
from ultralytics import YOLO

# Initialize YOLOv8 model for face detection
def load_yolo_model(model_path="yolov8n-face-lindevs.pt"):
    """Load the YOLO face-detection model.

    Args:
        model_path: Path (or name) of the weights file handed to ``YOLO``.
            Defaults to ``"yolov8n-face-lindevs.pt"``, the file this notebook
            was written against, so existing zero-argument calls behave as
            before.

    Returns:
        The ``YOLO`` instance built from ``model_path``.

    Any failure to locate or load the weights surfaces as whatever exception
    ``YOLO`` itself raises; nothing is caught here.
    """
    return YOLO(model_path)

# Function to detect faces using YOLOv8
def detect_faces_yolo(image, model, conf_threshold=0.0):
    """Run the detector on ``image`` and collect face bounding boxes.

    Args:
        image: Input forwarded unchanged to ``model(image)``.
        model: Callable detector. Calling it must yield an iterable of result
            objects, each exposing ``.boxes``; every box must expose
            ``xyxy``, ``conf`` and ``cls`` whose first element supports
            ``.tolist()`` / ``.item()``.
        conf_threshold: Minimum confidence a detection needs to be kept.
            The default of ``0.0`` keeps every detection, which matches the
            behaviour before this parameter existed.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` lists. Coordinates are
        converted with ``int()``, i.e. truncated toward zero.
    """
    faces = []

    for result in model(image):
        for box in result.boxes:
            # Only class id 0 is kept. The inference log of this notebook
            # reports the detections as "face", so id 0 is presumably the
            # single face class of the loaded weights -- confirm if the
            # weights file changes.
            if int(box.cls[0].item()) != 0:
                continue

            if box.conf[0].item() < conf_threshold:
                continue

            xmin, ymin, xmax, ymax = box.xyxy[0].tolist()
            faces.append([int(xmin), int(ymin), int(xmax), int(ymax)])

    return faces

# Function to estimate head pose using Mediapipe and draw landmarks
def estimate_head_pose(image, face_landmarks):
    """Classify the head direction of the face in ``image`` and draw its landmarks.

    MediaPipe Face Mesh is run on ``image``; every landmark of the first
    detected face is drawn onto ``image`` **in place** as a small red dot, and
    a coarse direction is derived from the nose landmark relative to the
    midpoint of two eye landmarks. The computed angle is also printed.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2RGB``). It is
            modified in place by the landmark drawing.
        face_landmarks: Unused. Kept only so existing callers that pass a
            second positional argument keep working.

    Returns:
        ``"Right"``, ``"Left"`` or ``"Forward"`` when a face mesh is found,
        otherwise ``"No face detected"`` (also returned for an empty or
        missing image).
    """
    # An empty crop (e.g. from a degenerate bounding box) would make
    # cv2.cvtColor raise; report it the same way as "no mesh found".
    if image is None or image.size == 0:
        return "No face detected"

    mp_face_mesh = mp.solutions.face_mesh
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The mesh object is created per call, so release it deterministically
    # instead of leaking one graph per processed face. static_image_mode=True
    # is the MediaPipe setting for independent still images.
    with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1) as face_mesh:
        results = face_mesh.process(rgb_image)

    if not results.multi_face_landmarks:
        return "No face detected"

    # max_num_faces=1, so the first entry is the only face.
    landmarks = results.multi_face_landmarks[0].landmark

    # Landmarks are normalised to [0, 1]; scale by the image size to draw them.
    height, width = image.shape[0], image.shape[1]
    for landmark in landmarks:
        pixel = (int(landmark.x * width), int(landmark.y * height))
        cv2.circle(image, pixel, 1, (0, 0, 255), -1)  # filled red dot (BGR)

    # NOTE(review): in the standard MediaPipe face-mesh topology indices 33
    # and 133 appear to be the outer and inner corner of the *same* eye, so
    # this "midpoint" is probably the centre of one eye rather than the point
    # between both eyes (263 is likely the intended second landmark). The
    # thresholds below look tuned to the current geometry, so the indices are
    # left untouched here -- confirm and retune both together.
    nose = landmarks[1]
    left_eye = landmarks[33]
    right_eye = landmarks[133]
    eye_mid_x = (left_eye.x + right_eye.x) / 2
    eye_mid_y = (left_eye.y + right_eye.y) / 2

    # Angle, in degrees, of the vector from the eye midpoint to the nose.
    horizontal_angle = degrees(atan2(nose.y - eye_mid_y, nose.x - eye_mid_x))

    print(f"Horizontal Angle: {horizontal_angle:.2f} degrees")

    # NOTE(review): "Right" is only reported strictly between 30 and 40
    # degrees; 40 and above falls through to "Forward" (the sample run in
    # this notebook printed 42.21). Presumably empirical -- confirm.
    if 30 < horizontal_angle < 40:
        return "Right"
    if horizontal_angle < -10:
        return "Left"
    return "Forward"

# Main function to process image
def main(image_path, output_path="output.jpg", show=True):
    """Detect faces in an image, label each with a head pose, save and show it.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        output_path: Where the annotated image is written. Defaults to
            ``"output.jpg"``, the location used before this parameter existed.
        show: When true (the default, as before) the annotated image is shown
            in an OpenCV window and the call blocks until a key is pressed.
            Pass ``False`` where no GUI is available.

    Returns:
        None. If no face is detected a message is printed and nothing is
        written or shown.

    Raises:
        FileNotFoundError: ``image_path`` could not be read as an image.
    """
    # cv2.imread signals failure by returning None rather than raising;
    # check before the (expensive) model load and fail with a clear message.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    model = load_yolo_model()
    faces = detect_faces_yolo(image, model)

    if not faces:
        print("No faces detected.")
        return

    img_h, img_w = image.shape[0], image.shape[1]

    for face in faces:
        x1, y1, x2, y2 = map(int, face)

        # Clamp to the image: a negative coordinate would otherwise be
        # interpreted by NumPy as an index from the end and crop the wrong area.
        x1, x2 = max(0, x1), min(img_w, x2)
        y1, y2 = max(0, y1), min(img_h, y2)

        # Basic slicing gives a view, so anything drawn on the crop by the
        # pose estimator also shows up in `image`.
        face_image = image[y1:y2, x1:x2]

        if face_image.size == 0:
            # Degenerate box: nothing to analyse.
            head_pose = "No face detected"
        else:
            head_pose = estimate_head_pose(face_image, None)

        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, f"Head Pose: {head_pose}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    # imwrite reports failure through its return value.
    if not cv2.imwrite(output_path, image):
        print(f"Warning: could not write annotated image to {output_path}")

    if show:
        try:
            cv2.imshow("Detected Faces and Head Pose", image)
            cv2.waitKey(0)
        finally:
            # Always tear the window down, even if display is interrupted.
            cv2.destroyAllWindows()

# Entry point: run the full detect -> pose -> annotate pipeline on one sample image.
if __name__ == "__main__":
    # Path is relative to the current working directory; replace it with your own image file.
    image_path = "./images/1.jpg"
    main(image_path)



0: 640x480 1 face, 99.8ms
Speed: 4.1ms preprocess, 99.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 480)
Horizontal Angle: 42.21 degrees
