# Gaze Detection - Eye Contact Analysis

This notebook implements eye contact detection using MediaPipe Face Mesh.

## Goals
1. Set up MediaPipe Face Mesh for landmark detection
2. Calculate gaze direction from iris position
3. Define "looking at camera" threshold
4. Test on sample videos/webcam
5. Export utility functions for backend integration

In [None]:
# Install dependencies if needed
# !pip install mediapipe opencv-python numpy matplotlib

In [None]:
# Standard-library imports needed to locate the project root.
import os
import sys
from pathlib import Path

# Add project root to path so project modules can be imported from here.
# PROJECT_ROOT is resolved as three directory levels above the current
# working directory, so this assumes the kernel's working directory is the
# notebook's own folder — TODO confirm the notebook's depth in the repository.
PROJECT_ROOT = Path(os.getcwd()).parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

# Third-party dependencies (see the install cell above).
import cv2
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp

# Report library versions and the resolved root so a run can be reproduced.
print(f"OpenCV version: {cv2.__version__}")
print(f"MediaPipe version: {mp.__version__}")
print(f"Project root: {PROJECT_ROOT}")

## 1. Initialize MediaPipe Face Mesh

MediaPipe Face Mesh provides 468 facial landmarks. With `refine_landmarks=True` it additionally returns 10 iris landmarks (indices 468–477), for 478 in total.

In [None]:
# MediaPipe solution modules used by the rest of the notebook
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Single-face tracker; refine_landmarks=True is what enables iris tracking
face_mesh_options = {
    "max_num_faces": 1,
    "refine_landmarks": True,
    "min_detection_confidence": 0.5,
    "min_tracking_confidence": 0.5,
}
face_mesh = mp_face_mesh.FaceMesh(**face_mesh_options)

print("MediaPipe Face Mesh initialized with iris tracking")

## 2. Key Landmark Indices

MediaPipe provides specific landmarks for eyes and iris.

In [None]:
# Eye contour landmark indices (MediaPipe Face Mesh).
# NOTE(review): the LEFT_*/RIGHT_* names are believed to follow MediaPipe's
# own naming, i.e. the subject's anatomical left/right. In an unmirrored frame
# the subject's left eye appears on the viewer's right — confirm against
# MediaPipe's FACEMESH_LEFT_EYE / FACEMESH_RIGHT_EYE before relying on it.
# Left eye contour (contains corner landmarks 362 and 263)
LEFT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]

# Right eye contour (contains corner landmarks 33 and 133)
RIGHT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]

# Iris landmarks (only present when FaceMesh is created with refine_landmarks=True)
# Left iris: four ring points plus the center point
LEFT_IRIS_INDICES = [474, 475, 476, 477]
LEFT_IRIS_CENTER = 473

# Right iris: four ring points plus the center point
RIGHT_IRIS_INDICES = [469, 470, 471, 472]
RIGHT_IRIS_CENTER = 468

# Eye corners for calculating gaze direction.
# The iris center is located between the inner and outer corner of its eye.
LEFT_EYE_INNER_CORNER = 362
LEFT_EYE_OUTER_CORNER = 263
RIGHT_EYE_INNER_CORNER = 133
RIGHT_EYE_OUTER_CORNER = 33

print("Eye landmark indices defined")

In [None]:
def get_landmark_coords(landmarks, indices, img_width, img_height):
    """
    Convert selected normalized landmarks to integer pixel positions.

    Args:
        landmarks: MediaPipe landmark list (each item exposes .x and .y)
        indices: List of landmark indices to look up
        img_width: Image width in pixels
        img_height: Image height in pixels

    Returns:
        numpy array with one [x, y] row per requested index
    """
    selected = (landmarks[i] for i in indices)
    # int() truncates toward zero, exactly as a pixel lookup would
    pixel_rows = [
        [int(point.x * img_width), int(point.y * img_height)]
        for point in selected
    ]
    return np.array(pixel_rows)


def get_single_landmark(landmarks, idx, img_width, img_height):
    """Return landmark `idx` as an (x, y) tuple of integer pixel coordinates."""
    point = landmarks[idx]
    pixel_x = int(point.x * img_width)
    pixel_y = int(point.y * img_height)
    return pixel_x, pixel_y

## 3. Gaze Direction Calculation

In [None]:
def calculate_gaze_ratio(landmarks, img_width, img_height):
    """
    Calculate the horizontal gaze ratio for each eye.

    The ratio locates the iris center between the two corners of its eye,
    measured along the image x-axis:
    - 0.0: iris at the eye corner nearer the left edge of the image
    - 0.5: iris centered between the corners (looking at camera)
    - 1.0: iris at the eye corner nearer the right edge of the image

    Values outside 0-1 are possible if the iris landmark falls outside the
    corner landmarks; they are returned unclamped.

    Args:
        landmarks: MediaPipe landmark list (normalized .x per landmark);
            must include the iris landmarks
        img_width: Image width in pixels
        img_height: Image height in pixels (unused; only the horizontal
            position is evaluated, kept for interface compatibility)

    Returns:
        tuple: (left_eye_ratio, right_eye_ratio, average_ratio)
    """
    def _horizontal_ratio(iris_idx, inner_idx, outer_idx):
        # Stay in sub-pixel coordinates: an eye is often only a few tens of
        # pixels wide, so truncating each x to an int would quantize the
        # ratio into coarse steps.
        iris_x = landmarks[iris_idx].x * img_width
        inner_x = landmarks[inner_idx].x * img_width
        outer_x = landmarks[outer_idx].x * img_width

        eye_width = abs(outer_x - inner_x)
        if eye_width < 1.0:
            # Eye narrower than one pixel: no usable measurement, so report
            # the neutral value instead of dividing by ~0.
            return 0.5
        return (iris_x - min(inner_x, outer_x)) / eye_width

    left_ratio = _horizontal_ratio(
        LEFT_IRIS_CENTER, LEFT_EYE_INNER_CORNER, LEFT_EYE_OUTER_CORNER
    )
    right_ratio = _horizontal_ratio(
        RIGHT_IRIS_CENTER, RIGHT_EYE_INNER_CORNER, RIGHT_EYE_OUTER_CORNER
    )

    # Average both eyes
    avg_ratio = (left_ratio + right_ratio) / 2

    return left_ratio, right_ratio, avg_ratio


def get_gaze_direction(gaze_ratio, center_threshold=0.15):
    """
    Classify a gaze ratio as centered or off to one side.

    Args:
        gaze_ratio: Value between 0-1 (0.5 is center)
        center_threshold: Largest distance from 0.5 still reported as center

    Returns:
        str: 'center', 'left', or 'right'
    """
    midpoint = 0.5

    # Inside the tolerance band around the midpoint counts as centered
    if abs(gaze_ratio - midpoint) <= center_threshold:
        return 'center'

    return 'left' if gaze_ratio < midpoint else 'right'


print("Gaze calculation functions defined")

## 4. Head Pose Estimation

In [None]:
def estimate_head_pose(landmarks, img_width, img_height):
    """
    Estimate head pose (pitch, yaw, roll) using facial landmarks.

    Fits a generic 3D face model to six 2D landmarks with the PnP
    (Perspective-n-Point) algorithm and converts the resulting rotation to
    Euler angles. The angles are expressed relative to a face looking
    straight at the camera, so a frontal face yields values near 0 for all
    three angles.

    Args:
        landmarks: MediaPipe landmark list (normalized x/y per landmark)
        img_width: Image width in pixels
        img_height: Image height in pixels

    Returns:
        dict: pitch, yaw, roll in degrees. All zeros if the PnP solve fails,
        which is indistinguishable from a perfectly frontal pose.
    """
    # 3D model points (generic face model, Y axis pointing up, nose at origin)
    model_points = np.array([
        [0.0, 0.0, 0.0],          # Nose tip
        [0.0, -330.0, -65.0],     # Chin
        [-225.0, 170.0, -135.0],  # Left eye corner
        [225.0, 170.0, -135.0],   # Right eye corner
        [-150.0, -150.0, -125.0], # Left mouth corner
        [150.0, -150.0, -125.0]   # Right mouth corner
    ], dtype=np.float64)

    # 2D image points, in the same order as model_points.
    # Key landmark indices: nose tip (1), chin (152), left eye outer (33),
    # right eye outer (263), left mouth corner (61), right mouth corner (291)
    landmark_indices = [1, 152, 33, 263, 61, 291]
    image_points = np.array([
        get_single_landmark(landmarks, idx, img_width, img_height)
        for idx in landmark_indices
    ], dtype=np.float64)

    # Camera matrix (approximate): focal length taken as the image width,
    # principal point at the image center
    focal_length = img_width
    center = (img_width / 2, img_height / 2)
    camera_matrix = np.array([
        [focal_length, 0, center[0]],
        [0, focal_length, center[1]],
        [0, 0, 1]
    ], dtype=np.float64)

    # Assume no lens distortion
    dist_coeffs = np.zeros((4, 1))

    # Solve PnP
    success, rotation_vector, translation_vector = cv2.solvePnP(
        model_points, image_points, camera_matrix, dist_coeffs,
        flags=cv2.SOLVEPNP_ITERATIVE
    )

    if not success:
        return {'pitch': 0, 'yaw': 0, 'roll': 0}

    # Convert rotation vector to a 3x3 rotation matrix (model -> camera)
    rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

    # The model is Y-up with the nose pointing along +Z, whereas OpenCV's
    # camera frame is Y-down and looks along +Z. A face looking straight at
    # the camera therefore solves to a 180 degree rotation about X, which
    # would put the raw pitch near +/-180 instead of 0. Factor that frontal
    # rotation out (R @ diag(1, -1, -1), i.e. negate columns 1 and 2) so the
    # angles measure deviation from frontal. Column 0 is untouched, so yaw
    # and roll are unaffected.
    rotation_matrix[:, 1:] = -rotation_matrix[:, 1:]

    # Extract Euler angles (R = Rz(roll) @ Ry(yaw) @ Rx(pitch))
    sy = np.sqrt(rotation_matrix[0, 0]**2 + rotation_matrix[1, 0]**2)

    if sy > 1e-6:
        pitch = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
        yaw = np.arctan2(-rotation_matrix[2, 0], sy)
        roll = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
    else:
        # Gimbal lock (yaw near +/-90 degrees): roll is not separable from
        # pitch, so it is fixed at 0
        pitch = np.arctan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
        yaw = np.arctan2(-rotation_matrix[2, 0], sy)
        roll = 0

    # Convert to degrees
    # NOTE(review): signs follow OpenCV camera axes (x right, y down); positive
    # pitch is expected to mean the head tilting down — confirm on real footage.
    return {
        'pitch': np.degrees(pitch),
        'yaw': np.degrees(yaw),
        'roll': np.degrees(roll)
    }

print("Head pose estimation function defined")

## 5. Eye Contact Score Calculation

In [None]:
def _head_angle_score(deviation: float, threshold: float) -> float:
    """Score one head angle: 100 down to 70 within threshold, then -2 per degree, floored at 0."""
    if deviation <= threshold:
        if threshold <= 0:
            # Only reachable with deviation == 0 and threshold == 0; a
            # perfectly aligned head scores full marks instead of dividing 0/0.
            return 100.0
        return 100 - (deviation / threshold) * 30
    return max(0, 70 - (deviation - threshold) * 2)


def calculate_eye_contact_score(
    gaze_ratio: float,
    head_yaw: float,
    head_pitch: float,
    gaze_center_threshold: float = 0.15,
    head_yaw_threshold: float = 20.0,
    head_pitch_threshold: float = 15.0
) -> dict:
    """
    Calculate eye contact score (0-100).

    Components:
    - Gaze direction (60%): Is the iris centered?
    - Head yaw (25%): Is the head facing forward?
    - Head pitch (15%): Is the head level?

    Args:
        gaze_ratio: Horizontal iris position, 0.5 meaning centered
        head_yaw: Head yaw in degrees (0 = facing the camera)
        head_pitch: Head pitch in degrees (0 = level)
        gaze_center_threshold: Max distance from 0.5 counted as centered
        head_yaw_threshold: Max absolute yaw counted as facing forward
        head_pitch_threshold: Max absolute pitch counted as level

    Returns:
        dict: Score breakdown. All values are plain Python float/bool/str so
        the result can be passed to json.dumps even when the inputs are
        NumPy scalars.
    """
    # Gaze score (higher when closer to 0.5); reaches 0 at ratio 0.0 or 1.0
    gaze_deviation = abs(gaze_ratio - 0.5)
    gaze_score = max(0, 100 - (gaze_deviation / 0.5) * 100)

    # Head yaw score (higher when facing forward)
    yaw_deviation = abs(head_yaw)
    yaw_score = _head_angle_score(yaw_deviation, head_yaw_threshold)

    # Head pitch score (higher when level)
    pitch_deviation = abs(head_pitch)
    pitch_score = _head_angle_score(pitch_deviation, head_pitch_threshold)

    # Determine if looking at camera: all three components within tolerance.
    # bool() matters: comparing NumPy scalars yields numpy.bool_, which the
    # json module refuses to serialize.
    is_looking_at_camera = bool(
        gaze_deviation <= gaze_center_threshold and
        yaw_deviation <= head_yaw_threshold and
        pitch_deviation <= head_pitch_threshold
    )

    # Weighted average
    overall_score = gaze_score * 0.6 + yaw_score * 0.25 + pitch_score * 0.15

    return {
        'overall_score': round(float(overall_score), 1),
        'gaze_ratio': round(float(gaze_ratio), 3),
        'gaze_score': round(float(gaze_score), 1),
        'gaze_direction': get_gaze_direction(gaze_ratio, gaze_center_threshold),
        'head_yaw': round(float(head_yaw), 1),
        'yaw_score': round(float(yaw_score), 1),
        'head_pitch': round(float(head_pitch), 1),
        'pitch_score': round(float(pitch_score), 1),
        'is_looking_at_camera': is_looking_at_camera
    }

# Example inputs: (gaze ratio, head yaw, head pitch, description)
test_cases = [
    (0.5, 0, 0, "Looking directly at camera"),
    (0.3, 15, 5, "Slightly looking left"),
    (0.7, -25, 10, "Looking right with head turned"),
    (0.5, 0, 30, "Looking at camera but head tilted down"),
]

print("Eye Contact Score Tests:")
print("-" * 60)
for *score_inputs, label in test_cases:
    result = calculate_eye_contact_score(*score_inputs)
    # One report per case, separated by a blank line
    report_lines = [
        f"\n{label}",
        f"  Overall: {result['overall_score']}/100",
        f"  Direction: {result['gaze_direction']}",
        f"  Looking at camera: {result['is_looking_at_camera']}",
    ]
    print("\n".join(report_lines))

## 6. Process Single Frame

In [None]:
def analyze_frame_gaze(frame):
    """
    Run the full eye-contact pipeline on one video frame.

    Uses the module-level `face_mesh` tracker, then combines the gaze ratio
    and head pose into an eye contact score.

    Args:
        frame: BGR image (numpy array)

    Returns:
        dict: Score breakdown with 'face_detected' and 'head_roll' added.
        When no face is found, a reduced dict with 'face_detected' False,
        a score of 0 and 'unknown' direction is returned instead.
    """
    # MediaPipe expects RGB input, OpenCV delivers BGR
    rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    height, width = frame.shape[:2]

    detected_faces = face_mesh.process(rgb_image).multi_face_landmarks
    if not detected_faces:
        return {
            'face_detected': False,
            'overall_score': 0,
            'gaze_direction': 'unknown',
            'is_looking_at_camera': False
        }

    # Only the first face is analyzed
    face_points = detected_faces[0].landmark

    # Per-eye ratios are not needed here, only their average
    _, _, mean_ratio = calculate_gaze_ratio(face_points, width, height)
    pose = estimate_head_pose(face_points, width, height)

    analysis = calculate_eye_contact_score(mean_ratio, pose['yaw'], pose['pitch'])
    analysis.update(
        face_detected=True,
        head_roll=round(pose['roll'], 1),
    )
    return analysis


def draw_gaze_overlay(frame, results):
    """
    Return a copy of `frame` with the gaze analysis printed in the corner.

    The input frame is left untouched. Text color reflects the overall score:
    green from 80, yellow from 60, red below that.
    """
    annotated = frame.copy()
    font = cv2.FONT_HERSHEY_SIMPLEX

    if not results.get('face_detected', False):
        cv2.putText(annotated, "No face detected", (10, 30),
                    font, 0.7, (0, 0, 255), 2)
        return annotated

    score = results['overall_score']

    # Colors are BGR tuples
    green, yellow, red = (0, 255, 0), (0, 255, 255), (0, 0, 255)
    color = green if score >= 80 else (yellow if score >= 60 else red)

    info_lines = (
        f"Eye Contact: {score:.0f}%",
        f"Direction: {results['gaze_direction']}",
        f"Yaw: {results['head_yaw']:.1f} deg",
        f"At Camera: {'Yes' if results['is_looking_at_camera'] else 'No'}",
    )

    # First line at y=30, each following line 25 px lower
    for row, line in enumerate(info_lines):
        cv2.putText(annotated, line, (10, 30 + 25 * row),
                    font, 0.6, color, 2)

    return annotated

print("Frame analysis functions defined")

## 7. Test with Webcam

In [None]:
# Live test against the default webcam (device 0).
# To test on a still image instead, load it with cv2.imread() and pass it to analyze_frame_gaze().

def test_with_webcam(duration_seconds=5):
    """
    Test gaze detection with webcam for a few seconds.
    Press 'q' to quit early.

    Opens camera device 0, shows each analyzed frame in a window, and prints
    the frame count plus mean/min/max eye contact score (over frames where a
    face was detected) when done.

    Args:
        duration_seconds: How long to capture before stopping automatically
    """
    import time

    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Cannot open webcam")
        cap.release()
        return

    print(f"Testing for {duration_seconds} seconds... Press 'q' to quit.")

    frame_count = 0
    scores = []

    # Monotonic clock: the test length must not change if the system clock
    # is adjusted while capturing
    deadline = time.monotonic() + duration_seconds

    try:
        while time.monotonic() < deadline:
            ret, frame = cap.read()
            if not ret:
                break

            # Analyze frame
            results = analyze_frame_gaze(frame)

            # Draw overlay
            display_frame = draw_gaze_overlay(frame, results)

            # Show frame
            cv2.imshow('Gaze Detection Test', display_frame)

            if results['face_detected']:
                scores.append(results['overall_score'])

            frame_count += 1

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if analysis raised,
        # otherwise the device stays locked until the kernel restarts
        cap.release()
        cv2.destroyAllWindows()

    print(f"\nProcessed {frame_count} frames")
    if scores:
        print(f"Average eye contact score: {np.mean(scores):.1f}")
        print(f"Min: {np.min(scores):.1f}, Max: {np.max(scores):.1f}")

# Uncomment to test with webcam:
# test_with_webcam(10)

## 8. Export Utility Functions

In [None]:
# Create utils module for backend integration.
# The module source is kept as a string and written to disk below; it mirrors
# the gaze/scoring functions defined in this notebook (head pose estimation is
# not part of the exported source).
utils_code = '''
"""
Gaze Analysis Utilities

Provides functions for analyzing eye contact including:
- Gaze direction detection
- Head pose estimation
- Eye contact scoring

Generated from notebook: 02_gaze_detection.ipynb
"""

import cv2
import numpy as np
from typing import Dict, Tuple, Optional

# MediaPipe landmark indices
LEFT_IRIS_CENTER = 473
RIGHT_IRIS_CENTER = 468
LEFT_EYE_INNER_CORNER = 362
LEFT_EYE_OUTER_CORNER = 263
RIGHT_EYE_INNER_CORNER = 133
RIGHT_EYE_OUTER_CORNER = 33


def get_single_landmark(landmarks, idx: int, img_width: int, img_height: int) -> Tuple[int, int]:
    """Get single landmark as (x, y) tuple."""
    lm = landmarks[idx]
    return (int(lm.x * img_width), int(lm.y * img_height))


def calculate_gaze_ratio(landmarks, img_width: int, img_height: int) -> Tuple[float, float, float]:
    """Calculate gaze ratio for each eye."""
    left_iris = get_single_landmark(landmarks, LEFT_IRIS_CENTER, img_width, img_height)
    right_iris = get_single_landmark(landmarks, RIGHT_IRIS_CENTER, img_width, img_height)
    
    left_inner = get_single_landmark(landmarks, LEFT_EYE_INNER_CORNER, img_width, img_height)
    left_outer = get_single_landmark(landmarks, LEFT_EYE_OUTER_CORNER, img_width, img_height)
    right_inner = get_single_landmark(landmarks, RIGHT_EYE_INNER_CORNER, img_width, img_height)
    right_outer = get_single_landmark(landmarks, RIGHT_EYE_OUTER_CORNER, img_width, img_height)
    
    left_eye_width = abs(left_outer[0] - left_inner[0])
    left_ratio = (left_iris[0] - min(left_inner[0], left_outer[0])) / left_eye_width if left_eye_width > 0 else 0.5
    
    right_eye_width = abs(right_outer[0] - right_inner[0])
    right_ratio = (right_iris[0] - min(right_inner[0], right_outer[0])) / right_eye_width if right_eye_width > 0 else 0.5
    
    return left_ratio, right_ratio, (left_ratio + right_ratio) / 2


def get_gaze_direction(gaze_ratio: float, center_threshold: float = 0.15) -> str:
    """Determine gaze direction from ratio."""
    if abs(gaze_ratio - 0.5) <= center_threshold:
        return "center"
    return "left" if gaze_ratio < 0.5 else "right"


def calculate_eye_contact_score(
    gaze_ratio: float,
    head_yaw: float,
    head_pitch: float,
    gaze_center_threshold: float = 0.15,
    head_yaw_threshold: float = 20.0,
    head_pitch_threshold: float = 15.0
) -> Dict:
    """Calculate eye contact score (0-100)."""
    gaze_deviation = abs(gaze_ratio - 0.5)
    gaze_score = max(0, 100 - (gaze_deviation / 0.5) * 100)
    
    yaw_deviation = abs(head_yaw)
    yaw_score = 100 - (yaw_deviation / head_yaw_threshold) * 30 if yaw_deviation <= head_yaw_threshold else max(0, 70 - (yaw_deviation - head_yaw_threshold) * 2)
    
    pitch_deviation = abs(head_pitch)
    pitch_score = 100 - (pitch_deviation / head_pitch_threshold) * 30 if pitch_deviation <= head_pitch_threshold else max(0, 70 - (pitch_deviation - head_pitch_threshold) * 2)
    
    is_looking_at_camera = (
        gaze_deviation <= gaze_center_threshold and
        yaw_deviation <= head_yaw_threshold and
        pitch_deviation <= head_pitch_threshold
    )
    
    overall_score = gaze_score * 0.6 + yaw_score * 0.25 + pitch_score * 0.15
    
    return {
        "overall_score": round(overall_score, 1),
        "gaze_ratio": round(gaze_ratio, 3),
        "gaze_score": round(gaze_score, 1),
        "gaze_direction": get_gaze_direction(gaze_ratio, gaze_center_threshold),
        "head_yaw": round(head_yaw, 1),
        "yaw_score": round(yaw_score, 1),
        "head_pitch": round(head_pitch, 1),
        "pitch_score": round(pitch_score, 1),
        "is_looking_at_camera": is_looking_at_camera
    }
'''

# Save to training directory (created if it does not exist yet)
utils_path = PROJECT_ROOT / 'ml' / 'softskills' / 'training' / 'gaze_utils.py'
utils_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit encoding so the generated module is identical on every platform
with open(utils_path, 'w', encoding='utf-8') as f:
    f.write(utils_code)

print(f"Exported utilities to: {utils_path}")

## 9. Summary

### Key Findings
1. MediaPipe Face Mesh provides 468 landmarks, plus 10 iris landmarks (indices 468–477) when `refine_landmarks=True`
2. Gaze ratio of 0.5 indicates looking at center/camera
3. Head pose (yaw, pitch) affects perceived eye contact
4. Combined score weights: Gaze 60%, Head Yaw 25%, Pitch 15%

### Thresholds for "Looking at Camera"
- Gaze deviation: ≤ 0.15 from center (0.5)
- Head yaw: ≤ 20 degrees
- Head pitch: ≤ 15 degrees

### Next Steps
1. Add blink detection for engagement analysis
2. Implement temporal smoothing for stability
3. Integrate with backend `gaze_analyzer.py` service