# Gesture Recognition - Hand Movement Analysis

This notebook implements hand gesture analysis using MediaPipe Hands.

## Goals
1. Set up MediaPipe Hands for landmark detection
2. Analyze the HaGRID gesture dataset
3. Calculate hand movement velocity and stability
4. Create a gesture scoring function
5. Export utility functions for backend integration

In [None]:
# Install dependencies if needed
# !pip install mediapipe opencv-python numpy matplotlib torch

In [None]:
import os
import sys
from pathlib import Path

# Resolve the project root (three directory levels above the current working
# directory) and give it top priority on the import search path.
PROJECT_ROOT = Path.cwd().parent.parent.parent
sys.path[:0] = [str(PROJECT_ROOT)]

import cv2
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp
from collections import deque

# Log the library versions and the resolved project root so a run of this
# notebook can be reproduced / debugged later.
print(f"OpenCV version: {cv2.__version__}")
print(f"MediaPipe version: {mp.__version__}")
print(f"Project root: {PROJECT_ROOT}")

## 1. Initialize MediaPipe Hands

In [None]:
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Single source of truth for the detector limit, so the log line below cannot
# drift away from the value that is actually passed to the detector.
MAX_NUM_HANDS = 2

# Hands detector shared by the frame-analysis code further down.
# static_image_mode=False selects the video-stream mode (see the MediaPipe
# Hands documentation); both confidence thresholds are set to 0.5.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=MAX_NUM_HANDS,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

print("MediaPipe Hands initialized")
print(f"Max hands: {MAX_NUM_HANDS}")
print("21 landmarks per hand (wrist + 4 per finger)")

## 2. Explore HaGRID Dataset Structure

In [None]:
# Check if HaGRID dataset is available
HAGRID_PATH = PROJECT_ROOT / "ml" / "softskills" / "datasets" / "gestures" / "hagrid"

# is_dir() (rather than exists()) so that a stray regular file at this path
# is reported as "not found" instead of crashing in iterdir().
if HAGRID_PATH.is_dir():
    print(f"HaGRID dataset found at: {HAGRID_PATH}")
    print("\nDirectory contents:")
    for item in sorted(HAGRID_PATH.iterdir()):
        # Folder / page icons distinguish sub-directories from plain files.
        if item.is_dir():
            print(f"  📁 {item.name}/")
        else:
            print(f"  📄 {item.name}")
else:
    print("HaGRID dataset not found. Using simulated data.")

In [None]:
# Class labels used for the HaGRID dataset in this notebook.
# The order of this list is preserved exactly; inline notes describe each label.
HAGRID_GESTURES = [
    "call",             # "phone" hand shape
    "dislike",          # thumbs down
    "fist",             # closed fist
    "four",             # four fingers shown
    "like",             # thumbs up
    "mute",             # finger on lips (silence)
    "ok",               # OK sign
    "one",              # index finger raised
    "palm",             # open palm
    "peace",            # peace sign
    "peace_inverted",   # presumably the inverted peace sign (per the label)
    "rock",             # rock and roll
    "stop",             # stop gesture
    "stop_inverted",    # presumably the inverted stop gesture (per the label)
    "three",            # three fingers
    "three2",           # alternative way of showing three
    "two_up",           # two fingers up
    "two_up_inverted",  # presumably the inverted two-up gesture (per the label)
    "no_gesture",       # no specific gesture
]

print(f"HaGRID contains {len(HAGRID_GESTURES)} gesture classes")

## 3. Hand Landmark Processing

In [None]:
def get_hand_center(landmarks, img_width, img_height):
    """
    Return the centroid of a hand, scaled to image coordinates.

    Every landmark's x is multiplied by ``img_width`` and its y by
    ``img_height`` (landmark coordinates are presumably normalized to the
    image size - confirm against the producer); the scaled values are then
    averaged per axis.

    Args:
        landmarks: Object exposing a ``landmark`` sequence whose items have
            ``x`` and ``y`` attributes (e.g. MediaPipe hand landmarks)
        img_width: Image width used to scale x
        img_height: Image height used to scale y

    Returns:
        (x, y) tuple with the mean scaled position
    """
    points = landmarks.landmark
    xs = [point.x * img_width for point in points]
    ys = [point.y * img_height for point in points]

    center_x = sum(xs) / len(xs)
    center_y = sum(ys) / len(ys)
    return (center_x, center_y)


def get_hand_bbox(landmarks, img_width, img_height):
    """
    Compute the axis-aligned bounding box around all hand landmarks.

    Landmark x values are scaled by ``img_width`` and y values by
    ``img_height`` before the extremes are taken.

    Args:
        landmarks: Object exposing a ``landmark`` sequence whose items have
            ``x`` and ``y`` attributes
        img_width: Image width used to scale x
        img_height: Image height used to scale y

    Returns:
        (x_min, y_min, x_max, y_max)
    """
    points = landmarks.landmark
    xs = [point.x * img_width for point in points]
    ys = [point.y * img_height for point in points]

    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    return (left, top, right, bottom)


def get_hand_spread(landmarks):
    """
    Measure how open the hand is.

    The spread is the mean 3D Euclidean distance (x, y, z as stored on the
    landmarks, without any image scaling) between the wrist (landmark 0) and
    the five fingertips. A larger value means a more open hand.

    Args:
        landmarks: Object exposing a ``landmark`` sequence (at least 21
            entries are indexed) whose items have ``x``, ``y`` and ``z``

    Returns:
        Float representing relative spread
    """
    points = landmarks.landmark
    wrist = points[0]

    # Fingertip landmark indices: thumb, index, middle, ring, pinky
    tip_distances = [
        np.sqrt(
            (points[tip_idx].x - wrist.x)**2
            + (points[tip_idx].y - wrist.y)**2
            + (points[tip_idx].z - wrist.z)**2
        )
        for tip_idx in (4, 8, 12, 16, 20)
    ]

    return sum(tip_distances) / len(tip_distances)

print("Hand processing functions defined")

## 4. Movement Tracking

In [None]:
class HandMovementTracker:
    """
    Track hand-center positions over a sliding window of frames.

    For each hand the tracker derives:
    - visibility ratio (share of window frames with a detection)
    - total movement distance and average velocity
    - a 0-100 stability score (less movement = more stable)
    """

    def __init__(self, history_size=30, fps=30):
        """
        Args:
            history_size: Maximum number of frames kept per hand; older
                entries are dropped automatically by the deques
            fps: Frame rate used to convert per-frame distance into
                distance per second
        """
        self.history_size = history_size
        self.fps = fps
        self.left_hand_history = deque(maxlen=history_size)
        self.right_hand_history = deque(maxlen=history_size)

    def update(self, left_hand_center=None, right_hand_center=None):
        """
        Append one frame of observations to both histories.

        Args:
            left_hand_center: (x, y) or None if not detected
            right_hand_center: (x, y) or None if not detected
        """
        self.left_hand_history.append(left_hand_center)
        self.right_hand_history.append(right_hand_center)

    def _calculate_movement(self, history):
        """
        Calculate movement metrics from one hand's position history.

        A hand is reported as visible only once the window holds at least two
        detected positions, because a velocity needs two points. Frames
        without a detection (None) are dropped before distances are computed,
        so positions on either side of a gap are treated as consecutive.

        Returns:
            dict with the keys 'visible', 'visibility_ratio',
            'movement_distance', 'velocity' and 'stability'. The same keys
            are present whether or not the hand is visible.
        """
        valid_positions = [p for p in history if p is not None]

        # Guarded so that an empty history yields 0.0 instead of dividing by 0
        visibility_ratio = (
            len(valid_positions) / len(history) if len(history) else 0.0
        )

        if len(valid_positions) < 2:
            return {
                'visible': False,
                'visibility_ratio': visibility_ratio,
                'movement_distance': 0,
                'velocity': 0,
                'stability': 100
            }

        # Distance between each pair of successive detected positions
        distances = [
            np.sqrt((curr[0] - prev[0])**2 + (curr[1] - prev[1])**2)
            for prev, curr in zip(valid_positions, valid_positions[1:])
        ]

        total_distance = sum(distances)
        # Per-frame mean distance scaled by fps -> distance units per second
        # (pixels per second when centers are given in pixels)
        avg_velocity = np.mean(distances) * self.fps

        # Stability: lower movement = higher stability, floored at 0
        # (reaches 0 at a velocity of 200)
        stability = max(0, 100 - avg_velocity * 0.5)

        return {
            'visible': True,
            'visibility_ratio': visibility_ratio,
            'movement_distance': total_distance,
            'velocity': avg_velocity,
            'stability': stability
        }

    def get_metrics(self):
        """
        Get movement metrics for both hands.

        Returns:
            dict with 'hands_visible', 'num_hands', the per-hand dicts under
            'left_hand' / 'right_hand', and 'avg_stability' (mean stability
            of the visible hands, or 0 when no hand is visible).
        """
        left = self._calculate_movement(self.left_hand_history)
        right = self._calculate_movement(self.right_hand_history)

        # Only visible hands contribute to the combined stability
        stabilities = [
            hand['stability'] for hand in (left, right) if hand['visible']
        ]

        return {
            'hands_visible': left['visible'] or right['visible'],
            'num_hands': int(left['visible']) + int(right['visible']),
            'left_hand': left,
            'right_hand': right,
            'avg_stability': np.mean(stabilities) if stabilities else 0
        }

# Smoke test: feed ten frames of steady linear motion for both hands and
# print the combined metrics.
tracker = HandMovementTracker(history_size=10)

for step in range(10):
    left_pos = (100 + step*5, 200 + step*2)
    right_pos = (300 + step*3, 200 - step*1)
    tracker.update(left_hand_center=left_pos, right_hand_center=right_pos)

metrics = tracker.get_metrics()
print("Movement Tracker Test:")
print(f"  Hands visible: {metrics['hands_visible']}")
print(f"  Num hands: {metrics['num_hands']}")
print(f"  Avg stability: {metrics['avg_stability']:.1f}")

## 5. Gesture Scoring for Interview Context

In [None]:
def calculate_gesture_score(
    hands_visible: bool,
    num_hands: int,
    stability: float,
    movement_velocity: float,
    visibility_ratio: float = 1.0,
    optimal_velocity_min: float = 20,   # Some movement is good
    optimal_velocity_max: float = 100,  # But not too much
    stability_weight: float = 0.4,
    movement_weight: float = 0.3,
    visibility_weight: float = 0.3
) -> dict:
    """
    Calculate gesture score for interview context.
    
    Good gestures in interviews:
    - Visible hands (builds trust)
    - Moderate, natural movement (engages audience)
    - Stable, controlled gestures (shows confidence)
    - Not too static (robotic) or too fidgety (nervous)
    
    Returns:
        dict: Score breakdown
    """
    # Visibility score
    if hands_visible:
        visibility_score = min(100, visibility_ratio * 100)
    else:
        visibility_score = 30  # Penalty for hidden hands
    
    # Movement score (optimal range)
    if optimal_velocity_min <= movement_velocity <= optimal_velocity_max:
        movement_score = 100
    elif movement_velocity < optimal_velocity_min:
        # Too static
        movement_score = 70 + (movement_velocity / optimal_velocity_min) * 30
    else:
        # Too fidgety
        excess = movement_velocity - optimal_velocity_max
        movement_score = max(30, 100 - excess * 0.5)
    
    # Stability score (already 0-100)
    stability_score = stability
    
    # Weighted average
    overall_score = (
        stability_score * stability_weight +
        movement_score * movement_weight +
        visibility_score * visibility_weight
    )
    
    # Determine assessment
    if overall_score >= 80:
        assessment = 'excellent'
    elif overall_score >= 60:
        assessment = 'good'
    elif overall_score >= 40:
        assessment = 'needs_improvement'
    else:
        assessment = 'poor'
    
    return {
        'overall_score': round(overall_score, 1),
        'visibility_score': round(visibility_score, 1),
        'movement_score': round(movement_score, 1),
        'stability_score': round(stability_score, 1),
        'hands_visible': hands_visible,
        'num_hands': num_hands,
        'assessment': assessment
    }

# Demo scenarios: each row holds the positional scoring arguments
# (visible, num_hands, stability, velocity, visibility_ratio) followed by a
# human-readable description.
test_cases = [
    (True, 2, 85, 50, 1.0, "Natural gesturing"),
    (True, 1, 95, 10, 0.8, "Minimal movement (too static)"),
    (True, 2, 40, 150, 1.0, "Fidgety/nervous"),
    (False, 0, 0, 0, 0.0, "Hidden hands"),
]

print("Gesture Score Tests:")
print("-" * 60)
for *score_args, desc in test_cases:
    result = calculate_gesture_score(*score_args)
    print(f"\n{desc}")
    print(f"  Overall: {result['overall_score']}/100 ({result['assessment']})")
    print(f"  Visibility: {result['visibility_score']}, Movement: {result['movement_score']}, Stability: {result['stability_score']}")

## 6. Frame Analysis Function

In [None]:
def analyze_frame_hands(frame, tracker=None):
    """
    Analyze a single frame for hand gestures.

    Runs the module-level MediaPipe ``hands`` detector on the frame, records
    the center and spread of each detected hand and scores the result with
    ``calculate_gesture_score``.

    Args:
        frame: BGR image
        tracker: Optional HandMovementTracker for temporal analysis. When
            given, it is updated with this frame's hand centers and its
            velocity, stability and visibility ratio feed the score.
            Without it, a detected hand is scored with velocity 0,
            stability 100 and full visibility.

    Returns:
        dict: The score breakdown from ``calculate_gesture_score`` plus
        'left_hand_center', 'right_hand_center' ((x, y) in pixels or None)
        and 'avg_spread' (mean hand spread, 0 when no hand was detected)
    """
    # MediaPipe expects RGB input, OpenCV delivers BGR
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    h, w = frame.shape[:2]

    # Process with MediaPipe
    results = hands.process(rgb_frame)

    left_center = None
    right_center = None
    hand_spreads = []

    if results.multi_hand_landmarks:
        for hand_landmarks, handedness in zip(
            results.multi_hand_landmarks,
            results.multi_handedness
        ):
            # Hand label reported by MediaPipe (Left/Right).
            # NOTE(review): MediaPipe documents the label as assuming a
            # mirrored (selfie) image - confirm this matches the camera feed.
            label = handedness.classification[0].label
            center = get_hand_center(hand_landmarks, w, h)
            hand_spreads.append(get_hand_spread(hand_landmarks))

            # If two detections share a label, the later one wins
            if label == 'Left':
                left_center = center
            else:
                right_center = center

    velocity = 0
    visibility_ratio = 1.0

    if tracker is not None:
        tracker.update(left_center, right_center)
        metrics = tracker.get_metrics()

        visible_hands = [
            hand
            for hand in (metrics.get('left_hand'), metrics.get('right_hand'))
            if hand and hand.get('visible')
        ]
        if visible_hands:
            # The faster hand drives the movement score
            velocity = max(
                0, *(hand.get('velocity', 0) for hand in visible_hands)
            )
            # Use the best-tracked hand so that one flickering hand does not
            # hide the fact that the other one is steadily in view
            visibility_ratio = max(
                hand.get('visibility_ratio', 1.0) for hand in visible_hands
            )
    else:
        # Single frame analysis (no temporal info)
        num_detected = sum(
            center is not None for center in (left_center, right_center)
        )
        metrics = {
            'hands_visible': num_detected > 0,
            'num_hands': num_detected,
            'avg_stability': 100 if num_detected else 0
        }

    # Calculate gesture score
    score_result = calculate_gesture_score(
        metrics['hands_visible'],
        metrics['num_hands'],
        metrics['avg_stability'],
        velocity,
        visibility_ratio
    )

    # Add hand positions
    score_result['left_hand_center'] = left_center
    score_result['right_hand_center'] = right_center
    score_result['avg_spread'] = np.mean(hand_spreads) if hand_spreads else 0

    return score_result

print("Frame analysis function defined")

## 7. Export Utility Functions

In [None]:
# Create utils module for backend integration.
# The text below is written verbatim to gesture_utils.py. It mirrors the
# notebook definitions: calculate_gesture_score exposes the same tunable
# parameters and 'assessment' label, and the tracker reports the same keys
# for visible and non-visible hands.
utils_code = '''
"""
Gesture Analysis Utilities

Provides functions for analyzing hand gestures including:
- Hand detection and tracking
- Movement frequency/stability calculation
- Gesture scoring for interviews

Generated from notebook: 03_gesture_recognition.ipynb
"""

import numpy as np
from collections import deque
from typing import Dict, Tuple, Optional, List


def get_hand_center(landmarks, img_width: int, img_height: int) -> Tuple[float, float]:
    """Calculate center of hand from landmarks."""
    x_coords = [lm.x * img_width for lm in landmarks.landmark]
    y_coords = [lm.y * img_height for lm in landmarks.landmark]
    return (sum(x_coords) / len(x_coords), sum(y_coords) / len(y_coords))


def get_hand_spread(landmarks) -> float:
    """Calculate hand spread (openness)."""
    fingertips = [4, 8, 12, 16, 20]
    wrist = landmarks.landmark[0]
    distances = []
    for tip_idx in fingertips:
        tip = landmarks.landmark[tip_idx]
        dist = np.sqrt((tip.x - wrist.x)**2 + (tip.y - wrist.y)**2 + (tip.z - wrist.z)**2)
        distances.append(dist)
    return sum(distances) / len(distances)


class HandMovementTracker:
    """Tracks hand movements over time."""

    def __init__(self, history_size: int = 30, fps: int = 30):
        self.history_size = history_size
        self.fps = fps
        self.left_hand_history = deque(maxlen=history_size)
        self.right_hand_history = deque(maxlen=history_size)

    def update(self, left_hand_center=None, right_hand_center=None):
        self.left_hand_history.append(left_hand_center)
        self.right_hand_history.append(right_hand_center)

    def _calculate_movement(self, history) -> Dict:
        valid_positions = [p for p in history if p is not None]
        # Same keys are returned for visible and non-visible hands.
        visibility_ratio = len(valid_positions) / len(history) if len(history) else 0.0
        if len(valid_positions) < 2:
            return {
                "visible": False,
                "visibility_ratio": visibility_ratio,
                "movement_distance": 0,
                "velocity": 0,
                "stability": 100
            }

        distances = []
        for i in range(1, len(valid_positions)):
            prev, curr = valid_positions[i-1], valid_positions[i]
            dist = np.sqrt((curr[0] - prev[0])**2 + (curr[1] - prev[1])**2)
            distances.append(dist)

        avg_velocity = np.mean(distances) * self.fps
        stability = max(0, 100 - avg_velocity * 0.5)

        return {
            "visible": True,
            "visibility_ratio": visibility_ratio,
            "movement_distance": sum(distances),
            "velocity": avg_velocity,
            "stability": stability
        }

    def get_metrics(self) -> Dict:
        left = self._calculate_movement(self.left_hand_history)
        right = self._calculate_movement(self.right_hand_history)

        hands_visible = left["visible"] or right["visible"]
        stabilities = [h["stability"] for h in [left, right] if h["visible"]]

        return {
            "hands_visible": hands_visible,
            "num_hands": int(left["visible"]) + int(right["visible"]),
            "left_hand": left,
            "right_hand": right,
            "avg_stability": np.mean(stabilities) if stabilities else 0
        }


def calculate_gesture_score(
    hands_visible: bool,
    num_hands: int,
    stability: float,
    movement_velocity: float,
    visibility_ratio: float = 1.0,
    optimal_velocity_min: float = 20,
    optimal_velocity_max: float = 100,
    stability_weight: float = 0.4,
    movement_weight: float = 0.3,
    visibility_weight: float = 0.3
) -> Dict:
    """Calculate gesture score for interview context."""
    visibility_score = min(100, visibility_ratio * 100) if hands_visible else 30

    if optimal_velocity_min <= movement_velocity <= optimal_velocity_max:
        movement_score = 100
    elif movement_velocity < optimal_velocity_min:
        movement_score = 70 + (movement_velocity / optimal_velocity_min) * 30
    else:
        movement_score = max(30, 100 - (movement_velocity - optimal_velocity_max) * 0.5)

    overall_score = (
        stability * stability_weight +
        movement_score * movement_weight +
        visibility_score * visibility_weight
    )

    if overall_score >= 80:
        assessment = "excellent"
    elif overall_score >= 60:
        assessment = "good"
    elif overall_score >= 40:
        assessment = "needs_improvement"
    else:
        assessment = "poor"

    return {
        "overall_score": round(overall_score, 1),
        "visibility_score": round(visibility_score, 1),
        "movement_score": round(movement_score, 1),
        "stability_score": round(stability, 1),
        "hands_visible": hands_visible,
        "num_hands": num_hands,
        "assessment": assessment
    }
'''

# Save to training directory (created on demand, including parents)
utils_path = PROJECT_ROOT / 'ml' / 'softskills' / 'training' / 'gesture_utils.py'
utils_path.parent.mkdir(parents=True, exist_ok=True)

# Python reads source files as UTF-8 by default, so write the module with an
# explicit encoding instead of whatever the platform locale happens to be.
with open(utils_path, 'w', encoding='utf-8') as f:
    f.write(utils_code)

print(f"Exported utilities to: {utils_path}")

## 8. Summary

### Key Findings
1. MediaPipe Hands provides 21 landmarks per hand
2. Hand visibility is important for trust in interviews
3. Optimal gesturing: moderate movement, not too static or fidgety
4. Stability indicates confidence and control

### Scoring Criteria
- **Visibility (30%)**: Are hands visible? Builds trust.
- **Movement (30%)**: Natural, moderate gesturing (20-100 px/s)
- **Stability (40%)**: Controlled, not shaky or nervous

### Next Steps
1. Classify specific gesture types if needed
2. Add self-touch detection (face touching, etc.)
3. Integrate with backend `gesture_analyzer.py` service