In [2]:

import cv2
import mediapipe as mp
import numpy as np
import time
import pyttsx3
import platform

# --- 1. Initialization ---

# Bring up the speech backend. `engine` is left as None when no driver can
# be initialized, which is the value speak() tests before using it.
engine = None
try:
    engine = pyttsx3.init()
except Exception as init_error:
    print(f"Could not initialize TTS engine: {init_error}")

def speak(text):
    """Announce ``text`` aloud, or print it when no speech backend exists.

    Uses the module-level pyttsx3 ``engine`` when one was initialized.
    Without an engine, macOS passes the text to the system ``say`` command
    and every other platform prints the text to stdout. Failures are
    reported on stdout and never raised to the caller.

    Args:
        text: The phrase to announce.
    """
    if engine:
        try:
            # Stop any ongoing speech
            engine.stop()
            engine.say(text)
            engine.runAndWait()
        except Exception as e:
            print(f"TTS Error: {e}")
    # Fallback for macOS when no pyttsx3 engine is available
    elif platform.system() == 'Darwin':
        import subprocess
        try:
            # Pass the text as a single argv entry instead of building a
            # shell string, so quotes or shell metacharacters in the text
            # cannot break or alter the command. check=True turns a
            # non-zero exit status into an exception we can report.
            subprocess.run(["say", str(text)], check=True)
        except (OSError, subprocess.SubprocessError) as e:
            print(f"macOS 'say' command failed: {e}")
    else:
        print(f"Speak: {text} (TTS not available)")


# MediaPipe handles: landmark drawing helpers and the hands solution module
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hand tracker limited to a single hand, used by the capture loop
hands = mp_hands.Hands(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.7,
    max_num_hands=1,
)

# --- 2. Gesture Recognition Logic ---

# Gesture Definitions: internal key -> label that is displayed and spoken
GESTURES = {
    'VICTORY': 'Victory',
    'THUMBS_UP': 'Thumbs Up',
    'ONE': 'One',
    'TWO': 'Two',
    'THREE': 'Three',
    'FOUR': 'Four',
    'FIVE': 'Five / Open Hand',
    'FIST': 'Fist / Closed Hand',
}

def recognize_gesture(landmarks):
    """Classify a hand pose from its landmarks.

    The checks run in this order:

    1. Thumbs up: the thumb tip is above the thumb MCP while the four other
       fingertips are below their PIP joints.
    2. Otherwise the extended fingers are counted and the count is mapped
       to a label. Exactly two extended fingers that are index and middle
       (with the ring finger down) are reported as "Victory".

    Image y grows downwards, so "above" means a smaller y value.

    Args:
        landmarks: Object whose ``landmark`` sequence holds the hand
            points, each exposing ``x`` and ``y``. Indices 0-20 are read
            (4/8/12/16/20 are the fingertips). May be falsy.

    Returns:
        One of the ``GESTURES`` labels, or ``None`` when ``landmarks`` is
        falsy.
    """
    if not landmarks:
        return None

    # Only x and y take part in the heuristics below.
    coords = np.array([(lm.x, lm.y) for lm in landmarks.landmark])

    # Tip / PIP landmark indices for index, middle, ring and pinky
    finger_tips = [8, 12, 16, 20]
    finger_pips = [6, 10, 14, 18]

    # --- Thumbs Up Logic ---
    # Condition: Thumb tip is above its base and other fingertips are below
    # their PIP joints.
    thumb_tip = coords[4]
    thumb_mcp = coords[2]
    is_thumb_up = thumb_tip[1] < thumb_mcp[1]
    are_fingers_down = all(
        coords[tip][1] > coords[pip][1]
        for tip, pip in zip(finger_tips, finger_pips)
    )
    if is_thumb_up and are_fingers_down:
        return GESTURES['THUMBS_UP']

    # --- Finger Counting Logic ---
    # A non-thumb finger is extended if its tip is above its PIP joint.
    extended_fingers = sum(
        1
        for tip, pip in zip(finger_tips, finger_pips)
        if coords[tip][1] < coords[pip][1]
    )

    # The thumb extends sideways, away from the palm. The index MCP (5) lies
    # on the thumb side of the pinky MCP (17), so the thumb counts as
    # extended when its tip (4) is displaced from its joint (3) in that same
    # x direction. This holds for either hand and either side of the hand
    # facing the camera. When both MCPs share an x coordinate the direction
    # is unknown and the plain "tip left of joint" rule is used.
    palm_dx = coords[5][0] - coords[17][0]
    thumb_dx = coords[4][0] - coords[3][0]
    if palm_dx == 0:
        thumb_extended = thumb_dx < 0
    else:
        thumb_extended = thumb_dx * palm_dx > 0
    if thumb_extended:
        extended_fingers += 1

    # --- Map finger counts to gestures ---
    if extended_fingers == 2:
        # Check for Victory sign (index and middle finger up)
        index_up = coords[8][1] < coords[6][1]
        middle_up = coords[12][1] < coords[10][1]
        ring_down = coords[16][1] > coords[14][1]
        if index_up and middle_up and ring_down:
            return GESTURES['VICTORY']
        return GESTURES['TWO']

    # The count is always within 0..5, so every remaining value has a label.
    key_by_count = {0: 'FIST', 1: 'ONE', 3: 'THREE', 4: 'FOUR', 5: 'FIVE'}
    return GESTURES[key_by_count[extended_fingers]]


# --- 3. Main Application Loop ---

# Stability and Speech Timing variables
# A gesture is spoken only after its repeat-frame counter exceeds this value.
REQUIRED_STABILITY = 10
MIN_SPEAK_DELAY = 2.0  # seconds

stable_gesture = None       # gesture seen on the most recent frame(s)
stable_count = 0            # consecutive frames that repeated stable_gesture
last_spoken_gesture = None  # suppresses repeating the same announcement
last_spoken_time = 0

# Start video capture
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        print("Error: Could not open camera.")
    else:
        print("Camera started. Press 'ESC' to quit.")
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Ignoring empty camera frame.")
                continue

            # Flip the frame horizontally for a later selfie-view display
            frame = cv2.flip(frame, 1)

            # To improve performance, optionally mark the image as not
            # writeable to pass by reference.
            frame.flags.setflags(write=False)
            # Convert the BGR image to RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process the frame and find hands
            results = hands.process(rgb_frame)

            # Allow writing to the frame again
            frame.flags.setflags(write=True)

            current_gesture = None
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Draw hand landmarks
                    mp_drawing.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2),
                    )
                    # Recognize the gesture
                    current_gesture = recognize_gesture(hand_landmarks)

            # --- Gesture Stability & Speech Logic ---
            if current_gesture == stable_gesture:
                stable_count += 1
            else:
                stable_gesture = current_gesture
                stable_count = 0

            now = time.time()
            if (
                stable_count > REQUIRED_STABILITY and
                current_gesture is not None and
                current_gesture != last_spoken_gesture and
                (now - last_spoken_time) > MIN_SPEAK_DELAY
            ):
                print(f"Gesture Detected: {current_gesture}")
                speak(current_gesture)
                last_spoken_gesture = current_gesture
                last_spoken_time = now
                stable_count = 0  # Reset after speaking

            # --- Display Information on Frame ---
            # Status box
            cv2.rectangle(frame, (0, 0), (300, 80), (20, 20, 20), -1)
            # Detected Gesture Text
            display_gesture = stable_gesture if stable_gesture else "None"
            cv2.putText(
                frame,
                f"Gesture: {display_gesture}",
                (10, 60),
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                (255, 255, 255),
                2,
                cv2.LINE_AA
            )

            # Display the resulting frame
            cv2.imshow('Hand Gesture to Speech', frame)

            # Exit on 'ESC' key
            if cv2.waitKey(5) & 0xFF == 27:
                break
finally:
    # --- 4. Cleanup ---
    # Runs on normal exit and on any exception (including an interrupted
    # notebook cell), so the camera and the window are always released.
    print("Cleaning up...")
    cap.release()
    cv2.destroyAllWindows()
    # Ensure the TTS engine shuts down cleanly
    if engine:
        engine.stop()



ModuleNotFoundError: No module named 'mediapipe'

In [4]:

import cv2
import mediapipe as mp
import numpy as np
import time
import pyttsx3
import platform
from collections import deque

# --- 1. Initialization ---

# Bring up the speech backend. `engine` is left as None when no driver can
# be initialized, which is the value speak() tests before using it.
engine = None
try:
    engine = pyttsx3.init()
except Exception as init_error:
    print(f"Could not initialize TTS engine: {init_error}")

def speak(text):
    """Announce ``text`` aloud, or print it when no speech backend exists.

    Uses the module-level pyttsx3 ``engine`` when one was initialized.
    Without an engine, macOS passes the text to the system ``say`` command
    and every other platform prints the text to stdout. Failures are
    reported on stdout and never raised to the caller.

    Args:
        text: The phrase to announce.
    """
    if engine:
        try:
            # Cut off anything still being spoken before queuing the new text
            engine.stop()
            engine.say(text)
            engine.runAndWait()
        except Exception as e:
            print(f"TTS Error: {e}")
    elif platform.system() == 'Darwin':
        import subprocess
        try:
            # Pass the text as a single argv entry instead of building a
            # shell string, so quotes or shell metacharacters in the text
            # cannot break or alter the command. check=True turns a
            # non-zero exit status into an exception we can report.
            subprocess.run(["say", str(text)], check=True)
        except (OSError, subprocess.SubprocessError) as e:
            print(f"macOS 'say' command failed: {e}")
    else:
        print(f"Speak: {text} (TTS not available)")

# MediaPipe handles: drawing styles, drawing helpers and the hands solution
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hand tracker limited to a single hand, used for calibration and recognition
hands = mp_hands.Hands(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.7,
    max_num_hands=1,
)

# --- 2. Enhanced Gesture Recognition Logic ---

# Gesture Definitions: internal key -> label that is displayed and spoken
GESTURES = {
    'VICTORY': 'Victory',
    'THUMBS_UP': 'Thumbs Up',
    'ONE': 'One',
    'TWO': 'Two',
    'THREE': 'Three',
    'FOUR': 'Four',
    'FIVE': 'Five / Open Hand',
    'FIST': 'Fist / Closed Hand',
    'OK': 'OK',
}

# Fingertip landmark indices from the latest recognize_gesture() call, split
# by state. It is rebound on every call and used for visual feedback.
finger_status = {'extended': [], 'closed': []}

def recognize_gesture(landmarks, hand_size):
    """Classify a hand pose using distances normalized by ``hand_size``.

    Every finger is first marked extended or closed:

    * thumb: horizontal distance between tip and MCP, relative to
      ``hand_size``, exceeds 0.2 (either direction);
    * other fingers: the tip is above the MCP by more than 0.15 of
      ``hand_size`` (image y grows downwards).

    The pose is then matched in order: OK (thumb and index tips close,
    middle/ring/pinky extended), Thumbs Up (only the thumb extended),
    Victory (only index and middle extended), then the plain finger count.

    Side effect: rebinds the module-level ``finger_status`` to a fresh dict
    with the fingertip indices of the extended and closed fingers (both
    lists are empty when ``None`` is returned early).

    Args:
        landmarks: Object whose ``landmark`` sequence holds the hand
            points, each exposing ``x``, ``y`` and ``z``. May be falsy.
        hand_size: Reference length used for normalization; must be
            positive for a gesture to be recognized.

    Returns:
        One of the ``GESTURES`` labels, or ``None`` when ``landmarks`` is
        falsy or ``hand_size`` is not positive.
    """
    global finger_status
    finger_status = {'extended': [], 'closed': []}

    if not landmarks or hand_size <= 0:
        return None

    # Thresholds, all expressed as a fraction of hand_size. The thumb ratio
    # matches the former absolute 0.04 cutoff at a hand size of 0.2 but now
    # scales with the hand like the other measurements do.
    thumb_spread_ratio = 0.2
    finger_lift_ratio = 0.15
    pinch_ratio = 0.08

    # Get coordinates for all landmarks
    coords = np.array([(lm.x, lm.y, lm.z) for lm in landmarks.landmark])

    # --- Finger Extension Calculation ---
    finger_tips_indices = [4, 8, 12, 16, 20]
    finger_mcp_indices = [2, 5, 9, 13, 17]

    extended_fingers = []
    for i, (tip_idx, mcp_idx) in enumerate(zip(finger_tips_indices, finger_mcp_indices)):
        tip = coords[tip_idx]
        mcp = coords[mcp_idx]
        if i == 0:
            # Thumb: sideways spread from its MCP; abs() makes the check
            # independent of which way the thumb points.
            is_extended = abs(tip[0] - mcp[0]) / hand_size > thumb_spread_ratio
        else:
            # Other fingers: how far the tip rises above the MCP.
            is_extended = (mcp[1] - tip[1]) / hand_size > finger_lift_ratio

        if is_extended:
            extended_fingers.append(i)
            finger_status['extended'].append(tip_idx)
        else:
            finger_status['closed'].append(tip_idx)

    num_extended = len(extended_fingers)

    # --- Gesture Mapping ---

    # OK Gesture: Thumb and Index finger tips are close, other fingers extended.
    thumb_tip = coords[4]
    index_tip = coords[8]
    tip_distance = np.linalg.norm(thumb_tip - index_tip) / hand_size
    if tip_distance < pinch_ratio and all(f in extended_fingers for f in [2, 3, 4]):
        return GESTURES['OK']

    # Thumbs Up: Only thumb is extended.
    if num_extended == 1 and 0 in extended_fingers:
        return GESTURES['THUMBS_UP']

    # Victory: Index and Middle fingers extended.
    if num_extended == 2 and 1 in extended_fingers and 2 in extended_fingers:
        return GESTURES['VICTORY']

    # Map remaining finger counts to gestures; the count is always 0..5.
    keys_by_count = ('FIST', 'ONE', 'TWO', 'THREE', 'FOUR', 'FIVE')
    return GESTURES[keys_by_count[num_extended]]

# --- 3. UI and Drawing Functions ---

def draw_ui(frame, gesture, history):
    """Paint the status panel and the gesture history panel onto ``frame``.

    Args:
        frame: Image to draw on (modified in place).
        gesture: Label to show in the status panel; a falsy value is
            displayed as "None".
        history: Iterable of earlier gesture labels, listed top to bottom
            in iteration order.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    panel_color = (20, 20, 20)
    white = (255, 255, 255)
    frame_width = frame.shape[1]

    # Status panel in the top-left corner
    cv2.rectangle(frame, (0, 0), (400, 80), panel_color, -1)
    shown = gesture or "None"
    cv2.putText(frame, f"Gesture: {shown}", (10, 60),
                font, 1.2, white, 2, cv2.LINE_AA)

    # History panel: 280 px wide, anchored to the top-right corner
    panel_left = frame_width - 280
    text_left = panel_left + 10
    cv2.rectangle(frame, (panel_left, 0), (frame_width, 180), panel_color, -1)
    cv2.putText(frame, "History:", (text_left, 30),
                font, 0.7, white, 2, cv2.LINE_AA)

    # One entry per row, 25 px apart, starting below the panel title
    for row, past_gesture in enumerate(history):
        baseline = 60 + 25 * row
        cv2.putText(frame, past_gesture, (text_left, baseline),
                    font, 0.6, (200, 200, 200), 1, cv2.LINE_AA)

def draw_finger_feedback(frame, landmarks, status):
    """Mark the fingertips listed in ``status`` with filled circles.

    Extended fingertips are drawn in green and closed ones in red (BGR
    colors). Nothing is drawn when ``landmarks`` is falsy.

    Args:
        frame: Three-dimensional image array to draw on (modified in place).
        landmarks: Object whose ``landmark`` sequence holds points with
            ``x`` / ``y`` that are scaled by the frame width / height.
        status: Dict with ``'extended'`` and ``'closed'`` lists of landmark
            indices.
    """
    if not landmarks:
        return

    height, width, _channels = frame.shape

    # Scale the landmark coordinates to integer pixel positions
    relative = np.array([(point.x, point.y) for point in landmarks.landmark])
    pixels = (relative * [width, height]).astype(int)

    # Extended tips first (green), then closed tips (red)
    for state, bgr in (('extended', (0, 255, 0)), ('closed', (0, 0, 255))):
        for tip_index in status[state]:
            cv2.circle(frame, tuple(pixels[tip_index]), 10, bgr, -1)

# --- 4. Main Application Loop ---

def run_gesture_recognition():
    """Run the camera calibration phase and then the recognition loop.

    Opens camera 0 and spends three seconds measuring the hand size (wrist
    to middle-finger MCP distance of the most recently detected hand; 0.2
    is used when no hand was seen). It then recognizes gestures frame by
    frame, speaks a gesture once it has been stable long enough, and draws
    the UI until ESC is pressed.

    The camera, the OpenCV windows and the TTS engine are released in a
    ``finally`` block, so they are cleaned up on ESC, on normal exit and
    when an exception (e.g. an interrupted notebook cell) ends the loop.

    Returns:
        None.
    """
    # Stability and Speech Timing variables
    # A gesture is spoken only after its repeat-frame counter exceeds this.
    REQUIRED_STABILITY = 10
    MIN_SPEAK_DELAY = 2.0  # seconds

    stable_gesture = None
    stable_count = 0
    last_spoken_gesture = None
    last_spoken_time = 0

    gesture_history = deque(maxlen=5)
    hand_size = 0  # Will be set during calibration

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    try:
        # --- Calibration Phase ---
        calibration_start_time = time.time()
        calibration_duration = 3.0
        calibrated = False

        print("Starting calibration... Hold your hand open in front of the camera.")

        while time.time() - calibration_start_time < calibration_duration:
            ret, frame = cap.read()
            if not ret:
                continue
            frame = cv2.flip(frame, 1)

            # Display calibration message
            time_left = calibration_duration - (time.time() - calibration_start_time)
            cv2.putText(frame, f"CALIBRATING... {time_left:.1f}s", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
            cv2.putText(frame, "Hold your hand open", (50, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb_frame)
            if results.multi_hand_landmarks:
                hand_landmarks = results.multi_hand_landmarks[0]
                # Calculate hand size as distance from wrist to middle finger MCP
                wrist = hand_landmarks.landmark[0]
                middle_mcp = hand_landmarks.landmark[9]
                hand_size = np.linalg.norm([wrist.x - middle_mcp.x, wrist.y - middle_mcp.y])
                calibrated = True

            cv2.imshow('Hand Gesture to Speech', frame)
            if cv2.waitKey(5) & 0xFF == 27:
                # ESC during calibration: the finally block releases everything
                return

        if not calibrated:
            print("Calibration failed. Could not detect hand. Using default values.")
            hand_size = 0.2  # Fallback value

        print("Calibration Complete! Starting main application.")
        speak("Calibration Complete")

        # --- Main Loop ---
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                continue

            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rgb_frame.flags.setflags(write=False)
            results = hands.process(rgb_frame)
            rgb_frame.flags.setflags(write=True)

            current_gesture = None
            if results.multi_hand_landmarks:
                hand_landmarks = results.multi_hand_landmarks[0]
                current_gesture = recognize_gesture(hand_landmarks, hand_size)

                # Draw landmarks and finger feedback
                mp_drawing.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())
                draw_finger_feedback(frame, hand_landmarks, finger_status)

            # Update gesture stability
            if current_gesture == stable_gesture:
                stable_count += 1
            else:
                stable_gesture = current_gesture
                stable_count = 0

            # Speak gesture if stable, new, and not too soon after the last one
            now = time.time()
            if (stable_count > REQUIRED_STABILITY and current_gesture is not None and
                    current_gesture != last_spoken_gesture and
                    (now - last_spoken_time) > MIN_SPEAK_DELAY):

                print(f"Gesture Detected: {current_gesture}")
                speak(current_gesture)
                last_spoken_gesture = current_gesture
                last_spoken_time = now
                # The history lists each gesture at most once, newest first
                if current_gesture not in gesture_history:
                    gesture_history.appendleft(current_gesture)
                stable_count = 0

            # Draw UI elements
            draw_ui(frame, stable_gesture, gesture_history)

            cv2.imshow('Hand Gesture to Speech', frame)
            if cv2.waitKey(5) & 0xFF == 27:
                break
    finally:
        # --- Cleanup ---
        print("Cleaning up...")
        cap.release()
        cv2.destroyAllWindows()
        if engine:
            engine.stop()

# --- 5. Run the Application ---
if __name__ == "__main__":
    # Entry point when the file runs as a script. The guard is the usual
    # convention for scripts; inside a notebook the function can simply be
    # called directly.
    run_gesture_recognition()



ModuleNotFoundError: No module named 'mediapipe'