In [None]:
import os
import subprocess
import time
from collections import deque

import cv2
import numpy as np

# Use macOS system voice
def speak(text):
    """Speak *text* aloud with the macOS ``say`` command.

    The call blocks until ``say`` exits. The text is passed as a single
    argument without going through a shell, so quotes, semicolons and other
    shell metacharacters in *text* are spoken literally instead of being
    interpreted as shell syntax.

    Args:
        text: The phrase to speak; converted with ``str()`` before use.

    Returns:
        None. Failure to launch ``say`` (e.g. on a non-macOS system) is
        reported on stdout and otherwise ignored, keeping speech best-effort.
    """
    try:
        # Argument-list form: no shell is involved, so nothing in `text`
        # can terminate the quoting or inject extra commands.
        subprocess.run(["say", str(text)], check=False)
    except OSError as exc:
        print(f"speak: could not run 'say': {exc}")

# Count number of fingers
def count_fingers(contour, defects, frame, *, min_depth=10000, max_angle=np.pi / 2):
    """Count the convexity defects that look like gaps between two fingers.

    A defect is counted when the angle at its deepest ("far") point, formed
    with the defect's start and end contour points, is at most *max_angle*
    and its depth value exceeds *min_depth*. Each counted defect is marked
    on *frame* with a filled red dot.

    Args:
        contour: Contour points indexed as ``contour[i][0] -> (x, y)``.
        defects: Array indexed as ``defects[i, 0] -> (start, end, far, depth)``
            where the first three are indices into *contour*, or ``None``.
        frame: Image to draw the markers on (modified in place). Pass
            ``None`` to count without drawing.
        min_depth: Exclusive lower bound on the defect depth value. Per the
            OpenCV documentation ``convexityDefects`` reports depth in fixed
            point with 8 fractional bits, so the default is about 39 px.
        max_angle: Inclusive upper bound, in radians, on the angle at the
            far point.

    Returns:
        The number of qualifying defects; 0 when *contour* or *defects* is
        ``None``.
    """
    if defects is None or contour is None:
        return 0

    count = 0
    for s, e, f, d in defects[:, 0]:
        start = np.asarray(contour[s][0], dtype=float)
        end = np.asarray(contour[e][0], dtype=float)
        far = np.asarray(contour[f][0], dtype=float)

        # Side lengths of the triangle start-far-end; `a` is opposite `far`.
        a = np.linalg.norm(end - start)
        b = np.linalg.norm(far - start)
        c = np.linalg.norm(end - far)

        # Avoid division by zero when `far` coincides with an endpoint.
        if b * c == 0:
            continue

        # Law of cosines. Rounding can push the ratio marginally outside
        # [-1, 1], which would make arccos return NaN, so clamp it first.
        cos_angle = np.clip((b ** 2 + c ** 2 - a ** 2) / (2 * b * c), -1.0, 1.0)
        angle = np.arccos(cos_angle)

        if angle <= max_angle and d > min_depth:
            count += 1
            if frame is not None:
                # Plain Python ints: numpy scalars are not accepted as
                # drawing coordinates by every OpenCV build.
                center = (int(far[0]), int(far[1]))
                cv2.circle(frame, center, 5, [0, 0, 255], -1)
    return count

# Initialize camera
cap = cv2.VideoCapture(0)
# Request a 640x480 capture using the named property ids instead of 3 and 4.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Gesture smoothing
gesture_queue = deque(maxlen=25)  # most recent per-frame finger counts
stable_gesture = None
stable_count = 0
required_stability = 10
last_spoken_gesture = None
last_time_spoken = time.time()
min_speak_delay = 2  # seconds

# Loop-invariant detection settings, built once instead of on every frame.
lower_skin = np.array([0, 48, 80], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)
min_hand_area = 4000  # contours at or below this area are ignored
# Heuristic: with no finger gaps found, a contour filling at least this
# fraction of its convex hull is treated as a closed fist. Tune per camera.
fist_solidity = 0.9
gesture_names = {
    0: "Fist / Closed",
    1: "One",
    2: "Two",
    3: "Three",
    4: "Four",
    5: "Five / Open Hand",
}


def _estimate_fingers(mask, canvas):
    """Return the finger count (0-5) for the largest blob in *mask*, or None.

    None means no usable hand contour was found in this frame. Markers for
    the counted finger gaps are drawn on *canvas* by ``count_fingers``.
    """
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    contour = max(contours, key=cv2.contourArea)
    contour_area = cv2.contourArea(contour)
    if contour_area <= min_hand_area:
        return None

    hull_indices = cv2.convexHull(contour, returnPoints=False)
    if hull_indices is None or len(hull_indices) <= 3:
        return None

    try:
        defects = cv2.convexityDefects(contour, hull_indices)
    except cv2.error:
        # OpenCV rejects some contours (e.g. self-intersecting ones);
        # skip this frame rather than abort the whole capture loop.
        return None

    gaps = count_fingers(contour, defects, canvas)
    if gaps == 0:
        # No gaps is ambiguous between one raised finger and a fist, so
        # fall back to how completely the contour fills its convex hull.
        hull_area = cv2.contourArea(cv2.convexHull(contour))
        if hull_area > 0 and contour_area / hull_area >= fist_solidity:
            return 0
    return min(gaps + 1, 5)  # n gaps separate n + 1 fingers; cap at 5


try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        # `roi` is a view into `frame`, so markers drawn on it appear in
        # the displayed frame as well.
        roi = frame[100:400, 100:400]
        cv2.rectangle(frame, (100, 100), (400, 400), (0, 255, 0), 2)

        # Convert to HSV and apply skin mask
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, lower_skin, upper_skin)
        mask = cv2.GaussianBlur(mask, (5, 5), 0)

        gesture = None
        fingers = _estimate_fingers(mask, roi)

        if fingers is not None:
            gesture_queue.append(fingers)
            avg_fingers = round(sum(gesture_queue) / len(gesture_queue))

            # Map fingers to gestures
            gesture = gesture_names.get(avg_fingers)

            # Gesture stability check
            if gesture == stable_gesture:
                stable_count += 1
            else:
                stable_count = 0
                stable_gesture = gesture

            # Speak only when stable and enough time passed
            now = time.time()
            if (
                stable_count > required_stability
                and gesture != last_spoken_gesture
                and (now - last_time_spoken) > min_speak_delay
            ):
                print("Gesture Detected:", gesture)
                speak(gesture)
                last_spoken_gesture = gesture
                last_time_spoken = now
                stable_count = 0  # Reset

            # Show on screen
            cv2.putText(
                frame,
                f"Gesture: {gesture}",
                (10, 60),
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                (255, 255, 255),
                2,
            )

        # Display output
        cv2.imshow("Hand Gesture Recognition", frame)
        cv2.imshow("Mask", mask)

        if cv2.waitKey(1) & 0xFF == 27:  # ESC to quit
            break
finally:
    # Always free the camera and close the windows, including when the
    # loop is interrupted (e.g. a kernel interrupt) or raises.
    cap.release()
    cv2.destroyAllWindows()


Gesture Detected: Two
Gesture Detected: Three
Gesture Detected: Two
Gesture Detected: Three
Gesture Detected: Two
Gesture Detected: One
