In [None]:
!pip install opencv-python-headless
!pip install mediapipe
!pip install numpy


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os
import time

# --- MediaPipe handles ------------------------------------------------------
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Hand detector configured for a video stream and at most one hand.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
)

# --- Collection settings ----------------------------------------------------
# Root output folder; images are written to one sub-folder per gesture label.
IMAGE_PATH = "gesture_images"
# Gesture labels: the letters 'A'..'Z' followed by 'OPEN_HAND'.
GESTURES = [chr(code) for code in range(ord('A'), ord('Z') + 1)] + ['OPEN_HAND']
CURRENT_GESTURE = 0  # index into GESTURES of the gesture being captured
IMG_COUNT = 0  # images saved so far for the current gesture
IMG_TARGET = 400  # images to save per gesture
ROI_BOX = (50, 100, 350, 400)  # (x1, y1, x2, y2) of the capture region, in frame pixels
minValue = 70  # threshold argument passed to cv2.threshold in process_frame

# --- Accumulators -----------------------------------------------------------
# process_frame appends one landmark list to `data` and the matching gesture
# name to `labels` for every captured sample.
data = []
labels = []

# Make sure every gesture has its own output folder under IMAGE_PATH.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() first; it still raises if the path exists
# but is a regular file, which would otherwise only surface later as failed
# image writes.
for gesture in GESTURES:
    os.makedirs(os.path.join(IMAGE_PATH, gesture), exist_ok=True)

def process_frame(frame, process_landmarks=True):
    """Threshold the ROI of *frame* and, optionally, capture one hand sample.

    The region given by ROI_BOX is cut out of *frame*, converted to grayscale,
    blurred and thresholded. When *process_landmarks* is true and fewer than
    IMG_TARGET images have been saved for the current gesture, MediaPipe is run
    on the ROI. For each detected hand whose landmarks all lie inside the ROI:

    * the thresholded ROI with the landmarks drawn on it is written to
      ``IMAGE_PATH/<gesture>/img_NNNN.png``;
    * only if that write succeeds, the (x, y, z) landmark list is appended to
      ``data``, the gesture name to ``labels`` and ``IMG_COUNT`` is
      incremented, so the three stay in step with the files on disk;
    * the landmarks are also drawn onto the colour ROI for on-screen feedback.

    Args:
        frame: BGR image (as read from the camera) at least as large as
            ROI_BOX.
        process_landmarks: When False, only the thresholding is performed and
            nothing is detected, saved or drawn.

    Returns:
        A ``(processed_roi, roi)`` tuple: the thresholded single-channel ROI
        and the colour ROI (with landmarks drawn on it if a sample was
        captured).
    """
    # Only IMG_COUNT is rebound here; data and labels are mutated in place and
    # therefore need no global declaration.
    global IMG_COUNT

    x1, y1, x2, y2 = ROI_BOX
    # Assuming frame is a NumPy array (as OpenCV returns), this slice is a
    # view, so drawing on `roi` below also draws on `frame`.
    roi = frame[y1:y2, x1:x2]

    gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    blurred_roi = cv2.GaussianBlur(gray_roi, (5, 5), 2)
    th3 = cv2.adaptiveThreshold(
        blurred_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    # NOTE(review): per the OpenCV docs, THRESH_OTSU makes cv2.threshold choose
    # the threshold itself, so minValue is effectively ignored - confirm intent.
    _, processed_roi = cv2.threshold(
        th3, minValue, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    if not process_landmarks or IMG_COUNT >= IMG_TARGET:
        return processed_roi, roi

    # MediaPipe expects RGB input; OpenCV frames are BGR.
    result = hands.process(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
    if not result.multi_hand_landmarks:
        return processed_roi, roi

    # Loop invariants: drawing styles and the label of the current gesture.
    landmark_style = mp_drawing_styles.get_default_hand_landmarks_style()
    connection_style = mp_drawing_styles.get_default_hand_connections_style()
    gesture = GESTURES[CURRENT_GESTURE]

    for hand_landmarks in result.multi_hand_landmarks:
        # Never save more than IMG_TARGET images, even if several hands are
        # reported for a single frame.
        if IMG_COUNT >= IMG_TARGET:
            break

        # Landmark x/y are normalised to the image passed to MediaPipe (the
        # ROI), so any value outside [0, 1] means the hand leaves the ROI.
        if not all(0 <= lm.x <= 1 and 0 <= lm.y <= 1 for lm in hand_landmarks.landmark):
            continue

        # Draw on a colour copy of the thresholded ROI so processed_roi itself
        # stays untouched.
        landmark_image = cv2.cvtColor(processed_roi, cv2.COLOR_GRAY2BGR)
        mp_drawing.draw_landmarks(
            landmark_image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            landmark_style,
            connection_style)

        img_filename = os.path.join(IMAGE_PATH, gesture, f"img_{IMG_COUNT:04d}.png")
        # cv2.imwrite reports failure through its return value; skip the
        # sample rather than record landmarks that have no image on disk.
        if not cv2.imwrite(img_filename, landmark_image):
            print(f"Warning: could not write {img_filename}; sample skipped.")
            continue

        data.append([(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark])
        labels.append(gesture)
        IMG_COUNT += 1

        # Visual feedback: overlay the landmarks on the colour ROI.
        mp_drawing.draw_landmarks(
            roi,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            landmark_style,
            connection_style)

    return processed_roi, roi

# Main video capture loop
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below is skipped silently.
    print("Error: could not open the camera (device 0).")

# Preview window size
window_width = 800
window_height = 600

last_process_time = time.time()
process_interval = 0.1  # Process landmarks every 100ms

# try/finally guarantees the camera and the window are released even if the
# loop raises or the kernel is interrupted.
try:
    cv2.namedWindow("Data Collection", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("Data Collection", window_width, window_height)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Flip the frame horizontally so the preview behaves like a mirror
        frame = cv2.flip(frame, 1)

        # Run landmark detection at most once per process_interval; frames in
        # between are only thresholded.
        current_time = time.time()
        detect_now = current_time - last_process_time >= process_interval
        processed_roi, roi_with_landmarks = process_frame(frame, process_landmarks=detect_now)
        if detect_now:
            last_process_time = current_time

        # Draw the fixed ROI box on the frame
        x1, y1, x2, y2 = ROI_BOX
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Display the ROI with landmarks in the frame
        frame[y1:y2, x1:x2] = roi_with_landmarks

        # Display current gesture and progress
        cv2.putText(frame, f"Capturing Gesture: {GESTURES[CURRENT_GESTURE]}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, f"Images Captured: {IMG_COUNT}/{IMG_TARGET}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Tell the user when the current gesture is finished
        if IMG_COUNT >= IMG_TARGET:
            cv2.putText(frame, "Gesture capture complete. Press 'n' for the next gesture.", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the frame
        cv2.imshow("Data Collection", frame)

        # Keep only the low byte: some backends set extra high bits on the key
        # code, which would break the comparisons below.
        key = cv2.waitKey(1) & 0xFF

        # ESC quits; collected data is saved after the loop
        if key == 27:
            break

        # 'n' / 'N' advances to the next gesture once the target is reached
        if key in (ord('n'), ord('N')) and IMG_COUNT >= IMG_TARGET:
            # NOTE(review): after the last gesture this wraps back to the first
            # one, whose img_NNNN.png files would then be overwritten - confirm
            # that this is intended.
            CURRENT_GESTURE = (CURRENT_GESTURE + 1) % len(GESTURES)
            IMG_COUNT = 0  # Reset image count for the new gesture
            print(f"Gesture {GESTURES[CURRENT_GESTURE]} is ready. Capturing will start now.")
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()

# Save collected data and labels to files. Skip the write when nothing was
# captured so an earlier, non-empty dataset is not replaced by empty arrays.
if data:
    np.save('hand_landmarks.npy', np.array(data))
    np.save('labels.npy', np.array(labels))
    print("Data and labels saved successfully!")
else:
    print("No samples were collected; nothing was saved.")
