In [1]:
pip install opencv-python mediapipe


Collecting mediapipe
  Downloading mediapipe-0.10.21-cp312-cp312-win_amd64.whl.metadata (10 kB)
Collecting jax (from mediapipe)
  Downloading jax-0.6.0-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.6.0-cp312-cp312-win_amd64.whl.metadata (1.2 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Using cached protobuf-4.25.6-cp310-abi3-win_amd64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting sentencepiece (from mediapipe)
  Downloading sentencepiece-0.2.0-cp312-cp312-win_amd64.whl.metadata (8.3 kB)
Downloading mediapipe-0.10.21-cp312-cp312-win_amd64.whl (51.0 MB)
   ---------------------------------------- 0.0/51.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/51.0 MB ? eta -:--:--
   ---------------------------------------- 0.5/51.0 MB 1.1 MB/s eta 0:00:46
   ---------------------------------------- 0.5/51.0 MB 1.1 

In [3]:
import cv2
import os
import string
import mediapipe as mp
import numpy as np
import csv
import uuid

# Letters that may be chosen as a label: the lowercase alphabet minus 'j' and 'z'.
allowed_labels = [letter for letter in string.ascii_lowercase if letter not in ('j', 'z')]
# No label is active until the user presses one of the allowed keys.
current_label = None

# Root folder that receives the per-label image folders and the landmark CSV.
dataset_dir = "asl_dataset"
os.makedirs(dataset_dir, exist_ok=True)

# One shared CSV collects the landmark rows of every saved sample.
# The header is written only when the file does not exist yet, so repeated
# runs keep appending to the same file.
csv_path = os.path.join(dataset_dir, "all_landmarks.csv")
if not os.path.exists(csv_path):
    # Column order: label, then x/y/z for landmark 0, x/y/z for landmark 1, ... up to 20.
    header = ['label', *(f"{axis}{idx}" for idx in range(21) for axis in "xyz")]
    with open(csv_path, mode='w', newline='') as file:
        csv.writer(file).writerow(header)

# MediaPipe hand tracker: video (non-static) mode, at most one hand per frame.
mp_hands = mp.solutions.hands
hand_connections = mp_hands.HAND_CONNECTIONS
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)

# Depth coloring function
def depth_to_color(z, min_z, max_z):
    """Map a depth value onto a green-to-red BGR colour.

    Args:
        z: Depth value to colour.
        min_z: Depth that maps to pure green.
        max_z: Depth that maps to (almost) pure red.

    Returns:
        A ``(blue, green, red)`` tuple of ints; blue is always 0.
    """
    # The 1e-6 term keeps the division defined when min_z == max_z.
    span = max_z - min_z + 1e-6
    t = (z - min_z) / span
    red = int(255 * t)
    green = int(255 * (1 - t))
    return (0, green, red)  # BGR

# Webcam start
cap = cv2.VideoCapture(0)
print("Press a–y (excluding j & z) to set label. Space to save. ESC to quit.")

# Interactive capture loop.
#   * a letter key from allowed_labels selects the active label
#   * SPACE saves the current hand crop (PNG) plus its landmark row (CSV)
#   * ESC quits
# The try/finally guarantees the camera and the OpenCV windows are released
# even when a frame raises or the cell is interrupted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        original = frame.copy()  # untouched copy used for cropping and blurring
        h, w, _ = frame.shape

        # Process with MediaPipe
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        # Both stay empty unless a hand with a usable crop is found in this
        # frame; the save branch below requires both.
        hand_roi_gray = None
        landmark_row = []

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                landmarks = hand_landmarks.landmark
                z_vals = [lm.z for lm in landmarks]
                min_z, max_z = min(z_vals), max(z_vals)
                landmark_px = []

                # Draw landmarks with depth coloring
                for lm in landmarks:
                    x_px, y_px = int(lm.x * w), int(lm.y * h)
                    landmark_px.append((x_px, y_px, lm.z))
                    color = depth_to_color(lm.z, min_z, max_z)
                    cv2.circle(frame, (x_px, y_px), 6, color, -1)

                # Draw each connection, colored by the mean depth of its ends
                for start_idx, end_idx in hand_connections:
                    x1, y1, z1 = landmark_px[start_idx]
                    x2, y2, z2 = landmark_px[end_idx]
                    avg_z = (z1 + z2) / 2
                    color = depth_to_color(avg_z, min_z, max_z)
                    cv2.line(frame, (x1, y1), (x2, y2), color, 2)

                # Landmark values for CSV
                for lm in landmarks:
                    landmark_row.extend([lm.x, lm.y, lm.z])

                # Bounding box around the landmarks, padded by 20 px and
                # clamped to the frame
                x_coords = [lm.x * w for lm in landmarks]
                y_coords = [lm.y * h for lm in landmarks]
                x_min, x_max = int(min(x_coords)) - 20, int(max(x_coords)) + 20
                y_min, y_max = int(min(y_coords)) - 20, int(max(y_coords)) + 20
                x_min, y_min = max(x_min, 0), max(y_min, 0)
                x_max, y_max = min(x_max, w), min(y_max, h)

                # If the landmarks lie outside the frame the clamped box can
                # be empty; cropping it would hand an empty image to
                # cvtColor/resize. Skip the crop so hand_roi_gray stays None
                # and saving is disabled for this frame.
                if x_max <= x_min or y_max <= y_min:
                    continue

                # Background blur: keep the annotated frame inside the box so
                # the landmark overlay drawn above stays visible, and use the
                # blurred clean frame everywhere else.
                mask = np.zeros(frame.shape[:2], dtype=np.uint8)
                cv2.rectangle(mask, (x_min, y_min), (x_max, y_max), 255, -1)
                blurred = cv2.GaussianBlur(original, (31, 31), 0)
                frame = np.where(mask[:, :, None] == 255, frame, blurred)

                # Crop and grayscale (taken from the clean copy, so saved
                # images never contain the overlay)
                hand_roi = original[y_min:y_max, x_min:x_max]
                gray = cv2.cvtColor(hand_roi, cv2.COLOR_BGR2GRAY)
                contrast = cv2.equalizeHist(gray)
                hand_roi_gray = cv2.resize(contrast, (128, 128))
                cv2.imshow("Cropped Hand", cv2.resize(hand_roi_gray, (200, 200)))

        # Label text
        if current_label:
            cv2.putText(frame, f"Label: {current_label.upper()}", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

        cv2.imshow("Webcam", frame)

        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC
            break
        elif chr(key).lower() in allowed_labels:
            current_label = chr(key).lower()
            print(f"Label set to: {current_label.upper()}")
        elif key == 32 and current_label and hand_roi_gray is not None and landmark_row:
            # SPACE: unique file id from the first 16 hex characters of a random UUID
            uid = uuid.uuid4().hex[:16]
            label_dir = os.path.join(dataset_dir, current_label)
            os.makedirs(label_dir, exist_ok=True)
            filename = f"{current_label}_{uid}.png"
            filepath = os.path.join(label_dir, filename)
            cv2.imwrite(filepath, hand_roi_gray)

            # Save landmark with label as first column
            with open(csv_path, mode='a', newline='') as file:
                writer = csv.writer(file)
                writer.writerow([current_label.upper()] + landmark_row)

            print(f"Saved image: {filepath}")
            print(f"Saved landmark row to: {csv_path}")
finally:
    cap.release()
    cv2.destroyAllWindows()


Press a–y (excluding j & z) to set label. Space to save. ESC to quit.
Label set to: A
Label set to: A
Saved image: asl_dataset\a\a_f44f4fc32d3f47ec.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_6ac3d1be735948d2.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_7189fa6da3724b51.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_0352475086c9486a.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_c8c7aed9c643477a.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_44c6d24cbf174b2c.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_a2ebe954fc0742fd.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_82e362a583784bc8.png
Saved landmark row to: asl_dataset\all_landmarks.csv
Saved image: asl_dataset\a\a_9648c9d3db2c4c4b.png
Saved landmark row to: a