## Imports

In [10]:
import cv2
import mediapipe as mp
import numpy as np
import os
import glob
import csv

### --- 1. INITIALIZE MEDIAPIPE ---

In [None]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True, # We are processing static images
    max_num_hands=1,
    min_detection_confidence=0.5
)

I0000 00:00:1762870932.738629  966166 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1762870932.814064  966434 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.7-0ubuntu0.24.04.2), renderer: llvmpipe (LLVM 20.1.2, 256 bits)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


### --- 2. DEFINE CLASSES AND DATA FOLDER ---

In [3]:
DATA_DIR = 'asl_alphabet_train'
class_names = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
    'del', 'nothing', 'space'
]

In [9]:
# --- 3. CREATE THE NEW CSV FILE ---
csv_file_name = 'asl_landmarks.csv'
print(f"Opening {csv_file_name} to write...")

with open(csv_file_name, mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    
    # --- 4. WRITE THE HEADER ROW ---
    # This is the 'label' column, plus 63 coordinate columns (x0, y0, z0, x1, y1, z1, ...)
    header = ['label']
    for i in range(21): # 21 landmarks
        header.extend([f'x{i}', f'y{i}', f'z{i}'])
    csv_writer.writerow(header)

    # --- 5. LOOP THROUGH ALL IMAGES ---
    for label in class_names:
        folder_path = os.path.join(DATA_DIR, label, '*.jpg')
        image_files = glob.glob(folder_path)
        print(f"Processing {len(image_files)} images for class: {label}")
        
        for image_path in image_files:
            # Load the image
            image = cv2.imread(image_path)
            if image is None:
                continue
                
            # Convert BGR (OpenCV) to RGB (MediaPipe)
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
            # Process the image to find hands
            results = hands.process(rgb_image)

            # --- 6. EXTRACT & NORMALIZE LANDMARKS ---
            if results.multi_hand_landmarks:
                hand_landmarks = results.multi_hand_landmarks[0] # Get the first (and only) hand
                
                # --- THIS IS THE "SECRET SAUCE" ---
                # We normalize all landmarks to be relative to the wrist (landmark 0).
                # This makes the data "translation-invariant" (it doesn't matter
                # where the hand is on the screen).
                
                wrist = hand_landmarks.landmark[0]
                landmark_vector = []
                
                for landmark in hand_landmarks.landmark:
                    # Calculate coordinates relative to the wrist
                    landmark_vector.append(landmark.x - wrist.x)
                    landmark_vector.append(landmark.y - wrist.y)
                    landmark_vector.append(landmark.z - wrist.z)
                    
                # landmark_vector is now a list of 63 numbers
                
                # --- 7. WRITE TO CSV ---
                # The first column is the label, followed by the 63 numbers
                csv_writer.writerow([label] + landmark_vector)

print("--- Dataset Creation Complete! ---")
hands.close()

Opening asl_landmarks.csv to write...
Processing 3000 images for class: A


ValueError: _graph is None in SolutionBase