In [1]:
import logging
import os

import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [6]:
# Root folder of the training images. The discovery cell lists its
# sub-directories and uses each sub-directory name as the class label.
DATA_DIR = '../data/asl_alphabet_train/asl_alphabet_train'
# CSV written at the end of the extraction cell: one row of landmark
# coordinates per image plus a `label` column.
OUTPUT_FILE = '../data/asl_landmarks_train.csv'

In [7]:
# No MediaPipe model is created in this cell. The extraction cell further down
# builds one Hands model per worker thread on first use (see get_hands_model),
# so there is nothing to initialize up front; this cell only prints a notice.
print("MediaPipe will be initialized in parallel workers...")

MediaPipe will be initialized in parallel workers...


In [8]:
# Build two parallel lists from the DATA_DIR/<label>/<image file> layout:
# image_paths[i] is the path of an image and labels[i] is the name of the
# sub-directory it was found in.
image_paths = []
labels = []

for label in sorted(os.listdir(DATA_DIR)):
    class_dir = os.path.join(DATA_DIR, label)

    # Skip stray files sitting next to the class folders.
    if not os.path.isdir(class_dir):
        continue

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the resulting CSV reproducible across machines and runs.
    for image_file in sorted(os.listdir(class_dir)):
        # Compare the lower-cased name so files such as "A1.JPG" are not
        # silently dropped from the dataset.
        if image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_paths.append(os.path.join(class_dir, image_file))
            labels.append(label)

print(f"Found {len(image_paths)} images belonging to {len(set(labels))} classes.")

Found 86912 images belonging to 29 classes.


In [9]:
# PARALLEL PROCESSING with ThreadPoolExecutor + Thread-Local MediaPipe
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import mediapipe as mp_module

# Per-thread namespace: attributes set on it are visible only to the thread
# that set them, so every worker thread ends up with its own Hands model.
thread_local = threading.local()

def get_hands_model():
    """Return the calling thread's MediaPipe Hands model, building it on first use.

    The model is stored as ``thread_local.hands_model`` and configured for
    still images (``static_image_mode=True``), at most one hand per image,
    and a minimum detection confidence of 0.5.
    """
    try:
        # Fast path: this thread already built its model.
        return thread_local.hands_model
    except AttributeError:
        pass

    thread_local.hands_model = mp_module.solutions.hands.Hands(
        static_image_mode=True,
        max_num_hands=1,
        min_detection_confidence=0.5
    )
    return thread_local.hands_model

def process_single_image(file_path):
    """Extract wrist-relative hand landmarks from one image file.

    The image is read with OpenCV, converted from BGR to RGB and passed to
    the calling thread's model obtained from ``get_hands_model``.

    Args:
        file_path: Path of the image to read with ``cv2.imread``.

    Returns:
        A flat list of (x, y, z) values for every landmark of the first
        detected hand, each expressed relative to landmark 0 (which this code
        treats as the wrist). A list of 63 NaN values is returned instead when
        the image cannot be read, when no hand is detected, or when any
        exception is raised during processing.
    """
    # 63 = 21 landmarks x 3 coordinates, matching the CSV column layout.
    missing_row = [np.nan] * 63
    try:
        # Get this thread's own MediaPipe model
        hands_model = get_hands_model()

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals an unreadable file with None, not an exception.
            return missing_row

        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands_model.process(image_rgb)
        if not results.multi_hand_landmarks:
            return missing_row

        hand_landmarks = results.multi_hand_landmarks[0]
        wrist_coords = hand_landmarks.landmark[0]

        landmark_row = []
        for landmark in hand_landmarks.landmark:
            landmark_row.extend([
                landmark.x - wrist_coords.x,
                landmark.y - wrist_coords.y,
                landmark.z - wrist_coords.z,
            ])
        return landmark_row
    except Exception as exc:
        # Stay best-effort (one bad image must not abort the whole run), but
        # report the error so a broken setup is not mistaken for "no hand".
        logging.getLogger(__name__).warning(
            "Landmark extraction failed for %s: %r", file_path, exc
        )
        return missing_row

# Run the extraction on a thread pool, report statistics and save the CSV.
print("Starting PARALLEL processing with thread-local models...")
# os.cpu_count() may return None when the CPU count cannot be determined.
num_workers = os.cpu_count() or 1
print(f"Using {num_workers} threads (each with its own MediaPipe model)")

# Make sure the destination folder exists before the long extraction run,
# so a missing directory cannot throw away the results at the very end.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

with ThreadPoolExecutor(max_workers=num_workers) as executor:
    # executor.map yields results in input order, so row i still belongs to
    # image_paths[i] / labels[i] without tracking indices or re-sorting.
    processed_data = list(
        tqdm(
            executor.map(process_single_image, image_paths),
            total=len(image_paths),
            desc="Extracting landmarks",
        )
    )

# Statistics
# A failed row is all-NaN, so inspecting its first value is sufficient.
total_count = len(processed_data)
successful_count = sum(1 for row in processed_data if not np.isnan(row[0]))
failed_count = total_count - successful_count

# Avoid ZeroDivisionError when no images were found.
successful_pct = successful_count / total_count * 100 if total_count else 0.0
failed_pct = failed_count / total_count * 100 if total_count else 0.0

print(f"\nProcessed {total_count} images.")
print(f"   Successful: {successful_count} ({successful_pct:.2f}%)")
print(f"   Failed: {failed_count} ({failed_pct:.2f}%)")

# Save to CSV
print("\nSaving landmarks to CSV...")
# Column order matches the row layout: x, y, z for landmark 0, then landmark 1, ...
column_names = [f'landmark_{i}_{coord}' for i in range(21) for coord in ['x', 'y', 'z']]
df = pd.DataFrame(processed_data, columns=column_names)
df['label'] = labels
df.to_csv(OUTPUT_FILE, index=False)
print(f"✓ Saved to {OUTPUT_FILE}")
print(f"✓ CSV shape: {df.shape}")
print(f"✓ Classes: {sorted(df['label'].unique())}")

Starting PARALLEL processing with thread-local models...
Using 14 threads (each with its own MediaPipe model)


I0000 00:00:1765051449.236872 2014818 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M4 Pro
I0000 00:00:1765051449.241112 2014833 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M4 Pro
W0000 00:00:1765051449.245516 2014821 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1765051449.248552 2014848 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M4 Pro
W0000 00:00:1765051449.248816 2014836 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765051449.254411 2014824 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1765051449.257098 2014863 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: App

Extracting landmarks:   0%|          | 0/86912 [00:00<?, ?it/s]


Processed 86912 images.
   Successful: 63616 (73.20%)
   Failed: 23296 (26.80%)

Saving landmarks to CSV...
✓ Saved to ../data/asl_landmarks_train.csv
✓ CSV shape: (86912, 64)
✓ Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
