<a href="https://colab.research.google.com/github/mikagrin07/ASL_project/blob/main/ASL_Classifier/Making_Annotated_Dataset_submission_cleaned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Upgrades pip and installs a pinned version of MediaPipe (0.10.7).

In [None]:
# Upgrade pip first, then install the pinned MediaPipe release (0.10.7).
!pip install --upgrade pip
!pip install mediapipe==0.10.7



Downloads the heavy pose landmark model from MediaPipe.

In [None]:
# -O stores the model under the fixed name pose_landmarker.task; -q silences wget's output.
!wget -O pose_landmarker.task -q https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task

Downloads the hand landmark model from MediaPipe.

In [None]:
# -O pins the output name so re-running this cell overwrites hand_landmarker.task
# instead of saving a numbered duplicate (hand_landmarker.task.1); -q silences wget's output.
!wget -O hand_landmarker.task -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

Imports MediaPipe, OpenCV, NumPy, and other libraries for processing video and extracting body and hand landmarks.

In [None]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import pandas as pd
import os

Mounts Google Drive and sets up tools to display videos in Colab.

In [None]:
from google.colab import drive
import requests
from IPython.display import Video, display

# Mount Google Drive at /content/drive so files under MyDrive can be
# read and written through ordinary file paths.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Loads the MediaPipe hand landmarker (up to two hands) and pose landmarker from the downloaded model files.

In [None]:
# Hand landmarker: built from the downloaded hand model, reporting up to two hands.
base_options_hand = python.BaseOptions(model_asset_path="hand_landmarker.task")
options_hand = vision.HandLandmarkerOptions(num_hands=2, base_options=base_options_hand)
hand_landmarker = vision.HandLandmarker.create_from_options(options_hand)

# Pose landmarker: built from the downloaded pose model, with segmentation masks enabled.
base_options_pose = python.BaseOptions(model_asset_path="pose_landmarker.task")
options_pose = vision.PoseLandmarkerOptions(
    output_segmentation_masks=True,
    base_options=base_options_pose,
)
pose_landmarker = vision.PoseLandmarker.create_from_options(options_pose)

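Defines the expected number of columns per frame: one (x, y) pair for each of the 21 left-hand, 21 right-hand, and 33 pose landmarks (150 columns in total).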

In [None]:
# Landmark counts per detector; both hands use the same number of points.
LEFT_HAND_LANDMARKS = RIGHT_HAND_LANDMARKS = 21
POSE_LANDMARKS = 33

# Each landmark contributes two columns (x and y) to a frame's row.
TOTAL_COLUMNS = 2 * sum((LEFT_HAND_LANDMARKS, RIGHT_HAND_LANDMARKS, POSE_LANDMARKS))

A function that extracts normalized hand and pose landmarks (X, Y only) from a video with MediaPipe and saves them to a CSV file, one row per frame; landmarks that are not detected in a frame are written as zeros.

In [None]:
def _write_xy(row, base_idx, landmarks):
    """Copy the x and y of each landmark into ``row`` as consecutive pairs starting at ``base_idx``."""
    for landmark_idx, landmark in enumerate(landmarks):
        row[base_idx + landmark_idx * 2] = landmark.x
        row[base_idx + landmark_idx * 2 + 1] = landmark.y


def extract_and_save_normalized_landmarks(video_path, output_csv_path):
    """Extract per-frame hand and pose landmarks from a video and save them as CSV.

    Every frame of the video is run through the module-level ``hand_landmarker``
    and ``pose_landmarker``. Each frame becomes one CSV row of ``TOTAL_COLUMNS``
    values laid out as (x, y) pairs: left hand first, then right hand, then the
    first detected pose. Slots for landmarks that were not detected stay at 0.0.
    The CSV is written without header and without index column.

    Args:
        video_path: Path of the video file to read.
        output_csv_path: Path of the CSV file to write.

    Returns:
        None. If the video cannot be opened, an error is printed and nothing
        is written.
    """
    cap = cv2.VideoCapture(video_path)

    # The try/finally guarantees the capture is released on every exit path,
    # including the early return and a detector raising mid-video.
    try:
        if not cap.isOpened():
            print(f"❌ Error: Could not open video at {video_path}")
            return

        # Column offsets of each landmark group inside a row.
        left_hand_base = 0
        right_hand_base = LEFT_HAND_LANDMARKS * 2
        pose_base = (LEFT_HAND_LANDMARKS + RIGHT_HAND_LANDMARKS) * 2

        # Storage for all frames
        all_frames_data = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # no more frames

            # OpenCV delivers BGR frames; convert to RGB for the SRGB MediaPipe image.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Detect hand and pose landmarks
            hand_result = hand_landmarker.detect(mp_frame)
            pose_result = pose_landmarker.detect(mp_frame)

            # Start from zeros so missing landmarks are encoded as 0.0.
            frame_data = [0.0] * TOTAL_COLUMNS

            if hand_result.hand_landmarks:
                for hand_idx, hand in enumerate(hand_result.hand_landmarks):
                    # NOTE(review): if both detected hands carry the same
                    # handedness label, the later one overwrites the earlier
                    # one in that slot — confirm this is acceptable.
                    label = hand_result.handedness[hand_idx][0].category_name
                    base_idx = left_hand_base if label == "Left" else right_hand_base
                    _write_xy(frame_data, base_idx, hand)

            if pose_result.pose_landmarks:
                # Only the first detected pose is stored.
                _write_xy(frame_data, pose_base, pose_result.pose_landmarks[0])

            all_frames_data.append(frame_data)
    finally:
        cap.release()

    # Convert to DataFrame and save
    df = pd.DataFrame(all_frames_data)
    df.to_csv(output_csv_path, index=False, header=False)
    print(f"✅ Saved normalized landmarks (X, Y only) to {output_csv_path}")

A function that processes all `.mp4` videos in a folder (and subfolders), extracts landmarks, and saves them as CSVs—skipping files already processed.

In [None]:
def process_all_videos(input_folder, output_folder):
    """Extract landmarks for every ``.mp4`` under ``input_folder`` into mirrored CSVs.

    The folder tree below ``input_folder`` is walked recursively. For each
    ``.mp4`` file a CSV with the same base name is produced at the same
    relative location below ``output_folder`` (subfolders are created as
    needed). Videos whose CSV already exists are skipped, so the function can
    be re-run to resume an interrupted pass.

    Args:
        input_folder: Root folder containing the videos.
        output_folder: Root folder that receives the CSV files.
    """
    video_ext = ".mp4"

    for root, dirs, files in os.walk(input_folder):
        # os.walk yields entries in arbitrary order; sorting in place makes the
        # traversal (and therefore the log) reproducible between runs.
        dirs.sort()

        # Mirror the input subfolder structure below the output folder.
        relative_path = os.path.relpath(root, input_folder)
        output_subfolder = os.path.join(output_folder, relative_path)

        for file in sorted(files):
            if not file.endswith(video_ext):  # Process only video files
                continue

            input_video_path = os.path.join(root, file)
            os.makedirs(output_subfolder, exist_ok=True)  # Ensure subfolder exists

            # Swap only the trailing extension; str.replace would also rewrite
            # any ".mp4" occurring earlier in the file name.
            csv_name = file[: -len(video_ext)] + ".csv"
            output_csv_path = os.path.join(output_subfolder, csv_name)

            # Skip videos that were already converted.
            if os.path.exists(output_csv_path):
                print(f"⚠️ Skipping {file}: CSV already exists at {output_csv_path}")
                continue

            print(f"📌 Processing: {input_video_path} → {output_csv_path}")
            extract_and_save_normalized_landmarks(input_video_path, output_csv_path)

Processes all training videos by extracting landmarks and saving them as CSVs in the specified output folder.

In [None]:
# Training videos on Drive and the destination folder for the landmark CSVs.
input_videos_folder = "/content/drive/MyDrive/ASL_project/Dataset/train"
output_csv_folder = "/content/drive/MyDrive/ASL_project/Dataset/landmark_xy"

# Convert every video that does not have a CSV yet.
process_all_videos(
    input_folder=input_videos_folder,
    output_folder=output_csv_folder,
)

⚠️ Skipping 07069_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/07069_book.csv
⚠️ Skipping 69241_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/69241_book.csv
⚠️ Skipping 68011_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/68011_book.csv
⚠️ Skipping 68012_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/68012_book.csv
⚠️ Skipping 07075_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/07075_book.csv
⚠️ Skipping 07076_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/07076_book.csv
⚠️ Skipping 70212_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/70212_book.csv
⚠️ Skipping 70266_book.mp4: CSV already exists at /content/drive/MyDrive/ASL_project/Dataset/landmark_xy/book/70266_book.csv
