# Sign Language Dataset Collection

## 1. Download and Import all libraries

In [1]:
import cv2
import numpy as np
import csv
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
import pandas as pd
from random import sample
import zipfile
import json
import ast

In [2]:
# Bind short aliases for the MediaPipe Hands solution module and for the
# drawing utilities used to render landmarks onto frames.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

In [3]:
# Create the MediaPipe Hands instance that get_landmarks() calls.
# It is configured for at most two hands with 0.5 detection and tracking
# confidence thresholds; static_image_mode=False selects MediaPipe's
# video-stream mode (see the MediaPipe Hands documentation for the exact
# tracking semantics).
hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=2,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5)

In [4]:
def get_landmarks(frame_rgb):
    """
    Run the module-level MediaPipe Hands instance on a single frame.
    :param frame_rgb: RGB frame from video
    :return: The result object returned by ``hands.process`` for this frame
    """
    return hands.process(frame_rgb)

In [5]:
def draw_landmarks(frame, hand_results):
    """
    Draw every detected hand (landmark points and connections) on the frame.
    :param frame: Original frame from video
    :param hand_results: Hand detection result from MediaPipe
    :return: The frame object that was passed in
    """
    detected_hands = hand_results.multi_hand_landmarks
    if not detected_hands:
        # Nothing was detected, so there is nothing to draw
        return frame

    for hand_landmarks in detected_hands:
        # Style for the landmark points, then for the connecting lines
        point_style = mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2)
        line_style = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2)
        mp_drawing.draw_landmarks(
            frame,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            point_style,
            line_style,
        )

    return frame

In [6]:
# Dataset layout constants
NUM_HANDS = 2                # hand slots stored per frame
NUM_LANDMARKS = 21           # landmarks stored per hand
NUM_COORDINATES = 3          # x, y, z values per landmark
FRAMES_PER_SEQUENCE = 50     # frames stored per video
CSV_FILE = "captured_landmarks.csv"

# All-zero frame used as padding; every landmark is its own list object
ZERO_LANDMARK = [
    [[0.0] * NUM_COORDINATES for _landmark in range(NUM_LANDMARKS)]
    for _hand in range(NUM_HANDS)
]


In [7]:
import os
import cv2
import csv
import numpy as np
import mediapipe as mp
import time


# Build the landmark dataset: for every .mp4 file in `video_folder`, run hand
# detection on up to FRAMES_PER_SEQUENCE frames and write one CSV row holding
# the class name (file name without extension) and the landmark sequence.
#
# NOTE(review): the module-level `hands` object is reused for every video and
# was created with static_image_mode=False, so tracking state may carry over
# from one file to the next — confirm whether that is acceptable.

# Iterate through video files
video_folder = "vibrant"  # Folder containing the video files
# os.listdir returns entries in arbitrary order; sort so the CSV row order is
# reproducible between runs.
video_files = sorted(f for f in os.listdir(video_folder) if f.endswith(".mp4"))

# Open the CSV once for the whole run instead of re-opening it for every video
with open(CSV_FILE, mode="w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["class", "landmarks"])  # Header

    for video_file in video_files:
        file_class = os.path.splitext(video_file)[0]  # Class name (file name without .mp4)
        video_path = os.path.join(video_folder, video_file)

        # Video capture
        cap = cv2.VideoCapture(video_path)
        captured_landmarks = []
        frame_counter = 0

        try:
            # Stop collecting after reaching FRAMES_PER_SEQUENCE frames
            while cap.isOpened() and frame_counter < FRAMES_PER_SEQUENCE:
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert frame to RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                hand_results = get_landmarks(frame_rgb)

                # Start from an all-zero frame so undetected hands stay zero
                frame_landmarks = [
                    [[0.0] * NUM_COORDINATES for _ in range(NUM_LANDMARKS)]
                    for _ in range(NUM_HANDS)
                ]

                # Populate detected hand landmarks; zipping with range() caps
                # the number of stored hands at NUM_HANDS
                if hand_results.multi_hand_landmarks:
                    for idx, hand_landmark in zip(range(NUM_HANDS), hand_results.multi_hand_landmarks):
                        frame_landmarks[idx] = [[lm.x, lm.y, lm.z] for lm in hand_landmark.landmark]

                captured_landmarks.append(frame_landmarks)
                frame_counter += 1
        finally:
            # Release the capture even if detection raised part-way through
            cap.release()

        # A file that could not be opened or decoded would otherwise be stored
        # as an all-zero sequence labelled with a real class name.
        if frame_counter == 0:
            print(f"Warning: no frames could be read from {video_file}; skipping")
            continue

        # Pad short videos with zero frames up to FRAMES_PER_SEQUENCE
        captured_landmarks.extend(
            [ZERO_LANDMARK] * (FRAMES_PER_SEQUENCE - len(captured_landmarks))
        )

        # Convert captured_landmarks to numpy array
        captured_landmarks_array = np.array(captured_landmarks)

        # Ensure correct shape
        if captured_landmarks_array.shape == (FRAMES_PER_SEQUENCE, NUM_HANDS, NUM_LANDMARKS, NUM_COORDINATES):
            # Save to CSV (nested list is written via its string representation)
            writer.writerow([file_class, captured_landmarks_array.tolist()])
        else:
            print(f"Error: Incorrect shape {captured_landmarks_array.shape} for video {video_file}")

# Release MediaPipe resources
hands.close()
print(f"Captured landmarks saved to {CSV_FILE}")


Captured landmarks saved to captured_landmarks.csv


In [8]:
# Load captured_landmarks.csv into a DataFrame for inspection
df = pd.read_csv('captured_landmarks.csv')

In [9]:
# Function to parse a serialized landmark sequence, pad it, and report its array shape
def check_landmark_shapes(landmark_data, max_hands=2, max_landmarks=21):
    """
    Parse a serialized landmark sequence, pad it, and return its array shape.

    The string is parsed with ``ast.literal_eval``. Every frame is then padded
    with all-zero hands up to ``max_hands``, and every hand with all-zero
    landmarks up to ``max_landmarks``, before the nested list is converted to
    a numpy array. Frames or hands that are already longer than the limits are
    left unchanged (nothing is truncated).

    :param landmark_data: String holding a nested list literal
        (frames -> hands -> landmarks -> coordinates)
    :param max_hands: Minimum number of hands each frame is padded to
    :param max_landmarks: Minimum number of landmarks each hand is padded to
    :return: Shape tuple of the padded numpy array
    """
    # Convert the string back to a list using ast.literal_eval
    landmarks = ast.literal_eval(landmark_data)

    for frame in landmarks:
        # Add missing hands; each padded landmark is a distinct list so no
        # inner list is shared between rows (range() of a negative count is
        # empty, so full frames are left untouched)
        frame.extend(
            [[0.0, 0.0, 0.0] for _ in range(max_landmarks)]
            for _ in range(max_hands - len(frame))
        )
        # Add missing landmarks to every hand
        for hand in frame:
            hand.extend([0.0, 0.0, 0.0] for _ in range(max_landmarks - len(hand)))

    # Convert the list into a numpy array
    landmarks_array = np.array(landmarks)

    # Output the shape of the numpy array
    print(f"Landmarks Array Shape: {landmarks_array.shape}")

    return landmarks_array.shape


# Walk the "landmarks" column and report the padded array shape of each row
for index, landmark_data in df['landmarks'].items():
    # Parse this row's serialized landmarks and obtain the resulting shape
    shape = check_landmark_shapes(landmark_data)
    print(f"Row {index} Landmark Shape: {shape}")

Landmarks Array Shape: (50, 2, 21, 3)
Row 0 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 1 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 2 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 3 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 4 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 5 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 6 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 7 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 8 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 9 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 10 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 11 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape: (50, 2, 21, 3)
Row 12 Landmark Shape: (50, 2, 21, 3)
Landmarks Array Shape:

In [10]:
# Preview the first rows of the loaded dataset
df.head()

Unnamed: 0,class,landmarks
0,and,"[[[[0.45911628007888794, 0.8573983907699585, 2..."
1,audio,"[[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0..."
2,barrier,"[[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0..."
3,break,"[[[[0.6377948522567749, 0.7908705472946167, -2..."
4,communication,"[[[[0.7416040897369385, 0.7967065572738647, -5..."
