In [None]:
!python3 -m pip install mediapipe

In [None]:
# Download the float32 DeepLab v3 image-segmenter model and store it locally as deeplabv3.tflite.
!wget -O deeplabv3.tflite -q https://storage.googleapis.com/mediapipe-models/image_segmenter/deeplab_v3/float32/1/deeplab_v3.tflite

In [None]:
# Download the float16 hand landmarker model bundle; without -O, wget keeps the
# remote file name (hand_landmarker.task), which is the path the landmarker options load.
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

In [None]:
#Import the necessary modules.
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python.components import processors
from mediapipe.tasks.python import vision

In [None]:
import os
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from matplotlib import pyplot as plt
from mediapipe.framework.formats import landmark_pb2
from mediapipe.python.solutions.drawing_utils import DrawingSpec

def zoom_and_crop(image, landmarks, scale=2.5, output_size=224):
    """Crop a padded square around a set of landmarks and resize it.

    The axis-aligned bounding box of the landmarks is computed in pixel
    coordinates, its longer side is multiplied by ``scale`` to get the side
    of a square window centred on the box, and the window is clipped to the
    image bounds before cropping. Because of the clipping, the cropped region
    may be non-square when the window touches an image border; it is still
    resized to ``output_size`` x ``output_size``.

    Args:
        image: Array whose first two dimensions are (height, width).
        landmarks: Iterable of objects exposing ``x`` and ``y`` attributes,
            which are multiplied by the image width / height respectively
            (i.e. treated as normalized coordinates).
        scale: Factor applied to the longer bounding-box side.
        output_size: Side length, in pixels, of the returned square image.

    Returns:
        The cropped region resized with ``cv2.resize`` to
        ``(output_size, output_size)``.

    Raises:
        ValueError: If ``landmarks`` is empty, or if the crop window contains
            no pixels after clipping (landmarks outside the frame, or all
            landmarks collapsing onto one point).
    """
    h, w = image.shape[:2]

    # Convert normalized landmarks to pixel coordinates
    landmark_px = np.array([[lm.x * w, lm.y * h] for lm in landmarks], dtype=float)
    if landmark_px.size == 0:
        raise ValueError("zoom_and_crop() needs at least one landmark")

    # Bounding box around landmarks
    min_xy = landmark_px.min(axis=0)
    max_xy = landmark_px.max(axis=0)
    center = (min_xy + max_xy) / 2
    size = (max_xy - min_xy).max() * scale  # square region with padding

    # Clip the window to the frame; astype(int) truncates towards zero.
    x1, y1 = np.clip(center - size / 2, 0, [w, h]).astype(int)
    x2, y2 = np.clip(center + size / 2, 0, [w, h]).astype(int)

    # An empty slice would otherwise reach cv2.resize and fail with an
    # opaque OpenCV assertion; report the actual cause instead.
    if x2 <= x1 or y2 <= y1:
        raise ValueError(
            f"crop window x=[{x1}, {x2}), y=[{y1}, {y2}) is empty for a "
            f"{w}x{h} image; the landmarks lie outside the frame or coincide"
        )

    cropped = image[y1:y2, x1:x2]
    return cv2.resize(cropped, (output_size, output_size))

# --- Input clip and output array for this run ---
video_path = "SourceVideos/J46.mp4"
output_npy = "Images_Keypoints/letter_J46_sequence.npy"

# At most this many landmark frames are written to the .npy file.
SEQUENCE_LENGTH = 30
# Filled frame by frame with flattened (x, y, z) landmark values.
sequence = []

# --- Short aliases for the MediaPipe Tasks classes ---
BaseOptions = python.BaseOptions
VisionRunningMode = vision.RunningMode
HandLandmarker = vision.HandLandmarker
HandLandmarkerOptions = vision.HandLandmarkerOptions
HandLandmarkerResult = vision.HandLandmarkerResult

# --- Drawing helpers from the mp.solutions namespace ---
hand_connections = mp.solutions.hands.HAND_CONNECTIONS
drawing_utils = mp.solutions.drawing_utils
drawing_styles = mp.solutions.drawing_styles

# Landmarker configured for video input, detecting up to two hands per frame.
options = HandLandmarkerOptions(
    running_mode=VisionRunningMode.VIDEO,
    num_hands=2,
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
)

# Run the hand landmarker over every frame of `video_path`. For each frame
# with a detection, the first hand's landmarks are appended to `sequence`,
# drawn onto the frame, and a zoomed crop is written to images_keypoints/.
with HandLandmarker.create_from_options(options) as landmarker:
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Make the real cause visible instead of only ending up with an
        # empty `sequence` later on.
        print(f"Could not open video: {video_path}")

    # The reported FPS can be 0 (or otherwise unusable) for some files/backends;
    # dividing by it would raise ZeroDivisionError. 30 fps is an assumed fallback.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    # Loop-invariant: create the output folder once rather than per frame.
    os.makedirs("images_keypoints", exist_ok=True)

    timestamp = 0    # milliseconds, passed to detect_for_video
    frame_id = 0     # running index of saved crops (used in file names)
    frame_index = 0  # running index of decoded frames

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

            # Detect hand
            result = landmarker.detect_for_video(mp_image, timestamp)

            if result.hand_landmarks:
                # Only the first detected hand is used.
                hand = result.hand_landmarks[0]

                # Store flattened landmark list (x, y, z)
                sequence.append([coord for lm in hand for coord in (lm.x, lm.y, lm.z)])

                # draw_landmarks expects a NormalizedLandmarkList proto.
                landmark_proto = landmark_pb2.NormalizedLandmarkList()
                for lm in hand:
                    landmark_proto.landmark.add(x=lm.x, y=lm.y, z=lm.z)

                drawing_utils.draw_landmarks(
                    frame,
                    landmark_proto,
                    hand_connections,
                    landmark_drawing_spec=DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2),
                    connection_drawing_spec=DrawingSpec(color=(255, 255, 255), thickness=1)
                )

                # The crop is taken after drawing, so it contains the overlay.
                try:
                    cropped_hand = zoom_and_crop(frame, hand, output_size=224)
                except (ValueError, cv2.error) as err:
                    # E.g. landmarks outside the frame give an empty crop;
                    # skip this image instead of aborting the whole video.
                    print(f"Frame {frame_index}: could not crop hand ({err})")
                else:
                    out_file = f"images_keypoints/J46_{frame_id}.jpg"
                    # cv2.imwrite signals failure through its return value.
                    if not cv2.imwrite(out_file, cropped_hand):
                        print(f"Failed to write {out_file}")
                    frame_id += 1

            # Live preview of the (possibly annotated) frame.
            plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            plt.axis('off')
            plt.pause(0.001)
            plt.clf()

            # Derive the timestamp from the frame index so truncation does not
            # accumulate, and keep it strictly increasing for the landmarker.
            frame_index += 1
            timestamp = max(timestamp + 1, int(round(frame_index * 1000.0 / fps)))
    finally:
        # Release the capture even if detection, drawing or display fails.
        # No cv2.destroyAllWindows(): frames are shown with matplotlib, so no
        # OpenCV window is ever created.
        cap.release()

# Persist the collected landmark vectors, keeping at most SEQUENCE_LENGTH frames.
if not sequence:
    print("No hand landmarks were detected :(")
else:
    target_dir = os.path.dirname(output_npy)
    os.makedirs(target_dir, exist_ok=True)

    kept = np.array(sequence[:SEQUENCE_LENGTH])
    np.save(output_npy, kept)

    saved_count = min(SEQUENCE_LENGTH, len(sequence))
    print(f"Saved {saved_count} frames to {output_npy}")


In [None]:
##### CREATING IMAGES WITH BLACK BACKGROUND + LANDMARKS

In [None]:
import os
import cv2
import numpy as np
import random
import mediapipe as mp

# Set roots and other params
video_root = "RawVideos"
output_root = "DynamicLetters_BlackBG"
image_size = 224
frames_per_video = 10
allowed_exts = [".mp4", ".mov", ".MOV"]
random_seed = 42  # fixed seed so the sampled frames are the same on every run

# Setup MediaPipe
mp_hands = mp.solutions.hands
drawing = mp.solutions.drawing_utils

# Compare extensions case-insensitively so e.g. ".MP4" is not silently skipped.
_video_exts = tuple({ext.lower() for ext in allowed_exts})
# Private generator: reproducible sampling without touching the global random state.
_rng = random.Random(random_seed)

# Go through Videos
for split in ["Train", "Test"]:
    split_path = os.path.join(video_root, split)
    if not os.path.isdir(split_path):
        # os.listdir would raise here; report the missing folder and move on.
        print(f"Skipping missing folder: {split_path}")
        continue

    for file in sorted(os.listdir(split_path)):
        if not file.lower().endswith(_video_exts):
            continue

        # The class label is the upper-cased first character of the file name.
        label = file[0].upper()
        video_id = os.path.splitext(file)[0]
        video_path = os.path.join(split_path, file)
        save_dir = os.path.join(output_root, split, label, video_id)
        os.makedirs(save_dir, exist_ok=True)

        cap = cv2.VideoCapture(video_path)
        detected_frames = []

        # A fresh Hands instance per video: with static_image_mode=False the
        # solution treats successive inputs as one stream, so reusing a single
        # instance would carry state from one video into the next. The context
        # manager closes it even if processing fails.
        with mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5) as hands:
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = hands.process(frame_rgb)

                    if results.multi_hand_landmarks:
                        # Draw the landmarks on a black square canvas instead of the frame.
                        black = np.zeros((image_size, image_size, 3), dtype=np.uint8)

                        for hand_landmarks in results.multi_hand_landmarks:
                            drawing.draw_landmarks(black, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                        detected_frames.append(black)
            finally:
                # Release the capture even if decoding or detection raises.
                cap.release()

        # Randomly sample and save up to frames_per_video frames
        selected_frames = _rng.sample(detected_frames, min(frames_per_video, len(detected_frames)))
        for i, frame in enumerate(selected_frames):
            out_path = os.path.join(save_dir, f"{video_id}_{i}.jpg")
            # cv2.imwrite signals failure through its return value.
            if not cv2.imwrite(out_path, frame):
                print(f"Failed to write {out_path}")

        print(f"{video_id} → {len(selected_frames)} frames saved in {save_dir}")

print("All videos processed!")