In [5]:
import cv2
from matplotlib import pyplot as plt
import numpy as np
import mediapipe as mp
import time
import os


In [6]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_face_mesh = mp.solutions.face_mesh 

In [7]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    return image, results
    

In [8]:
def draw_landmarks(image, results):
    # Face (light gray thin mesh)
    mp_drawing.draw_landmarks(
        image, results.face_landmarks, mp_face_mesh.FACEMESH_TESSELATION,
        None, mp_drawing.DrawingSpec((200,200,200), thickness=1))
    
    # Pose (blue thick lines with green dots)
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
        mp_drawing.DrawingSpec((0,255,0), circle_radius=2),
        mp_drawing.DrawingSpec((255,0,0), thickness=3))
    
    # Hands (orange left/magenta right)
    mp_drawing.draw_landmarks(
        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        None, mp_drawing.DrawingSpec((0,165,255), thickness=2))
    
    mp_drawing.draw_landmarks(
        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        None, mp_drawing.DrawingSpec((255,0,255), thickness=2))

In [9]:
def extract_keypoints(results) : 
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(63)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(63)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)

    return np.concatenate([face, pose, lh, rh])
    

In [7]:
video_name = 'movie.mp4'
video_path = os.path.join('DATA_COLLECTION', 'movie', video_name); 

In [8]:
cap = cv2.VideoCapture(video_path)


with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened() : 
        ret, frame = cap.read()
        if not ret : 
            break
    
        image, results = mediapipe_detection(frame, holistic)

        pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
        print(pose.shape)

        draw_landmarks(image, results)
        cv2.imshow('modified_frames', image)

        if cv2.waitKey(50) & 0xFF == ord('q'):
            break


cap.release()
cv2.destroyAllWindows()
        

(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)


In [9]:
res = np.array(results)
if res is None:
    print("Frame is empty or not loaded correctly.")
else:
    print("Shape:", res.shape)


Shape: ()


In [3]:
import os
import json

# Load the JSON data from the file
with open('WLASL_v0.3.json', 'r') as f:
    data = json.load(f)

root = 'DATA_COLLECTION_version_3'

def time(start):
    secs = (start + 29) // 30
    mint = secs // 60
    secs = secs % 60
    return f"{mint} : {secs}"

for item in data:
    gloss = item['gloss']
    instances = item['instances']

    for video in instances:
        video_id = video['video_id']
        path = os.path.join(root, video_id)
        start_frame = video['frame_start']
        end_frame = video['frame_end']
        dif = end_frame - start_frame
        save_path = os.path.join(root, gloss, f"{video_id}.mp4")

        if os.path.exists(save_path):
            print('here')
            if dif > 30:
                print(f"The video in: {gloss} {path} has difference: {dif}, starts at: {time(start_frame)}")


here
here
here
here
The video in: book DATA_COLLECTION_version_3\70212 has difference: 99, starts at: 1 : 12
here
The video in: book DATA_COLLECTION_version_3\70266 has difference: 120, starts at: 2 : 5
here
here
here
here
here
here
here
here
The video in: drink DATA_COLLECTION_version_3\70173 has difference: 121, starts at: 3 : 11
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
The video in: computer DATA_COLLECTION_version_3\12306 has difference: 59, starts at: 2 : 51
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
The video in: before DATA_COLLECTION_version_3\05724 has difference: 46, starts at: 0 : 58
here
The video in: before DATA_COLLECTION_version_3\70348 has difference: 95, starts at: 2 : 40
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
The video in: chair DATA_COLLECTION_version_3\70230 has difference: 130, starts at: 1 : 11
here
The v

In [11]:
video_path = os.path.join(root, 'go', f"{24857}.mp4")

cap = cv2.VideoCapture(video_path)


with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened() : 
        ret, frame = cap.read()
        if not ret : 
            break
    
        image, results = mediapipe_detection(frame, holistic)

        pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
        print(pose.shape)

        draw_landmarks(image, results)
        cv2.imshow('modified_frames', image)

        if cv2.waitKey(50) & 0xFF == ord('q'):
            break


cap.release()
cv2.destroyAllWindows()
        

(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
(132,)
