# Extracting landmarks from the video

ffmpeg is required:
sudo apt install ffmpeg

In [None]:
!pip -q install mediapipe opencv-python moviepy

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import requests
from moviepy.editor import VideoFileClip

# URL of the video
video_url = "https://media.tagesschau.de/video/2024/1105/TV-20241105-2024-1400.webxxl.h264.mp4"

# Stream the download so the video is never held in memory as a whole.
# The timeout keeps the cell from hanging forever on a stalled connection,
# and the `with` block guarantees the connection is released afterwards.
with requests.get(video_url, stream=True, timeout=30) as response:
    if response.status_code == 200:
        # Try to extract the filename from the Content-Disposition header
        filename = None
        content_disposition = response.headers.get('Content-Disposition', '')
        if 'filename=' in content_disposition:
            # Drop any parameters that follow the filename and the surrounding quotes
            raw_name = content_disposition.split('filename=')[1].split(';')[0]
            # basename() keeps a server-supplied name from pointing outside
            # the current working directory (e.g. "../../something")
            filename = os.path.basename(raw_name.strip().strip('"'))

        # Fallback to extracting the filename from the URL if the header gave nothing usable
        if not filename:
            filename = os.path.basename(video_url)

        # Save the video file
        video_filepath = filename
        with open(video_filepath, "wb") as file:
            # 1 MiB chunks: far fewer Python-level iterations than 1 KiB for a large video
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    file.write(chunk)
        print(f"Video downloaded successfully as {video_filepath}")

        # Extract audio and save as MP3
        audio_filename = os.path.splitext(filename)[0] + ".mp3"  # Change extension to .mp3
        video_clip = VideoFileClip(video_filepath)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                # A video without an audio track has no `.audio` clip to write
                print(f"No audio track found in {video_filepath}")
            else:
                try:
                    audio_clip.write_audiofile(audio_filename)
                finally:
                    audio_clip.close()
                print(f"Audio extracted and saved as {audio_filename}")
        finally:
            # Always release the reader, even if the audio export failed
            video_clip.close()
    else:
        print(f"Failed to download video. Status code: {response.status_code}")


  if event.key is 'enter':



Video downloaded successfully as TV-20241105-2024-1400.webxxl.h264.mp4
MoviePy - Writing audio in TV-20241105-2024-1400.webxxl.h264.mp3


                                                                       

MoviePy - Done.
Audio extracted and saved as TV-20241105-2024-1400.webxxl.h264.mp3




In [None]:
import cv2
import mediapipe as mp

Google's MediaPipe Hands solution outputs a set of 21 3D hand landmarks for each detected hand in the input image or video. These landmarks represent key points on the hand, such as the tips of fingers, joints, and the wrist. This notebook uses MediaPipe Holistic, which reports these hand landmarks together with body-pose and face landmarks. Each landmark is identified by its index and provides three key pieces of data:

- x: The normalized x-coordinate of the landmark, relative to the width of the input image. The value is in the range [0, 1].
- y: The normalized y-coordinate of the landmark, relative to the height of the input image. The value is in the range [0, 1].
- z: The normalized z-coordinate of the landmark, which indicates the depth of the landmark relative to the wrist. A smaller value means the landmark is closer to the camera.

In [None]:
import cv2
import mediapipe as mp
import json
from mediapipe.framework.formats import landmark_pb2  # Import added

def extract_holistic_landmarks(input_video_path, landmarks_output_path):
    """Extract MediaPipe Holistic landmarks from a video and save them as a JSON array.

    Only the right half of each frame is passed to MediaPipe, so the stored
    coordinates are the values MediaPipe reports for that cropped image, not
    for the full frame.

    The output file holds one JSON object per decoded frame, in order:
    ``{"frame": <index>, "pose_landmarks": [...], "face_landmarks": [...],
    "left_hand_landmarks": [...], "right_hand_landmarks": [...]}``. Each
    landmark is ``{"x": ..., "y": ..., "z": ...}``; a list is empty when
    nothing was detected for that part in that frame.

    Args:
        input_video_path: Path of the video to read with OpenCV.
        landmarks_output_path: Path of the JSON file to create or overwrite.

    Returns:
        None. Failure to open the video or the output file is reported with
        ``print`` and the function returns early.
    """
    # Initialize VideoCapture
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print(f"Error opening video file {input_video_path}")
        return

    # Helper: turn a MediaPipe landmark list into plain x/y/z dictionaries
    def get_landmarks(landmark_list):
        return [
            {"x": lm.x, "y": lm.y, "z": lm.z}
            for lm in landmark_list.landmark
        ] if landmark_list else []

    try:
        # Frame count as reported by the container. It is used for the
        # progress message only, because it may not match the number of
        # frames that can actually be decoded.
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

        # Calculate the starting x-coordinate for the right half
        right_half_width = width // 2
        start_x = width - right_half_width

        # Initialize the MediaPipe Holistic solution
        mp_holistic = mp.solutions.holistic

        # Open the landmarks output file
        try:
            f_landmarks = open(landmarks_output_path, 'w')
        except Exception as e:
            print(f"Error opening landmarks file {landmarks_output_path}: {e}")
            return

        # Both context managers are closed even if processing raises
        with f_landmarks, mp_holistic.Holistic(
            static_image_mode=False,
            model_complexity=1,
            smooth_landmarks=True,
            enable_segmentation=False,
            refine_face_landmarks=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        ) as holistic:
            f_landmarks.write('[')  # Start of JSON array
            frame_idx = 0
            while cap.isOpened():
                ret, frame_bgr = cap.read()
                if not ret:
                    break

                # --- Process only the right half of the frame ---
                right_half_frame = frame_bgr[:, start_x:width]

                # MediaPipe expects RGB input; OpenCV decodes frames as BGR
                image = cv2.cvtColor(right_half_frame, cv2.COLOR_BGR2RGB)
                # Marked read-only before processing, as in MediaPipe's own examples
                image.flags.writeable = False

                # Process the image with MediaPipe Holistic
                results = holistic.process(image)

                # get_landmarks() returns [] for a part that was not detected
                frame_landmarks = {
                    "frame": frame_idx,
                    "pose_landmarks": get_landmarks(results.pose_landmarks),
                    "face_landmarks": get_landmarks(results.face_landmarks),
                    "left_hand_landmarks": get_landmarks(results.left_hand_landmarks),
                    "right_hand_landmarks": get_landmarks(results.right_hand_landmarks)
                }

                # Write the separator BEFORE every item except the first, so the
                # array is valid JSON however many frames are actually decoded.
                if frame_idx > 0:
                    f_landmarks.write(',\n')
                json.dump(frame_landmarks, f_landmarks)

                # Increment frame index
                frame_idx += 1

                # Optional: Display progress
                if frame_idx % 100 == 0:
                    print(f"Processed {frame_idx}/{frame_count} frames.")

            # Close the JSON array
            f_landmarks.write(']')
    finally:
        # Release the capture on every exit path, including early returns and errors
        cap.release()

    print(f"Holistic landmarks have been saved to {landmarks_output_path}")


In [None]:
import os
import time

# List of input video file paths
input_videos = [
    'TV-20241105-2024-1400.webxxl.h264.mp4',
    # Add more video file paths as needed (keep the trailing comma on each
    # entry, otherwise adjacent string literals are silently concatenated)
]

# Directory to save the landmark JSON files
output_dir = 'processed_videos'

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Process each video
for input_video in input_videos:
    # Generate output file paths
    base_name = os.path.splitext(os.path.basename(input_video))[0]
    landmarks_output = os.path.join(output_dir, f"{base_name}_landmarks.json")

    # Start timer (perf_counter is monotonic, so the elapsed time cannot be
    # distorted by system clock adjustments)
    start_time = time.perf_counter()

    # Extract the landmarks with the function defined in this notebook
    print(f"Processing {input_video}...")
    extract_holistic_landmarks(input_video, landmarks_output)
    print(f"Finished processing {input_video}")

    # Stop timer
    end_time = time.perf_counter()

    # Calculate and print elapsed time
    elapsed_time = end_time - start_time
    print(f"Time taken for {input_video}: {elapsed_time:.2f} seconds\n")

Processing TV-20241105-2024-1400.webxxl.h264.mp4...
Processed 100/24944 frames.
Processed 200/24944 frames.
Processed 300/24944 frames.
Processed 400/24944 frames.
Processed 500/24944 frames.
Processed 600/24944 frames.
Processed 700/24944 frames.
Processed 800/24944 frames.
Processed 900/24944 frames.
Processed 1000/24944 frames.
Processed 1100/24944 frames.
Processed 1200/24944 frames.
Processed 1300/24944 frames.
Processed 1400/24944 frames.
Processed 1500/24944 frames.
Processed 1600/24944 frames.
Processed 1700/24944 frames.
Processed 1800/24944 frames.
Processed 1900/24944 frames.
Processed 2000/24944 frames.
Processed 2100/24944 frames.
Processed 2200/24944 frames.
Processed 2300/24944 frames.
Processed 2400/24944 frames.
Processed 2500/24944 frames.
Processed 2600/24944 frames.
Processed 2700/24944 frames.
Processed 2800/24944 frames.
Processed 2900/24944 frames.
Processed 3000/24944 frames.
Processed 3100/24944 frames.
Processed 3200/24944 frames.
Processed 3300/24944 frames.
