In [None]:
"""
Landmarks Definition: Landmarks are specific points identified on a hand. MediaPipe Hands reports 21 landmarks per hand (indices 0-20): the wrist plus four points along each finger, from the knuckle joints out to the fingertip.

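Coordinates: Each landmark is represented by its (x, y, z) coordinates:
    - x and y: Position within the image, normalized to the range [0.0, 1.0] by the image width and height respectively. Multiply by the frame width/height to obtain pixel coordinates.
    - z: Relative depth as a floating-point value, measured with the wrist landmark as the origin (so the wrist's z is approximately 0). Smaller (more negative) values are closer to the camera; the scale is roughly the same as that of x.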

Detection and Tracking: MediaPipe uses machine learning models to detect and track these landmarks in real-time. It leverages deep learning techniques to accurately identify the positions of these points across frames.
"""

import cv2
import mediapipe as mp

# Initialize MediaPipe Hands (up to two hands, 0.5 confidence thresholds).
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=2,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5)

# Drawing helpers used to overlay the hand skeleton on each frame.
mp_drawing = mp.solutions.drawing_utils

# Set the video source: 0 for webcam, or provide a video file path
video_source = "C:/Users/MSI/Desktop/Untitled video.mp4" # Use 0 for webcam, or provide a path to a video file
#video_source = "F:/gopro/cutter/0881/GOPR0881.MP4"

cap = cv2.VideoCapture(video_source)

# Everything that uses the capture, the preview window or the Hands graph runs
# inside try/finally so those native resources are released even when the loop
# is interrupted (exception, Ctrl+C, kernel interrupt) or the source is invalid.
try:
    if not cap.isOpened():
        print(f"Error: Could not open video source: {video_source}")
        # SystemExit instead of the site-provided exit(): it does not depend
        # on the `site` module and still passes through the cleanup below.
        raise SystemExit(1)

    while cap.isOpened():
        # read() returns ret == False both on a real capture error and when a
        # video file has simply run out of frames.
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame from video source.")
            break

        # Frame size is constant for every landmark in this frame, so look it
        # up once instead of once per landmark.
        frame_height, frame_width = frame.shape[:2]

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame with MediaPipe Hands
        result = hands.process(rgb_frame)

        # multi_hand_landmarks is None when no hand was detected.
        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                for idx, landmark in enumerate(hand_landmarks.landmark):
                    # Scale the normalized x/y to pixel coordinates; z is
                    # kept as reported by MediaPipe.
                    landmark_x = int(landmark.x * frame_width)
                    landmark_y = int(landmark.y * frame_height)
                    landmark_z = landmark.z

                    # Print coordinates of each landmark
                    print(f"Landmark {idx}: ({landmark_x}, {landmark_y}, {landmark_z})")

                    # Mark the landmark with a filled blue (BGR) circle.
                    cv2.circle(frame, (landmark_x, landmark_y), 5, (255, 0, 0), -1)

                # Draw the landmark connections (hand skeleton) on the frame.
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # Display the frame
        cv2.imshow('Hand Detection', frame)

        # Exit on 'q' key press or window close
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or cv2.getWindowProperty('Hand Detection', cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

In [4]:
import cv2
import mediapipe as mp
import datetime

def process_video(video_source, save_video, save_landmarks, output_video_path=None, output_landmarks_path=None):
    """Detect hand landmarks in a video, display them, and optionally save the results.

    Every frame is run through MediaPipe Hands. Each detected landmark is
    printed as ``Landmark <idx>: (<x_px>, <y_px>, <z>)``, marked with a circle,
    and the hand connections are drawn on top. The annotated frame is shown in
    a window named 'Hand Detection'. Processing stops when no further frame
    can be read, when 'q' is pressed, or when the preview window is closed.

    Args:
        video_source: Camera index (e.g. 0) or path to a video file, passed
            straight to ``cv2.VideoCapture``.
        save_video: When true, annotated frames are written to
            ``output_video_path``.
        save_landmarks: When true, every printed landmark line is also
            written to ``output_landmarks_path``.
        output_video_path: Destination of the annotated video. Defaults to
            ``output_video_<timestamp>.avi``.
        output_landmarks_path: Destination of the landmark text file.
            Defaults to ``extracted_landmarks_<timestamp>.txt``.

    Returns:
        None. If the source cannot be opened an error is printed and the
        function returns without processing anything.
    """
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # Get the current timestamp for default file naming
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    # Define default output video file path if not provided
    if output_video_path is None:
        output_video_path = f"output_video_{timestamp}.avi"

    # Define default output text file path for landmarks if not provided
    if output_landmarks_path is None:
        output_landmarks_path = f"extracted_landmarks_{timestamp}.txt"

    cap = cv2.VideoCapture(video_source)

    if not cap.isOpened():
        print(f"Error: Could not open video source: {video_source}")
        cap.release()
        return

    # Resources acquired below start as None so the finally-clause can tell
    # which ones actually need releasing if setup fails part-way through.
    hands = None
    out = None
    landmark_file = None

    try:
        hands = mp_hands.Hands(static_image_mode=False,
                               max_num_hands=2,
                               min_detection_confidence=0.5,
                               min_tracking_confidence=0.5)

        if save_video:
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Keep the frame rate as a float: truncating e.g. 29.97 to 29
            # changes the playback speed of the saved video. Some sources
            # (typically webcams) report 0 or NaN, which is not a usable
            # writer frame rate, so fall back to a conventional default.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = 30.0

            # Pick a codec tag that matches the container: XVID belongs in
            # .avi files, while .mp4 files expect mp4v.
            is_mp4 = str(output_video_path).lower().endswith('.mp4')
            fourcc = cv2.VideoWriter_fourcc(*('mp4v' if is_mp4 else 'XVID'))
            out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

            if not out.isOpened():
                # Keep displaying / saving landmarks, but tell the user the
                # video is not being written instead of failing silently.
                print(f"Warning: Could not open output video file: {output_video_path}")
                out.release()
                out = None

        if save_landmarks:
            landmark_file = open(output_landmarks_path, 'w')

        while cap.isOpened():
            # read() returns ret == False both on a real capture error and
            # when a video file has simply run out of frames.
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture frame from video source.")
                break

            # Constant for every landmark in this frame; look it up once.
            height, width = frame.shape[:2]

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process the frame with MediaPipe Hands
            result = hands.process(rgb_frame)

            # multi_hand_landmarks is None when no hand was detected.
            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    for idx, landmark in enumerate(hand_landmarks.landmark):
                        # Scale the normalized x/y to pixel coordinates; z is
                        # kept as reported by MediaPipe.
                        landmark_x = int(landmark.x * width)
                        landmark_y = int(landmark.y * height)
                        landmark_z = landmark.z

                        # Same text goes to stdout and to the landmarks file.
                        line = f"Landmark {idx}: ({landmark_x}, {landmark_y}, {landmark_z})"
                        print(line)
                        if landmark_file is not None:
                            landmark_file.write(line + "\n")

                        # Mark the landmark with a filled blue (BGR) circle.
                        cv2.circle(frame, (landmark_x, landmark_y), 5, (255, 0, 0), -1)

                    # Draw the landmark connections on the frame.
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Display the frame
            cv2.imshow('Hand Detection', frame)

            # Write the annotated frame to the output video file
            if out is not None:
                out.write(frame)

            # Exit on 'q' key press or window close
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q') or cv2.getWindowProperty('Hand Detection', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Release everything that was acquired, even if processing raised.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        if hands is not None:
            hands.close()
        if landmark_file is not None:
            landmark_file.close()

def main():
    """Run hand-landmark extraction on a fixed video file, saving video and landmarks."""
    # Input video; pass 0 here instead to read from the webcam.
    source = "C:/Users/MSI/Desktop/Untitled video.mp4"

    # Destinations for the annotated video and the landmark text file.
    video_destination = "C:/Users/MSI/Desktop/custom_output_video_path.mp4"
    landmarks_destination = "C:/Users/MSI/Desktop/custom_extracted_landmarks_path.txt"

    process_video(
        source,
        save_video=True,
        save_landmarks=True,
        output_video_path=video_destination,
        output_landmarks_path=landmarks_destination,
    )

# Run main() only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()


Landmark 0: (1433, 1046, -2.4974207235572976e-07)
Landmark 1: (1291, 1037, -0.025312824174761772)
Landmark 2: (1180, 951, -0.03277086839079857)
Landmark 3: (1116, 848, -0.03357819840312004)
Landmark 4: (1071, 764, -0.02988537959754467)
Landmark 5: (1287, 823, -0.02748826891183853)
Landmark 6: (1184, 714, -0.030658649280667305)
Landmark 7: (1123, 683, -0.029132064431905746)
Landmark 8: (1084, 676, -0.026874318718910217)
Landmark 9: (1359, 780, -0.015950612723827362)
Landmark 10: (1264, 660, -0.019424358382821083)
Landmark 11: (1198, 620, -0.018731534481048584)
Landmark 12: (1148, 612, -0.01798631064593792)
Landmark 13: (1405, 761, -0.0055629597045481205)
Landmark 14: (1315, 662, -0.01093394123017788)
Landmark 15: (1252, 629, -0.011916576884686947)
Landmark 16: (1203, 626, -0.011618337593972683)
Landmark 17: (1430, 757, 0.0034588156268000603)
Landmark 18: (1361, 680, -0.0023886386770755053)
Landmark 19: (1307, 656, -0.003379767294973135)
Landmark 20: (1263, 656, -0.0024922383017838)
Land