In [None]:
"""
Landmarks Definition: Landmarks are specific points identified on a hand. In MediaPipe's context, these are key points such as fingertips, knuckles, and the palm center.

Coordinates: Each landmark is represented by its (x, y, z) coordinates:
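    - x and y: Normalized image coordinates in the range 0.0 to 1.0 (x is scaled by the frame width, y by the frame height); multiply by the frame size to get pixel positions.
    - z: Relative depth, provided as a floating-point value. The wrist landmark is the origin, and smaller values mean the landmark is closer to the camera; the scale is roughly the same as x.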

Detection and Tracking: MediaPipe uses machine learning models to detect and track these landmarks in real-time. It leverages deep learning techniques to accurately identify the positions of these points across frames.
"""

import cv2
import mediapipe as mp

# --- Configuration -----------------------------------------------------------
# Video source: 0 selects the default webcam; assign a path string instead to
# read from a video file. It must be assigned here, otherwise the
# cv2.VideoCapture call below fails with a NameError on a fresh kernel.
video_source = 0

# Every landmark of every detected hand is printed for every frame. Set this to
# False to keep the notebook output small during long runs.
print_landmarks = True

# Title of the preview window; also used to detect that the window was closed.
window_name = 'Hand Detection'

# MediaPipe Hands solution and its drawing helpers
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(video_source)

try:
    if not cap.isOpened():
        # Raise instead of calling exit(): inside a notebook exit() shuts the
        # kernel down, discarding the state of every other cell.
        raise RuntimeError(f"Error: Could not open video source: {video_source}")

    # The context manager closes the Hands graph even if the loop raises.
    with mp_hands.Hands(static_image_mode=False,
                        max_num_hands=2,
                        min_detection_confidence=0.5,
                        min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture frame from video source.")
                break

            # Frame size is constant for all landmarks in this frame, so read it once.
            frame_height, frame_width = frame.shape[:2]

            # OpenCV delivers BGR frames; MediaPipe is fed the RGB conversion.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = hands.process(rgb_frame)

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    for idx, landmark in enumerate(hand_landmarks.landmark):
                        # Scale the normalized x/y to pixel coordinates; z is kept as reported.
                        landmark_x = int(landmark.x * frame_width)
                        landmark_y = int(landmark.y * frame_height)
                        landmark_z = landmark.z

                        if print_landmarks:
                            print(f"Landmark {idx}: ({landmark_x}, {landmark_y}, {landmark_z})")

                        # Mark the landmark position with a filled circle.
                        cv2.circle(frame, (landmark_x, landmark_y), 5, (255, 0, 0), -1)

                    # Draw the hand skeleton (landmarks plus connections) on the frame.
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            cv2.imshow(window_name, frame)

            # Stop on 'q' or when the preview window has been closed.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q') or cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
                break
finally:
    # Always free the capture device and the GUI window, including on errors
    # and on a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [13]:
# Real-time hand detection on an RTSP stream with MediaPipe Hands, with an FPS overlay.

import os
import time

import cv2
import mediapipe as mp

# --- Configuration -----------------------------------------------------------
# An RTSP URL usually embeds the camera's username and password, so it is read
# from the environment rather than stored in the notebook. Expected form:
#   RTSP_URL="rtsp://<user>:<password>@<host>:554/live/ch1"
rtsp_url = os.environ.get("RTSP_URL")
if not rtsp_url:
    raise RuntimeError("Error: RTSP_URL environment variable is not set")

# Title of the preview window; also used to detect that the window was closed.
window_name = 'RTSP Stream with MediaPipe Hands and FPS'

# MediaPipe Hands solution and its drawing helpers
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Connect to the RTSP stream
cap = cv2.VideoCapture(rtsp_url)

try:
    if not cap.isOpened():
        # Raise instead of calling exit(): inside a notebook exit() shuts the
        # kernel down, discarding the state of every other cell.
        raise RuntimeError("Error: Unable to open video stream")

    # Timestamps for the FPS estimate. perf_counter() is a monotonic
    # high-resolution clock, so the per-frame delta is never negative.
    prev_frame_time = time.perf_counter()
    new_frame_time = prev_frame_time

    with mp_hands.Hands(
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7) as hands:

        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                print("Error: Unable to read frame")
                break

            # FPS is the inverse of the time since the previous frame. A zero
            # delta (two readings on the same clock tick) is reported as 0
            # instead of raising ZeroDivisionError.
            new_frame_time = time.perf_counter()
            frame_interval = new_frame_time - prev_frame_time
            prev_frame_time = new_frame_time
            fps = int(1 / frame_interval) if frame_interval > 0 else 0
            fps_text = "FPS: " + str(fps)

            # Flip the image horizontally for a selfie-view display and
            # convert the BGR image to RGB for MediaPipe.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            # Marking the image as not writeable lets MediaPipe take it by reference.
            image.flags.writeable = False
            results = hands.process(image)

            # Back to a writeable BGR image so OpenCV can draw on and display it.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Overlay the frame rate and the feed label
            cv2.putText(image, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(image, "feed: 01", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            cv2.imshow(window_name, image)

            # Stop on 'q' or when the preview window has been closed.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q') or cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
                break
finally:
    # Always free the stream connection and the GUI window, including on
    # errors and on a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()
