In [1]:
"""
Landmarks Definition: Landmarks are specific points identified on a hand. In MediaPipe's context, these are key points such as fingertips, knuckles, and the palm center.

Coordinates: Each landmark is represented by its (x, y, z) coordinates:
    - x and y: Coordinates within the image or frame, ranging from 0 to 1.0. They indicate the position in the frame.
    - z: Depth coordinate relative to the wrist landmark, which serves as the origin; the smaller the value, the closer the landmark is to the camera. This is provided as a floating-point value on roughly the same scale as x.

Detection and Tracking: MediaPipe uses machine learning models to detect and track these landmarks in real-time. It leverages deep learning techniques to accurately identify the positions of these points across frames.
"""

import cv2
import mediapipe as mp
import os

# Detect hands in a webcam/video stream, print every landmark, log the
# landmarks to a CSV file and show the annotated frames in a window.

# Initialize MediaPipe Hands and drawing helpers
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Set the video source: 0 for webcam, or provide a video file path
# video_source = "path/to/video.mp4"
video_source = 0

# Derive the CSV name once, up front: it does not change while the capture runs.
# Integer sources are camera indices and have no file name to borrow.
if isinstance(video_source, int):
    file_name = "webcam" if video_source == 0 else f"webcam{video_source}"
else:
    file_name = os.path.basename(video_source).split(".")[0]
csv_path = 'hand_landmarks_' + file_name + '.csv'

cap = cv2.VideoCapture(video_source)

if not cap.isOpened():
    cap.release()
    print(f"Error: Could not open video source: {video_source}")
    # Raise SystemExit instead of calling exit(): exit() is an interactive-shell
    # helper and, under Jupyter, asks the whole kernel to shut down.
    raise SystemExit(1)

try:
    # The CSV is opened once in 'w' mode, so an existing file is overwritten and
    # every landmark of every frame is written (one row per landmark, in
    # detection order). Both context managers close even if the loop raises.
    with open(csv_path, 'w') as csv_file, mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=10,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture frame from video source.")
                break

            frame_height, frame_width = frame.shape[:2]

            # MediaPipe expects RGB input, OpenCV delivers BGR
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = hands.process(rgb_frame)

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    for idx, landmark in enumerate(hand_landmarks.landmark):
                        # x/y are scaled by the frame size to get pixel positions
                        landmark_x = int(landmark.x * frame_width)
                        landmark_y = int(landmark.y * frame_height)
                        landmark_z = landmark.z  # Z-coordinate (depth)

                        # Print coordinates of each landmark
                        print(f"Landmark {idx}: ({landmark_x}, {landmark_y}, {landmark_z})")

                        # Save landmark coordinates to the CSV file
                        csv_file.write(f"Landmark_{idx}, {landmark_x},{landmark_y},{landmark_z}\n")

                        # Draw circles on the landmarks (optional)
                        cv2.circle(frame, (landmark_x, landmark_y), 5, (255, 0, 0), -1)

                    # Draw hand landmarks and their connections on the frame
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Display the frame
            cv2.imshow('Hand Detection', frame)

            # Exit on 'q' key press or window close
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q') or cv2.getWindowProperty('Hand Detection', cv2.WND_PROP_VISIBLE) < 1:
                break
finally:
    # Release resources even when the loop ends with an exception
    cap.release()
    cv2.destroyAllWindows()






Landmark 0: (620, 300, 1.0572372275419184e-06)
Landmark 1: (565, 333, -0.059618327766656876)
Landmark 2: (492, 329, -0.09728936851024628)
Landmark 3: (433, 314, -0.1253216713666916)
Landmark 4: (378, 306, -0.15625442564487457)
Landmark 5: (473, 234, -0.10648470371961594)
Landmark 6: (408, 192, -0.1624479591846466)
Landmark 7: (362, 166, -0.2005109190940857)
Landmark 8: (323, 144, -0.22793227434158325)
Landmark 9: (507, 195, -0.112321637570858)
Landmark 10: (444, 133, -0.16594122350215912)
Landmark 11: (396, 94, -0.20633405447006226)
Landmark 12: (353, 62, -0.23521625995635986)
Landmark 13: (551, 174, -0.12148717045783997)
Landmark 14: (494, 111, -0.181052103638649)
Landmark 15: (455, 71, -0.21939916908740997)
Landmark 16: (418, 35, -0.2444140762090683)
Landmark 17: (602, 167, -0.13238690793514252)
Landmark 18: (581, 107, -0.19354282319545746)
Landmark 19: (566, 67, -0.22123689949512482)
Landmark 20: (549, 33, -0.23774133622646332)
Landmark 0: (601, 279, 8.355112868230208e-07)
Landmark 

##### Real-time hand detection using an RTSP stream with MediaPipe


In [None]:
# real-time hand detection using rtsp stream with mediapipe 
# single feed

import cv2
import mediapipe as mp
import time

# Show one RTSP camera feed with MediaPipe hand landmarks and an FPS overlay.
import os

# The RTSP URL embeds the camera's username and password, so it is read from
# the environment instead of being stored in the notebook.
# Expected form: rtsp://<user>:<password>@<host>:554/live/ch1
rtsp_url = os.environ.get("RTSP_URL")
if not rtsp_url:
    raise SystemExit("Error: set the RTSP_URL environment variable to the camera stream URL")

# Initialize MediaPipe hands and drawing utilities
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Connect to the RTSP stream
cap = cv2.VideoCapture(rtsp_url)

if not cap.isOpened():
    cap.release()
    print("Error: Unable to open video stream")
    # Raise SystemExit instead of calling exit(): exit() is an interactive-shell
    # helper and, under Jupyter, asks the whole kernel to shut down.
    raise SystemExit(1)

# Initialize variables for FPS calculation.
# perf_counter() is a monotonic, high-resolution clock meant for intervals.
prev_frame_time = time.perf_counter()
new_frame_time = prev_frame_time

try:
    with mp_hands.Hands(
        min_detection_confidence=0.7,
        min_tracking_confidence=0.7) as hands:

        while cap.isOpened():
            ret, image = cap.read()
            if not ret:
                print("Error: Unable to read frame")
                break

            # Calculate FPS; guard against a zero interval between two frames
            new_frame_time = time.perf_counter()
            elapsed = new_frame_time - prev_frame_time
            prev_frame_time = new_frame_time
            fps = int(1 / elapsed) if elapsed > 0 else 0
            fps_text = "FPS: " + str(fps)

            # Flip the image horizontally for a selfie-view display and
            # convert the BGR image to RGB.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            # To improve performance, mark the image as not writeable while it is processed.
            image.flags.writeable = False
            results = hands.process(image)

            # Draw the hand annotations on the image.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Display the frame rate and feed label on the image
            cv2.putText(image, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(image, "feed: 01", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Display the frame
            cv2.imshow('RTSP Stream with MediaPipe Hands and FPS', image)

            # Press 'q' to exit the loop
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the stream and close the window even if the loop raised
    cap.release()
    cv2.destroyAllWindows()


: 

Multi-feed stream with a command-line interface and an optional recording window (start and end time)

To run:
python hand_detection_rtsp.py --streams "1:rtsp://your_rtsp_url1,2:rtsp://your_rtsp_url2" --start "10:00 AM" --end "03:00 PM"


In [1]:
import cv2
import mediapipe as mp
import time
from datetime import datetime
import argparse

# Define function to convert AM/PM times to seconds since midnight
def time_to_seconds(time_str):
    """Convert a 12-hour clock string to seconds since midnight.

    Args:
        time_str: Time in "%I:%M %p" format, e.g. "10:00 AM" or "03:00 PM".

    Returns:
        int: Whole seconds elapsed since midnight for that time of day.

    Raises:
        ValueError: If time_str does not match the expected format.
    """
    parsed = datetime.strptime(time_str, "%I:%M %p")
    total_minutes = parsed.hour * 60 + parsed.minute
    return total_minutes * 60

# Define function to parse arguments
def parse_arguments(argv=None):
    """Parse the command-line options of the multi-feed hand detection script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads sys.argv, which is right for
            `python script.py ...`. Inside a notebook sys.argv holds the
            kernel launcher's own arguments, so pass the list explicitly
            there, e.g. ["--streams", "1:rtsp://host/live"].

    Returns:
        argparse.Namespace with `streams` (str), `start` (str or None) and
        `end` (str or None).

    Raises:
        SystemExit: Raised by argparse when required arguments are missing
            or unknown arguments are given.
    """
    parser = argparse.ArgumentParser(description="Real-time hand detection using RTSP streams with MediaPipe.")
    parser.add_argument('--streams', type=str, required=True, 
                        help="Comma-separated list of RTSP stream URLs in the format 'feed_number:url'.")
    parser.add_argument('--start', type=str, default=None, 
                        help="Start time for recording in AM/PM format (e.g., '10:00 AM').")
    parser.add_argument('--end', type=str, default=None, 
                        help="End time for recording in AM/PM format (e.g., '03:00 PM').")
    return parser.parse_args(argv)

# Multi-feed RTSP hand detection with manual ('r') and scheduled recording.

# Parse command line arguments
args = parse_arguments()

# Convert stream specs ("feed_number:url") into a feed -> URL mapping.
rtsp_streams = {}
for stream in args.streams.split(','):
    # Split on the FIRST colon only: the URL itself contains colons
    # ("rtsp://host:554/..."), so a plain split(':') cannot be unpacked.
    feed_id, separator, url = stream.strip().partition(':')
    if not (separator and feed_id and url):
        raise SystemExit(f"Error: invalid stream spec '{stream}'; expected 'feed_number:url'")
    rtsp_streams[feed_id] = url

# Convert start and end times to seconds since midnight
start_time_seconds = time_to_seconds(args.start) if args.start else None
end_time_seconds = time_to_seconds(args.end) if args.end else None
schedule_enabled = start_time_seconds is not None and end_time_seconds is not None
if not schedule_enabled and (args.start or args.end):
    print("Warning: both --start and --end are required for scheduled recording; schedule ignored")

# Initialize MediaPipe hands and drawing utilities
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Connect to the RTSP streams
caps = {feed_id: cv2.VideoCapture(url) for feed_id, url in rtsp_streams.items()}

if not all(cap.isOpened() for cap in caps.values()):
    # Release the streams that did open before giving up
    for cap in caps.values():
        cap.release()
    print("Error: Unable to open one or more video streams")
    raise SystemExit(1)

# Initialize variables for FPS calculation.
# perf_counter() is a monotonic, high-resolution clock meant for intervals.
prev_frame_time = time.perf_counter()
new_frame_time = prev_frame_time

# Initialize video writers (created lazily, one per feed, when recording starts)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out_videos = {}

# Flags for recording
is_recording = False
record_start_time = None
# Whether the previous loop iteration was inside the scheduled window; the
# schedule acts only when this changes, so a manual 'r' toggle is not undone
# on the next frame.
was_in_window = False


def _release_writers(writers):
    """Finalize every open video writer and empty the mapping."""
    for writer in writers.values():
        writer.release()
    writers.clear()


# One Hands instance per feed: in video mode MediaPipe tracks landmarks from
# one frame to the next, so interleaving frames of different cameras through
# a single instance would mix up the tracking state.
hands_by_feed = {
    feed_id: mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
    for feed_id in caps
}

try:
    while all(cap.isOpened() for cap in caps.values()):
        frames = {}
        for feed_id, cap in caps.items():
            ret, frame = cap.read()
            if not ret:
                print(f"Error: Unable to read frame from feed {feed_id}")
                break
            frames[feed_id] = frame

        # Stop as soon as any feed failed to deliver a frame
        if len(frames) != len(caps):
            break

        # Calculate FPS; guard against a zero interval between two iterations
        new_frame_time = time.perf_counter()
        elapsed = new_frame_time - prev_frame_time
        prev_frame_time = new_frame_time
        fps = int(1 / elapsed) if elapsed > 0 else 0
        fps_text = "FPS: " + str(fps)

        processed_frames = []
        for feed_id, image in frames.items():
            # Mirror the frame and convert BGR -> RGB for MediaPipe
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = hands_by_feed[feed_id].process(image)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            cv2.putText(image, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(image, f"feed: {feed_id}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            processed_frames.append(image)

            # Write the frame to the video file if recording.
            # NOTE: each new recording session reuses the same file name per feed.
            if is_recording:
                if feed_id not in out_videos:
                    out_videos[feed_id] = cv2.VideoWriter(f'feed_{feed_id}.avi', fourcc, 20.0, (image.shape[1], image.shape[0]))
                out_videos[feed_id].write(image)

        # Concatenate images horizontally
        combined_image = cv2.hconcat(processed_frames)

        # Display the combined frame
        cv2.imshow('RTSP Streams with MediaPipe Hands and FPS', combined_image)

        # Handle key events: 'q' quits, 'r' toggles recording
        pressed = cv2.waitKey(1) & 0xFF
        if pressed == ord('q'):
            break
        elif pressed == ord('r'):
            is_recording = not is_recording
            if is_recording:
                record_start_time = time.time()
                print("Recording started")
            else:
                _release_writers(out_videos)
                print("Recording stopped")
                record_start_time = None

        # Automatic start and stop based on the provided start and end times.
        # Both bounds are seconds since midnight, so they are compared with the
        # current time of day (not with the time elapsed since program start).
        if schedule_enabled:
            now = datetime.now()
            seconds_now = now.hour * 3600 + now.minute * 60 + now.second
            if start_time_seconds <= end_time_seconds:
                in_window = start_time_seconds <= seconds_now < end_time_seconds
            else:
                # Window wraps past midnight, e.g. 10:00 PM -> 02:00 AM
                in_window = seconds_now >= start_time_seconds or seconds_now < end_time_seconds

            if in_window and not was_in_window and not is_recording:
                is_recording = True
                record_start_time = time.time()
                print("Recording started automatically at start time")
            elif was_in_window and not in_window and is_recording:
                is_recording = False
                _release_writers(out_videos)
                print("Recording stopped automatically at end time")
                record_start_time = None
            was_in_window = in_window
finally:
    # Release all resources, also when the loop ended with an exception;
    # releasing the writers finalizes the recorded files.
    for tracker in hands_by_feed.values():
        tracker.close()
    for cap in caps.values():
        cap.release()
    _release_writers(out_videos)
    cv2.destroyAllWindows()





usage: ipykernel_launcher.py [-h] --streams STREAMS [--start START]
                             [--end END]
ipykernel_launcher.py: error: the following arguments are required: --streams


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


Without argparse

In [2]:
import cv2
import mediapipe as mp
import time
import numpy as np

# Show up to four RTSP feeds with MediaPipe hand landmarks in a 2x2 grid.
import os

# Each RTSP URL embeds camera credentials, so the URLs are taken from
# environment variables instead of being stored in the notebook.
# Expected form of each value: rtsp://<user>:<password>@<host>:554/live/ch1
FEED_ENV_VARS = {
    'feed1': 'RTSP_URL_FEED1',
    'feed2': 'RTSP_URL_FEED2',
    'feed3': 'RTSP_URL_FEED3',
}

# Define dictionary with the RTSP stream URLs that are actually configured
rtsp_streams = {
    feed_name: os.environ[env_var]
    for feed_name, env_var in FEED_ENV_VARS.items()
    if os.environ.get(env_var)
}
if not rtsp_streams:
    raise SystemExit("Error: set at least one of " + ", ".join(FEED_ENV_VARS.values()))

# Initialize MediaPipe hands and drawing utilities
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Connect to the RTSP streams
caps = {feed_name: cv2.VideoCapture(url) for feed_name, url in rtsp_streams.items()}

if not all(cap.isOpened() for cap in caps.values()):
    # Release the streams that did open before giving up
    for cap in caps.values():
        cap.release()
    print("Error: Unable to open one or more video streams")
    # Raise SystemExit instead of calling exit(): exit() is an interactive-shell
    # helper and, under Jupyter, asks the whole kernel to shut down.
    raise SystemExit(1)

# Initialize variables for FPS calculation.
# perf_counter() is a monotonic, high-resolution clock meant for intervals.
prev_frame_time = time.perf_counter()
new_frame_time = prev_frame_time

# One Hands instance per feed: in video mode MediaPipe tracks landmarks from
# one frame to the next, so interleaving frames of different cameras through
# a single instance would mix up the tracking state.
hands_by_feed = {
    feed_name: mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)
    for feed_name in caps
}

try:
    while all(cap.isOpened() for cap in caps.values()):
        frames = {}
        for feed_name, cap in caps.items():
            ret, frame = cap.read()
            if not ret:
                print(f"Error: Unable to read frame from feed {feed_name}")
                break
            frames[feed_name] = frame

        # Stop as soon as any feed failed to deliver a frame
        if len(frames) != len(caps):
            break

        # Calculate FPS; guard against a zero interval between two iterations
        new_frame_time = time.perf_counter()
        elapsed = new_frame_time - prev_frame_time
        prev_frame_time = new_frame_time
        fps = int(1 / elapsed) if elapsed > 0 else 0
        fps_text = "FPS: " + str(fps)

        processed_frames = []
        for feed_name, image in frames.items():
            # Mirror the frame and convert BGR -> RGB for MediaPipe
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = hands_by_feed[feed_name].process(image)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            cv2.putText(image, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(image, f"feed: {feed_name}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            processed_frames.append(image)

        # Ensure we have exactly 4 frames for the 2x2 grid: pad with black
        # frames shaped like the first feed's frame
        blank_frame = np.zeros_like(processed_frames[0])
        while len(processed_frames) < 4:
            processed_frames.append(blank_frame)

        # Arrange frames in a 2x2 grid
        top_row = cv2.hconcat([processed_frames[0], processed_frames[1]])
        bottom_row = cv2.hconcat([processed_frames[2], processed_frames[3]])
        combined_image = cv2.vconcat([top_row, bottom_row])

        # Display the combined frame
        cv2.imshow('RTSP Streams with MediaPipe Hands and FPS', combined_image)

        # Handle key events: 'q' quits
        pressed = cv2.waitKey(1) & 0xFF
        if pressed == ord('q'):
            break
finally:
    # Release all resources, also when the loop ended with an exception
    for tracker in hands_by_feed.values():
        tracker.close()
    for cap in caps.values():
        cap.release()
    cv2.destroyAllWindows()
