In [1]:
import cv2  # handles webcam capture and drawing
import mediapipe as mp  # library for real-time hand tracking
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2  # protobuf types used to structure the landmark data for drawing
import numpy as np  # used for copying and manipulating image arrays

# Layout and styling of the handedness label drawn by draw_landmarks_on_image.

# Pixels subtracted from the label's y coordinate so the text sits above the hand.
MARGIN = 10  
# Passed to cv2.putText as the font scale.
FONT_SIZE = 1
# Passed to cv2.putText as the stroke thickness.
FONT_THICKNESS = 1
# Passed to cv2.putText as the text colour. The capture loop draws on the
# OpenCV (BGR) frame, so the channels are read in B, G, R order there.
HANDEDNESS_TEXT_COLOR = (88, 205, 54) 

In [2]:
# Short aliases for the MediaPipe "solutions" sub-modules used in this notebook.
mp_hands = mp.solutions.hands  # source of Hands and HAND_CONNECTIONS
mp_drawing = mp.solutions.drawing_utils  # source of draw_landmarks
mp_styles = mp.solutions.drawing_styles  # source of the default hand landmark/connection styles

def draw_landmarks_on_image(rgb_image, hand_landmarks_list, handedness_list):
    """Return a copy of ``rgb_image`` with each detected hand drawn and labelled.

    For every (landmarks, handedness) pair the hand skeleton is drawn with
    MediaPipe's default hand styles, and the handedness label is written just
    above the top-left corner of the hand's bounding box.

    Args:
        rgb_image: H x W x 3 image array. It is never modified; all drawing
            happens on a copy. No colour conversion is performed here, so the
            colours are interpreted in whatever channel order the image uses
            (the capture loop in this notebook passes a BGR frame despite the
            parameter name).
        hand_landmarks_list: Sequence of landmark containers, each exposing a
            ``.landmark`` iterable of points with normalised ``x``, ``y`` and
            ``z`` attributes (e.g. ``results.multi_hand_landmarks``). ``None``
            is treated as "no hands".
        handedness_list: Sequence parallel to ``hand_landmarks_list``; each
            item exposes ``.classification[0].label`` (e.g.
            ``results.multi_handedness``). ``None`` is treated as "no hands".

    Returns:
        The annotated copy of ``rgb_image``. When there is nothing to draw the
        copy is returned unchanged.
    """
    annotated_image = np.copy(rgb_image)  # keep the caller's image untouched

    # MediaPipe reports "no hands" as None rather than an empty list, so accept
    # that here instead of forcing every caller to guard against it.
    if hand_landmarks_list is None or handedness_list is None:
        return annotated_image

    # zip() pairs each hand with its label and stops at the shorter sequence,
    # so mismatched lengths cannot raise IndexError.
    for hand_landmarks, handedness in zip(hand_landmarks_list, handedness_list):
        height, width = annotated_image.shape[0], annotated_image.shape[1]

        # Rebuild the landmarks as a NormalizedLandmarkList holding only x/y/z,
        # which is the structure draw_landmarks is given.
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(
                x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in hand_landmarks.landmark
        ])

        mp_drawing.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            mp_hands.HAND_CONNECTIONS,
            mp_styles.get_default_hand_landmarks_style(),
            mp_styles.get_default_hand_connections_style())

        # Normalised coordinates -> pixel position of the hand's top-left corner.
        x_coords = [landmark.x for landmark in hand_landmarks.landmark]
        y_coords = [landmark.y for landmark in hand_landmarks.landmark]
        text_x = int(min(x_coords) * width)
        text_y = int(min(y_coords) * height) - MARGIN

        label = handedness.classification[0].label  # 'Left' or 'Right'

        # putText anchors the text at its bottom-left corner. Clamp the anchor
        # so the label stays inside the frame when the hand touches an edge
        # (otherwise e.g. a hand at the top gives a negative y and no label).
        (label_width, label_height), _ = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_DUPLEX, FONT_SIZE, FONT_THICKNESS)
        text_x = max(0, min(text_x, width - label_width))
        text_y = max(label_height, min(text_y, height - 1))

        cv2.putText(annotated_image, label, (text_x, text_y),
                    cv2.FONT_HERSHEY_DUPLEX, FONT_SIZE, HANDEDNESS_TEXT_COLOR,
                    FONT_THICKNESS, cv2.LINE_AA)

    return annotated_image


In [5]:
cap = cv2.VideoCapture(0)  # open the default camera (device index 0)
if not cap.isOpened():
    # Fail here with a clear message; otherwise the capture loop later on
    # simply does nothing and gives no hint about what went wrong.
    raise RuntimeError(
        "Could not open camera 0; check that a webcam is connected "
        "and not in use by another program.")

In [6]:
# Live hand-tracking preview with recording.
#
# Reads frames from `cap`, runs MediaPipe Hands on each one, draws the detected
# hands, shows the result in a window and appends it to an MP4 file. Press ESC
# in the preview window to stop.

# Check the camera BEFORE creating the writer: if this cell is re-run after a
# previous run released `cap`, creating the writer anyway would overwrite the
# earlier recording with an empty file.
if not cap.isOpened():
    raise RuntimeError(
        "Camera is not open; re-run the cell that creates `cap` "
        "before running this one.")

# Fixed playback rate for the recording. int(cap.get(cv2.CAP_PROP_FPS)) could
# be used instead, but the recorded video then played back too fast.
fps = 20
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_size = (width, height)
# NOTE(review): machine-specific absolute path; on any other machine the
# writer will most likely fail to open (handled just below).
output_path = 'C:\\Users\\Balaji\\GITHUB-ML\\Palm-Detection\\output.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

recording = out.isOpened()
if not recording:
    print("Warning: could not open", output_path,
          "for writing; the preview will run without recording.")

# Give up after this many consecutive failed reads instead of spinning forever
# (a failed read skips waitKey, so ESC could never be pressed to get out).
max_failed_reads = 30
failed_reads = 0

try:
    with mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as hands:  # create the hand-tracking model
        while cap.isOpened():  # loop until ESC is pressed or the camera stops
            success, frame = cap.read()
            if not success:
                failed_reads += 1
                if failed_reads >= max_failed_reads:
                    print("Camera stopped delivering frames; stopping.")
                    break
                print("Ignoring empty camera frame.")
                continue
            failed_reads = 0

            frame = cv2.flip(frame, 1)  # mirror the image so it behaves like a mirror
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV BGR -> RGB for MediaPipe

            results = hands.process(rgb_frame)  # run the model

            if results.multi_hand_landmarks and results.multi_handedness:
                # Draw on the BGR frame, since that is what is shown and saved.
                frame = draw_landmarks_on_image(
                    frame, results.multi_hand_landmarks, results.multi_handedness)

            if recording:
                out.write(frame)
            cv2.imshow('Hand Detection', frame)  # show the updated frame in a window
            if (cv2.waitKey(1) & 0xFF) == 27:  # ESC to exit
                break
finally:
    # Always free the camera, finalise the video file and close the window,
    # even when the loop ends through an exception or a kernel interrupt.
    cap.release()
    out.release()
    cv2.destroyAllWindows()