https://docs.opencv.org/4.x/dd/d43/tutorial_py_video_display.html

# Setup
Import the necessary modules.

# Import Model
Import the external Hand Landmarker model asset by the path.
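Then create a `HandLandmarkerOptions` object for the landmarker parameters: pass a `BaseOptions` object built from `model_asset_path`, and set the running mode. These notes describe Live Stream mode; note that the code below sets `running_mode` to `IMAGE` and calls `detect()` synchronously on each frame.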

In Live Stream mode, a `result_callback` must be supplied in the options to set up a listener that receives results asynchronously.

Create the `HandLandmarker` object with the options passed as a parameter.

# Load the Data
The input media must first be converted into a `mediapipe.Image` object.
For video or a live stream, each frame needs to be loaded as a NumPy array, which is then passed as the `data` argument of `mediapipe.Image`.

# Running the Hand Landmarker Process
The Hand Landmarker preprocesses the data, detects hands in the image or frame, and detects the hand landmarks.
For livestream, the image data is sent to the Hand Landmarker and results are accessible via the `result_callback` provided in the `HandLandmarkerOptions` object.

# Displaying the results
The Hand Landmarker returns a `HandLandmarkerResult` object for each detection run, containing:
- hand landmark image coordinates
- hand landmark world coordinates
- handedness (left/right hand)

Each of the 21 hand landmarks is given as `(x, y, z)` coordinates.
 `x` and `y` are normalized to `[0.0, 1.0]` by the image width and height, respectively.
 `z` is the landmark depth, with the depth at the wrist as the origin. The smaller the value, the closer the landmark is to the camera; the magnitude of `z` uses roughly the same scale as `x`.
 
`result_callback`: Sets the result listener to receive the detection results asynchronously when the hand landmarker is in live stream mode. Only applicable when the running mode is set to `LIVE_STREAM`; in that mode the results are only available in this callback.

Because the coordinates obtained from `hand_landmarks` are normalized to `[0, 1]`, they must be multiplied by the input image's width (for `x`) and height (for `y`) to get the pixel positions at which to draw on the output frame/image.


In [1]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2 as cv

In [2]:
# Path to the Hand Landmarker model asset; passed to BaseOptions as
# model_asset_path when the landmarker options are built.
model_path = 'hand_landmarker.task'

In [3]:
# Short aliases for the MediaPipe Tasks names used in the cells below.
BaseOptions = mp.tasks.BaseOptions
HandLandmarkerOptions = vision.HandLandmarkerOptions
HandLandmarkerResult = vision.HandLandmarkerResult
VisionRunningMode = vision.RunningMode

In [4]:
# Configure the landmarker: load the model from model_path, run in IMAGE mode
# (no result_callback is supplied, so results are returned directly by
# detect()), and report up to two hands per image.
options = HandLandmarkerOptions(base_options=BaseOptions(model_asset_path=model_path),
                                running_mode=VisionRunningMode.IMAGE, num_hands=2)
# NOTE: despite the class-like name, this is the landmarker *instance* created
# from the options above.
HandLandmarker = vision.HandLandmarker.create_from_options(options)

In [5]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

# Vertical gap between the top of a detected hand and its handedness label.
MARGIN = 10  # pixels
# Font scale passed to cv.putText for the handedness label.
FONT_SIZE = 1
# Stroke thickness passed to cv.putText for the handedness label.
FONT_THICKNESS = 1
# Color passed to cv.putText for the handedness label.
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def print_result(result: HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    """Print a hand landmarker result to standard output.

    Args:
        result: The detection result to print.
        output_image: Accepted for signature compatibility; not used.
        timestamp_ms: Accepted for signature compatibility; not used.
    """
    message = f'hand landmarker result: {result}'
    print(message)

def draw_landmarks(current_frame, landmarker_result):
    """Return a copy of ``current_frame`` annotated with every detected hand.

    For each detected hand this draws the landmark skeleton, a padded
    bounding rectangle around the landmarks, and the handedness label just
    above the hand. The input frame itself is never modified.

    Args:
        current_frame: Image array whose first two dimensions are
            (height, width).
        landmarker_result: Detection result exposing ``hand_landmarks`` and
            ``handedness``, two parallel sequences with one entry per hand.

    Returns:
        A new array with the annotations drawn on it. When no hands were
        detected this is an unmodified copy of ``current_frame``.
    """
    box_padding = 20  # pixels added on every side of the landmark extent

    annotated_image = np.copy(current_frame)
    # The frame size does not change while drawing, so read it once.
    height, width = annotated_image.shape[:2]

    # Walk the per-hand landmarks and handedness entries in lockstep.
    for hand_landmarks, handedness in zip(landmarker_result.hand_landmarks,
                                          landmarker_result.handedness):
        # Repack the landmarks into a NormalizedLandmarkList message, which
        # is what drawing_utils.draw_landmarks is given below.
        hand_landmarks_protocol = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_protocol.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x,
                                            y=landmark.y,
                                            z=landmark.z)
            for landmark in hand_landmarks
        ])

        # Draw the landmarks and their connections.
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_protocol,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style())

        # Landmark coordinates are normalized to [0, 1]; scale them by the
        # frame size to get the pixel extent of the hand.
        x_coordinates = [landmark.x for landmark in hand_landmarks]
        y_coordinates = [landmark.y for landmark in hand_landmarks]
        xmin = int(min(x_coordinates) * width)
        xmax = int(max(x_coordinates) * width)
        ymin = int(min(y_coordinates) * height)
        ymax = int(max(y_coordinates) * height)

        # Draw the bounding box, padded on every side.
        cv.rectangle(annotated_image,
                     pt1=(xmin - box_padding, ymin - box_padding),
                     pt2=(xmax + box_padding, ymax + box_padding),
                     color=(0, 0, 255), thickness=2)

        # Draw handedness (left or right hand) above the hand's top-left
        # corner.
        cv.putText(annotated_image, f"{handedness[0].category_name}",
                   (xmin, ymin - MARGIN), cv.FONT_HERSHEY_DUPLEX,
                   FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS,
                   cv.LINE_AA)

    return annotated_image

In [None]:
# Create the video capture object.
# The argument is a device index (0 or -1) or the name of a video file.
capture = cv.VideoCapture(0)
if not capture.isOpened():
    print("Cannot open camera")
    raise SystemExit(1)

# To request a smaller capture size, call e.g.
# capture.set(cv.CAP_PROP_FRAME_WIDTH, 320) and
# capture.set(cv.CAP_PROP_FRAME_HEIGHT, 240) before entering the loop.

try:
    while True:
        ret, current_frame = capture.read()

        # If the frame was read correctly, ret is True.
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        # Mirror the frame horizontally.
        current_frame = cv.flip(current_frame, 1)

        # OpenCV delivers frames in BGR order, while the MediaPipe image
        # below is declared as SRGB, so swap the channels first.
        rgb_frame = cv.cvtColor(current_frame, cv.COLOR_BGR2RGB)

        # Convert the frame received from OpenCV to a MediaPipe Image object.
        mp_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Use a distinct name so the HandLandmarkerResult class alias is not
        # overwritten by a result instance.
        detection_result = HandLandmarker.detect(mp_frame)
        annotated_frame = draw_landmarks(mp_frame.numpy_view(), detection_result)

        # cv.imshow expects BGR, so convert the annotated RGB frame back.
        cv.imshow('annotated_frame', cv.cvtColor(annotated_frame, cv.COLOR_RGB2BGR))

        # Press 'q' to quit.
        if cv.waitKey(1) == ord('q'):
            break
finally:
    # When everything is done (or on error), release the capture and close
    # the preview window.
    capture.release()
    cv.destroyAllWindows()