In [79]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.vision import PoseLandmarkerResult
from mediapipe import solutions as mp_solutions
from mediapipe.framework.formats import landmark_pb2

import cv2 as cv

In [83]:
# Absolute Path to the Model
modelPath = '/home/matt/Documents/projects/swingAnalysis/swing-analysis-prototyping/models/pose_landmarker_full.task'

# Set up MediaPipe base options
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Set up OpenCV Video Capture
cap = cv.VideoCapture('../data/videos_160/363.mp4')
fps = cap.get(cv.CAP_PROP_FPS)
print(cap.get(cv.CAP_PROP_FRAME_COUNT))
print(4 / cap.get(cv.CAP_PROP_FRAME_COUNT) * 46)
frame_index = 0

403.0
0.456575682382134


In [84]:
# Create a pose landmarker instance with video mode on
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=modelPath),
    running_mode=VisionRunningMode.VIDEO
)

In [85]:
# Run the Landmarker
with PoseLandmarker.create_from_options(options) as landmarker:
    while cap.isOpened():
        ret, frame = cap.read()
        # if frame is read correctly ret is True
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
            
        # Get the timestamp
        timestamp = int(1000 * frame_index / fps)
        frame_index += 1
        # Convert the CV image to MP
        mpImage = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

        # Get the poseresult
        pose_landmarker_result = landmarker.detect_for_video(mpImage, timestamp)
        leftShoulder = pose_landmarker_result.pose_landmarks[0][11]
        rightShoulder = pose_landmarker_result.pose_landmarks[0][12]
        print(f'Left Shoulder:\n\tX: {leftShoulder.x}\n\tY: {leftShoulder.y}\n\tZ: {leftShoulder.z}')
        print(f'Right Shoulder:\n\tX: {rightShoulder.x}\n\tY: {rightShoulder.y}\n\tZ: {rightShoulder.z}')
        
        # Display the frames
        annotatedFrame = frame.copy()

        if pose_landmarker_result.pose_landmarks:
            for landmarks in pose_landmarker_result.pose_landmarks:
                # Convert list of landmarks to NormalizedLandmarkList
                # New Task api is not supported by 
                landmark_list_proto = landmark_pb2.NormalizedLandmarkList()
                for lmk in landmarks:
                    landmark_list_proto.landmark.add(
                        x=lmk.x,
                        y=lmk.y,
                        z=lmk.z,
                        visibility=lmk.visibility,
                        presence=lmk.presence
                    )
                
                mp_solutions.drawing_utils.draw_landmarks(
                    image=annotatedFrame,
                    landmark_list=landmark_list_proto,
                    connections=mp_solutions.pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_solutions.drawing_utils.DrawingSpec(color=(0,255,0), thickness=2, circle_radius=2),
                    connection_drawing_spec=mp_solutions.drawing_utils.DrawingSpec(color=(0,0,255), thickness=2),
                )

        # Keep the frame in original size
        # cv.imshow('Pose Detection', annotatedFrame)
        # Resize the frame to be larger
        resized_frame = cv.resize(annotatedFrame, None, fx=3.0, fy=3.0, interpolation=cv.INTER_LINEAR)
        cv.imshow('Pose Detection', resized_frame)
        
        # Wait between each one
        if cv.waitKey(1) == ord('q'): # Change wait time to play video slower or faster
            break
        # input("press enter to continue") # Uncomment this line to have step by step functionality
        
    cap.release()
    cv.destroyAllWindows()

I0000 00:00:1750870843.794462    8343 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1750870843.796775  147413 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.3-1ubuntu2), renderer: zink Vulkan 1.4(NVIDIA GeForce RTX 4070 (NVIDIA_PROPRIETARY))
W0000 00:00:1750870843.856979  147419 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1750870843.883614  147422 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Left Shoulder:
	X: 0.5108174681663513
	Y: 0.3920901417732239
	Z: -0.4363493323326111
Right Shoulder:
	X: 0.3936229944229126
	Y: 0.41427478194236755
	Z: -0.48718708753585815
Left Shoulder:
	X: 0.5112444758415222
	Y: 0.3923051059246063
	Z: -0.4764864444732666
Right Shoulder:
	X: 0.39350637793540955
	Y: 0.4130624830722809
	Z: -0.49723634123802185
Left Shoulder:
	X: 0.5129883289337158
	Y: 0.39241141080856323
	Z: -0.49693727493286133
Right Shoulder:
	X: 0.3935401439666748
	Y: 0.4108943045139313
	Z: -0.5048427581787109
Left Shoulder:
	X: 0.5153527855873108
	Y: 0.3921906352043152
	Z: -0.5181496143341064
Right Shoulder:
	X: 0.3937768042087555
	Y: 0.4109823703765869
	Z: -0.5408245325088501
Left Shoulder:
	X: 0.5151299834251404
	Y: 0.39128801226615906
	Z: -0.4855574071407318
Right Shoulder:
	X: 0.39377540349960327
	Y: 0.40906739234924316
	Z: -0.5060061812400818
Left Shoulder:
	X: 0.515377938747406
	Y: 0.3893389403820038
	Z: -0.5056500434875488
Right Shoulder:
	X: 0.39399605989456177
	Y: 0.408377