In [1]:
import mediapipe as mp
from mediapipe.python.solutions.pose import PoseLandmark
from pprint import pprint
from PIL import Image
import pandas as pd
from mediapipe.tasks import python as mpy
from mediapipe.tasks.python import vision
from pathlib import Path
import numpy as np
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

In [2]:
# print(PoseLandmark.__dict__)
# print(PoseLandmark._member_names_)

In [3]:
# Pose Landmarker model bundle. Download one from:
# https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models
# and place it in the notebook's working directory. The path is resolved
# relative to the working directory (the same way the test assets folder is
# located) instead of a machine-specific absolute path.
MODEL_FILENAME = "pose_landmarker_full.task"
model_file = Path.cwd() / MODEL_FILENAME
if not model_file.is_file():
    # Fail early with an actionable message before the detector is created.
    raise FileNotFoundError(
        f"Pose landmarker model not found at {model_file}. Download it from "
        "https://ai.google.dev/edge/mediapipe/solutions/vision/pose_landmarker/index#models"
    )
model_path = str(model_file)

# Short aliases for the MediaPipe Tasks names used in this notebook.
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = vision.PoseLandmarker
PoseLandmarkerOptions = vision.PoseLandmarkerOptions
running_mode = vision.RunningMode.IMAGE
load_mp_img = mp.Image.create_from_file

# Build the detector in IMAGE running mode.
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=running_mode,
)
detector = PoseLandmarker.create_from_options(options)

I0000 00:00:1750054690.970811  329114 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M3
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1750054691.010086  329275 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1750054691.018928  329275 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [4]:
def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with every detected pose drawn on it.

    Args:
        rgb_image: Image array to annotate. It is copied, never modified.
        detection_result: Object exposing ``pose_landmarks``, a sequence
            holding one collection of landmarks per detected pose.

    Returns:
        The annotated copy of the input image.
    """
    detected_poses = detection_result.pose_landmarks
    canvas = np.copy(rgb_image)

    for pose in detected_poses:
        # Repackage each landmark's x/y/z into a NormalizedLandmarkList proto,
        # which is the object handed to draw_landmarks below.
        converted = [
            landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
            for point in pose
        ]
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend(converted)

        solutions.drawing_utils.draw_landmarks(
            canvas,
            proto,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style(),
        )

    return canvas

In [5]:
# Run the detector over every PNG in assets/test, keep the world landmarks of
# the first detected pose per image, and preview each annotated frame.
# Press any key to advance to the next image, or 'q' to stop early.
results = []
folder = Path.cwd() / "assets" / "test"

try:
    for file in sorted(folder.glob("*.png")):
        img = load_mp_img(str(file))

        landmarks = detector.detect(img)
        if not landmarks.pose_world_landmarks:
            # No pose found in this image: indexing [0] would raise IndexError.
            print(f"no pose detected in {file.name}, skipping")
            continue
        marks = landmarks.pose_world_landmarks[0]
        results.append(marks)

        # Keep only the first three channels (drops an alpha channel if there
        # is one), then convert RGB -> BGR because cv2.imshow displays BGR.
        alpha_stripped = img.numpy_view()[..., :3]
        bgr_img = cv2.cvtColor(alpha_stripped, cv2.COLOR_RGB2BGR)
        annotated_image = draw_landmarks_on_image(bgr_img, landmarks)

        cv2.imshow("preview", annotated_image)
        # Block until a key is pressed; mask to the low byte before comparing.
        key = cv2.waitKey(0) & 0xFF
        if key == ord('q'):
            break
        cv2.destroyAllWindows()  # close the window before the next image
finally:
    # Also runs after 'q' or an exception, so the preview window never lingers.
    cv2.destroyAllWindows()

W0000 00:00:1750054691.068633  329275 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


In [6]:
# Landmark names in enum definition order; one name per landmark in a pose.
landmark_names = [member.name for member in PoseLandmark]

# One [x, y, z, visibility] row per landmark, grouped per example.
rows = [
    [[lm.x, lm.y, lm.z, lm.visibility] for lm in res]
    for res in results
]

data = np.array(rows)
# Guard against a landmark count that does not match the enum; previously a
# zip() against the names silently truncated extra landmarks.
assert data.size == 0 or data.shape[1:] == (len(landmark_names), 4), (
    f"unexpected landmark array shape {data.shape}; "
    f"expected (n_examples, {len(landmark_names)}, 4)"
)

# Index rows by (example number, landmark name).
idxs = pd.MultiIndex.from_product(
    [np.arange(data.shape[0]), landmark_names],
    names=["example", "landmark"],
)
df = pd.DataFrame(data.reshape(-1, 4), index=idxs, columns=["x", "y", "z", "visibility"])

In [7]:
# Show the landmark table: one row per (example, landmark) with x, y, z, visibility.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,z,visibility
example,landmark,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,NOSE,0.105812,-0.604585,-0.103344,0.999987
0,LEFT_EYE_INNER,0.112420,-0.642191,-0.089312,0.999982
0,LEFT_EYE,0.112934,-0.642498,-0.088762,0.999977
0,LEFT_EYE_OUTER,0.112563,-0.643110,-0.088865,0.999981
0,RIGHT_EYE_INNER,0.081429,-0.642052,-0.094331,0.999977
...,...,...,...,...,...
12,RIGHT_ANKLE,-0.032893,0.456626,0.217147,0.008197
12,LEFT_HEEL,0.232361,0.534365,0.210826,0.027732
12,RIGHT_HEEL,-0.025365,0.478766,0.247267,0.012169
12,LEFT_FOOT_INDEX,0.227529,0.613419,0.178792,0.018753


In [8]:
# Show the raw landmark array that df was reshaped from.
data

array([[[ 1.05811864e-01, -6.04584515e-01, -1.03343725e-01,
          9.99986649e-01],
        [ 1.12420090e-01, -6.42191350e-01, -8.93120766e-02,
          9.99982357e-01],
        [ 1.12933926e-01, -6.42498493e-01, -8.87620449e-02,
          9.99977112e-01],
        ...,
        [-1.89780757e-01,  8.21635187e-01, -3.79500389e-02,
          7.54013717e-01],
        [ 1.58148631e-01,  8.63013327e-01, -4.90999222e-03,
          9.29346859e-01],
        [-1.57209530e-01,  8.82734537e-01, -1.52438641e-01,
          9.39227104e-01]],

       [[ 1.17746837e-01, -6.17418408e-01, -1.09086752e-01,
          9.99984503e-01],
        [ 1.12492070e-01, -6.56446576e-01, -9.32047367e-02,
          9.99984026e-01],
        [ 1.12820603e-01, -6.56820893e-01, -9.23182964e-02,
          9.99975920e-01],
        ...,
        [-1.71936706e-01,  8.04385543e-01, -5.37109375e-03,
          7.68085957e-01],
        [ 1.65397421e-01,  8.66701603e-01,  4.90942001e-02,
          9.41354990e-01],
        [-1.410