Install and Import Dependencies

In [1]:
%pip install mediapipe opencv-python

Collecting mediapipe
  Downloading mediapipe-0.10.14-cp312-cp312-macosx_11_0_universal2.whl.metadata (9.7 kB)
Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Collecting absl-py (from mediapipe)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting attrs>=19.1.0 (from mediapipe)
  Downloading attrs-23.2.0-py3-none-any.whl.metadata (9.5 kB)
Collecting flatbuffers>=2.0 (from mediapipe)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting jax (from mediapipe)
  Downloading jax-0.4.30-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.4.30-cp312-cp312-macosx_11_0_arm64.whl.metadata (1.0 kB)
Collecting matplotlib (from mediapipe)
  Downloading matplotlib-3.9.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl

In [5]:
import cv2
import mediapipe as mp
import numpy as np
# Short aliases for the two MediaPipe solution modules used by the cells below.
mp_pose = mp.solutions.pose  # pose estimation model
mp_drawing = mp.solutions.drawing_utils  # drawing utilities

Matplotlib is building the font cache; this may take a moment.


In [11]:
# Preview the raw webcam feed in a window; press 'q' in that window to stop.
cap = cv2.VideoCapture(0)  # select the device (index 0)
try:
    while cap.isOpened():
        ret, frame = cap.read()  # get the next frame
        if not ret:
            # The read failed (camera busy/unplugged or stream ended), so
            # there is no frame to show; imshow would raise on it.
            break

        cv2.imshow('Mediapipe Feed', frame)  # popup video feed

        # waitKey also services the GUI event loop; quit on 'q'.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised,
    # so the next cell can open the device again.
    cap.release()
    cv2.destroyAllWindows()

Make Detections

In [18]:
# Run pose detection on the live webcam feed and draw the detected skeleton
# on every frame; press 'q' in the window to stop.
cap = cv2.VideoCapture(0)
try:
    # Setup mediapipe instance
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()  # get the next frame
            if not ret:
                # The read failed (camera busy/unplugged or stream ended), so
                # there is no frame to convert; cvtColor would raise on it.
                break

            # Recolor image (OpenCV BGR -> RGB expected by MediaPipe)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Mark read-only while processing, as in the MediaPipe examples
            # (lets the frame be passed by reference).
            image.flags.writeable = False

            # Make detection
            results = pose.process(image)

            # Recolor image back to BGR so OpenCV can draw on and display it
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render detections: first spec styles the landmarks, second the connections
            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(57, 255, 20), thickness=2, circle_radius=2),
                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
            )

            cv2.imshow('Mediapipe Feed', image)  # popup video feed

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised,
    # so the next cell can open the device again.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1721230163.290138  107010 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2
W0000 00:00:1721230163.375244  159496 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1721230163.381684  159496 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extract Joint Coordinates

![MediaPipe pose landmark index map](<Markdown/mediapipe landmarks.png>)

In [24]:
# Run pose detection on the live webcam feed, print the landmark list of every
# frame in which a pose is found, and draw the skeleton; press 'q' to stop.
# `landmarks` is left holding the most recent detection and is read by the
# next cell once the loop has ended.
cap = cv2.VideoCapture(0)
try:
    # Setup mediapipe instance
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()  # get the next frame
            if not ret:
                # The read failed (camera busy/unplugged or stream ended), so
                # there is no frame to convert; cvtColor would raise on it.
                break

            # Recolor image (OpenCV BGR -> RGB expected by MediaPipe)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Mark read-only while processing, as in the MediaPipe examples
            # (lets the frame be passed by reference).
            image.flags.writeable = False

            # Make detection
            results = pose.process(image)

            # Recolor image back to BGR so OpenCV can draw on and display it
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Extract landmarks. pose_landmarks is None when no pose was found
            # in this frame; test for that explicitly instead of swallowing
            # every exception, so real errors still surface.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                print(landmarks)

            # Render detections: first spec styles the landmarks, second the connections
            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(57, 255, 20), thickness=2, circle_radius=2),
                mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2),
            )

            cv2.imshow('Mediapipe Feed', image)  # popup video feed

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised,
    # so the next cell can open the device again.
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1721232999.152549  107010 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M2
W0000 00:00:1721232999.236952  184596 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1721232999.242456  184598 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[x: 0.464915544
y: 0.409573734
z: -0.487283319
visibility: 0.999997377
, x: 0.477994919
y: 0.366514206
z: -0.438983917
visibility: 0.999997258
, x: 0.488025308
y: 0.369622469
z: -0.438830197
visibility: 0.999996066
, x: 0.49800235
y: 0.373993874
z: -0.438679963
visibility: 0.999995589
, x: 0.446925908
y: 0.362558603
z: -0.45481205
visibility: 0.999997377
, x: 0.434683293
y: 0.363934398
z: -0.454391688
visibility: 0.999996543
, x: 0.422526896
y: 0.367186666
z: -0.454555333
visibility: 0.999996901
, x: 0.506797194
y: 0.407548428
z: -0.203022093
visibility: 0.999994755
, x: 0.392753571
y: 0.412252843
z: -0.26536563
visibility: 0.99999845
, x: 0.482424647
y: 0.478841484
z: -0.405355513
visibility: 0.999994636
, x: 0.442766696
y: 0.473534107
z: -0.424106777
visibility: 0.999997
, x: 0.559409738
y: 0.748492181
z: -0.0468774177
visibility: 0.998172879
, x: 0.290641
y: 0.762168765
z: -0.228643939
visibility: 0.999356806
, x: 0.644220591
y: 1.06114125
z: -0.177248061
visibility: 0.326161534
, x

In [27]:
# [x, y] pairs for the right wrist, elbow and shoulder, read from the
# `landmarks` list left behind by the capture loop in the previous cell.
# PoseLandmark members are enums; `.value` is the index into `landmarks`.
rwrist, relbow, rshoulder = (
    [landmarks[joint.value].x, landmarks[joint.value].y]
    for joint in (
        mp_pose.PoseLandmark.RIGHT_WRIST,
        mp_pose.PoseLandmark.RIGHT_ELBOW,
        mp_pose.PoseLandmark.RIGHT_SHOULDER,
    )
)
rwrist

[0.7325011491775513, 0.8804816603660583]