In [1]:
import cv2
import mediapipe as mp
from pose_format.pose import Pose


# Initialize MediaPipe Holistic (short aliases for the solution modules)
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Indices of the face landmarks to extract, grouped by facial feature
# (eyes, eyebrows, mouth, face contour). extract_landmarks() uses these
# values to index results.face_landmarks.landmark.
# NOTE(review): "right_eye" lists index 362 twice and "mouth" lists index 415
# twice, so those points are extracted twice; "face_contour" starts and ends
# with 10. Confirm whether the repeats are intentional (e.g. closing a loop)
# or typos for other indices.
FACE_LANDMARKS = {
    "left_eye": [33, 133, 160, 159, 158, 157, 173, 144, 145, 153, 154, 155],  # Left eye
    "right_eye": [362, 263, 387, 386, 385, 384, 373, 380, 381, 382, 383, 362],  # Right eye
    "left_eyebrow": [70, 63, 105, 66, 107, 55, 46, 53, 52],  # Left eyebrow
    "right_eyebrow": [336, 296, 334, 293, 300, 285, 276, 283, 282],  # Right eyebrow
    "mouth": [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 375, 321, 405, 314, 17, 84, 181, 91, 146],  # Mouth
    "face_contour": [10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109, 10]  # Face contour (completed outline)
}


# Convert a MediaPipe Holistic result into plain Python containers
def extract_landmarks(results):
    """Collect (x, y, z) tuples for the face, hands and body of one result.

    Args:
        results: object exposing ``face_landmarks``, ``left_hand_landmarks``,
            ``right_hand_landmarks`` and ``pose_landmarks``. Each attribute is
            either falsy (nothing detected) or has a ``landmark`` sequence
            whose items carry ``x``, ``y`` and ``z``.

    Returns:
        dict with four keys:
          * ``"face"``: dict mapping each FACE_LANDMARKS group name to the
            list of (x, y, z) tuples at that group's indices; empty dict when
            no face is available.
          * ``"left_hand"`` / ``"right_hand"``: list of (x, y, z) tuples for
            every landmark of that hand; empty list when unavailable.
          * ``"body"``: list of (x, y, z) tuples for every pose landmark
            except indices 0 through 10; empty list when unavailable.
    """

    def _xyz(lm):
        # Flatten one landmark object into a coordinate tuple.
        return (lm.x, lm.y, lm.z)

    # Face: only the hand-picked indices, grouped by facial feature.
    face_groups = {}
    face = results.face_landmarks
    if face:
        for group_name, group_indices in FACE_LANDMARKS.items():
            face_groups[group_name] = [_xyz(face.landmark[i]) for i in group_indices]

    # Hands: every landmark, in the order provided.
    left = results.left_hand_landmarks
    left_points = [_xyz(lm) for lm in left.landmark] if left else []

    right = results.right_hand_landmarks
    right_points = [_xyz(lm) for lm in right.landmark] if right else []

    # Body: drop pose indices 0..10, keep everything after them.
    skipped = frozenset(range(11))
    body_points = []
    pose = results.pose_landmarks
    if pose:
        for position, lm in enumerate(pose.landmark):
            if position in skipped:
                continue
            body_points.append(_xyz(lm))

    return {
        "face": face_groups,
        "left_hand": left_points,
        "right_hand": right_points,
        "body": body_points,
    }

In [16]:
# Helper: overlay the extracted landmarks on a frame (used for the optional preview)
def _draw_landmarks(image, landmarks):
    """Draw every extracted landmark on ``image`` in place as a filled green dot.

    ``landmarks`` is the dict returned by ``extract_landmarks``; coordinates
    are scaled by the image width/height before drawing.
    """
    height, width = image.shape[:2]
    points = [p for group in landmarks["face"].values() for p in group]
    points += landmarks["body"]
    points += landmarks["left_hand"]
    points += landmarks["right_hand"]
    for x_norm, y_norm, _z in points:
        cv2.circle(image, (int(x_norm * width), int(y_norm * height)), 3, (0, 255, 0), -1)


# Main entry point: capture landmarks from a video file
def get_landmarks_from_video(video_path, show=False):
    """Return the landmarks of the first frame showing a face and both hands.

    Frames are read in order and passed through MediaPipe Holistic. The scan
    stops at the first frame for which ``extract_landmarks`` yields a
    non-empty face dict and non-empty left- and right-hand lists.

    Args:
        video_path: path handed to ``cv2.VideoCapture``.
        show: when True, each processed frame is displayed with the landmarks
            drawn on it in a 'Holistic Landmarks' window, and pressing 'q'
            aborts the scan. Defaults to False (no window, no drawing).

    Returns:
        The landmark dict produced by ``extract_landmarks`` for the first
        qualifying frame, or ``None`` if the video cannot be opened, ends, or
        is aborted before such a frame is found.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # End of stream (or read failure): nothing more to scan.
                    break

                # OpenCV decodes frames as BGR; convert to RGB before processing.
                image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(image_rgb)
                landmarks = extract_landmarks(results)

                if show:
                    # Draw on the original BGR frame, which is what imshow expects.
                    _draw_landmarks(frame, landmarks)
                    cv2.imshow('Holistic Landmarks', frame)

                if landmarks["face"] and landmarks["left_hand"] and landmarks["right_hand"]:
                    return landmarks

                # Key polling only makes sense while a preview window exists.
                if show and cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        return None
    finally:
        # Runs on every exit path, including the early return above, so the
        # capture handle and any preview window are always released.
        cap.release()
        cv2.destroyAllWindows()

In [17]:
# Run the extraction on a sample clip. The result is the landmark dict of the
# first frame in which a face and both hands were found; it is None when the
# function finishes without finding such a frame.
video_path = "tmp/bonjour-3.mp4"
landmarks = get_landmarks_from_video(video_path)

I0000 00:00:1728303929.868539       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2


In [18]:
# Display the extracted landmark dict (bare expression -> cell output).
landmarks

{'face': {'left_eye': [(0.39507997035980225,
    0.20099063217639923,
    0.009953289292752743),
   (0.4187898635864258, 0.20390605926513672, 0.004851043689996004),
   (0.40092232823371887, 0.19578634202480316, 0.003767622634768486),
   (0.405447393655777, 0.1947542130947113, 0.0023312768898904324),
   (0.40989160537719727, 0.19550181925296783, 0.0020950527396053076),
   (0.4142932891845703, 0.1979026198387146, 0.0029825225938111544),
   (0.41744181513786316, 0.20140409469604492, 0.004076726734638214),
   (0.4017183482646942, 0.20510049164295197, 0.0047659315168857574),
   (0.40618324279785156, 0.20608803629875183, 0.0033496259711682796),
   (0.41062042117118835, 0.20603224635124207, 0.0030661262571811676),
   (0.41481465101242065, 0.20525066554546356, 0.003745835740119219),
   (0.4174831509590149, 0.20468290150165558, 0.004813331179320812)],
  'right_eye': [(0.4486438035964966, 0.20547308027744293, 0.00449479790404439),
   (0.4727729260921478, 0.2050374150276184, 0.009081163443624973)

In [1]:
from pose_format import Pose

# Read the whole .pose file as bytes and parse it with Pose.read.
with open("essen.pose", "rb") as f:
    pose = Pose.read(f.read())

In [13]:
# Inspect the raw pose data; the cell output shows it is a masked array.
pose.body.data

masked_array(
  data=[[[[414.33599853515625, 233.2265625, -0.4097636342048645],
          [230.30616760253906, 233.8053741455078, -0.3740450441837311],
          [431.9736022949219, 383.7851257324219, -0.3131425976753235],
          ...,
          [--, --, --],
          [--, --, --],
          [--, --, --]]],


        [[[414.21270751953125, 232.62646484375, -0.40085864067077637],
          [230.28883361816406, 233.11093139648438, -0.3839576840400696],
          [431.9697265625, 378.9504699707031, -0.283613920211792],
          ...,
          [--, --, --],
          [--, --, --],
          [--, --, --]]],


        [[[414.07916259765625, 232.41770935058594,
           -0.40022173523902893],
          [230.24319458007812, 232.72247314453125,
           -0.39005860686302185],
          [431.9488220214844, 377.28717041015625, -0.27927592396736145],
          ...,
          [--, --, --],
          [--, --, --],
          [--, --, --]]],


        ...,


        [[[408.9756774902344, 228.2

In [2]:
# List each header component's name together with its limb index pairs,
# printing a dashed rule after every component.
for component in pose.header.components:
    component_name, component_limbs = component.name, component.limbs
    print(component_name, component_limbs)
    print("---------------------------------------------------------------")

POSE_LANDMARKS [(3, 5), (0, 6), (1, 7), (6, 7), (0, 2), (1, 3), (0, 1), (2, 4)]
---------------------------------------------------------------
FACE_LANDMARKS [(34, 18), (29, 32), (6, 56), (63, 10), (52, 51), (32, 23), (19, 15), (79, 105), (35, 19), (107, 108), (127, 69), (77, 97), (80, 73), (2, 36), (0, 8), (35, 15), (82, 97), (122, 90), (18, 22), (37, 65), (14, 6), (74, 80), (42, 41), (54, 53), (87, 86), (110, 111), (12, 18), (1, 7), (34, 12), (105, 92), (89, 5), (43, 17), (123, 92), (124, 79), (23, 64), (66, 55), (81, 74), (40, 46), (8, 0), (97, 82), (80, 96), (49, 48), (12, 34), (67, 118), (73, 84), (3, 88), (9, 10), (18, 34), (75, 80), (92, 123), (31, 38), (59, 39), (11, 22), (36, 21), (20, 34), (62, 30), (63, 17), (48, 42), (100, 93), (13, 18), (70, 0), (126, 99), (96, 82), (77, 81), (69, 67), (12, 19), (64, 24), (82, 74), (72, 124), (5, 89), (18, 12), (24, 25), (25, 26), (112, 113), (89, 123), (15, 19), (124, 72), (97, 77), (20, 12), (71, 70), (35, 20), (46, 45), (116, 127), (75

In [5]:
# Import mediapipe, replacing a failed import with an error that tells the
# user how to install the package.
try:
    import mediapipe as mp
except ImportError:
    raise ImportError("Please install mediapipe with: pip install mediapipe")

mp_holistic = mp.solutions.holistic
# Turn every POSE_CONNECTIONS pair into a tuple of plain ints.
BODY_LIMBS = [(int(a), int(b)) for a, b in mp_holistic.POSE_CONNECTIONS]
print(BODY_LIMBS)

[(15, 21), (16, 20), (18, 20), (3, 7), (14, 16), (23, 25), (28, 30), (11, 23), (27, 31), (6, 8), (15, 17), (24, 26), (16, 22), (4, 5), (5, 6), (29, 31), (12, 24), (23, 24), (0, 1), (9, 10), (1, 2), (0, 4), (11, 13), (30, 32), (28, 32), (15, 19), (16, 18), (25, 27), (26, 28), (12, 14), (17, 19), (2, 3), (11, 12), (27, 29), (13, 15)]


In [8]:
# Names of the hand landmarks, taken from the HandLandmark enum.
# NOTE(review): _member_names_ is an underscore-prefixed enum attribute;
# consider a public alternative such as [m.name for m in HandLandmark].
HAND_POINTS = mp_holistic.HandLandmark._member_names_
# Turn every HAND_CONNECTIONS pair into a tuple of plain ints.
HAND_LIMBS = [(int(a), int(b)) for a, b in mp_holistic.HAND_CONNECTIONS]
HAND_LIMBS

[(3, 4),
 (0, 5),
 (17, 18),
 (0, 17),
 (13, 14),
 (13, 17),
 (18, 19),
 (5, 6),
 (5, 9),
 (14, 15),
 (0, 1),
 (9, 10),
 (1, 2),
 (9, 13),
 (10, 11),
 (19, 20),
 (6, 7),
 (15, 16),
 (2, 3),
 (11, 12),
 (7, 8)]

In [3]:
# List each header component's name together with its point names,
# printing a dashed rule after every component.
for component in pose.header.components:
    component_name, component_points = component.name, component.points
    print(component_name, component_points)
    print("---------------------------------------------------------------")

POSE_LANDMARKS ['LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_HIP', 'RIGHT_HIP']
---------------------------------------------------------------
FACE_LANDMARKS ['0', '7', '10', '13', '14', '17', '21', '33', '37', '39', '40', '46', '52', '53', '54', '55', '58', '61', '63', '65', '66', '67', '70', '78', '80', '81', '82', '84', '87', '88', '91', '93', '95', '103', '105', '107', '109', '127', '132', '133', '136', '144', '145', '146', '148', '149', '150', '152', '153', '154', '155', '157', '158', '159', '160', '161', '162', '163', '172', '173', '176', '178', '181', '185', '191', '234', '246', '249', '251', '263', '267', '269', '270', '276', '282', '283', '284', '285', '288', '291', '293', '295', '296', '297', '300', '308', '310', '311', '312', '314', '317', '318', '321', '323', '324', '332', '334', '336', '338', '356', '361', '362', '365', '373', '374', '375', '377', '378', '379', '380', '381', '382', '384', '385', '386', '387', '388', '3

In [32]:
# Pick the header component named 'FACE_LANDMARKS'. The single-element
# unpacking raises ValueError unless exactly one component matches.
[face_component] = [c for c in pose.header.components if c.name == 'FACE_LANDMARKS']


In [33]:
# Index span of the face points, looked up from the first and last point
# names of the FACE_LANDMARKS component (presumably header-wide indices).
# NOTE(review): range() excludes its end value, so the index of the last face
# point is not part of face_range; confirm this is intended, otherwise add 1.
# NOTE(review): _get_point_index is underscore-prefixed, i.e. private by
# convention; check whether pose_format offers a public equivalent.
face_range = range(
        pose.header._get_point_index('FACE_LANDMARKS', face_component.points[0]),
        pose.header._get_point_index('FACE_LANDMARKS', face_component.points[-1]),
    )

In [34]:
# Display the computed index range (bare expression -> cell output).
face_range

range(33, 500)

In [8]:
import mediapipe as mp

# Short alias for MediaPipe's holistic solution module.
mp_holistic = mp.solutions.holistic

In [10]:

# Names of the pose landmarks, taken from the PoseLandmark enum.
# NOTE(review): _member_names_ is an underscore-prefixed enum attribute;
# consider a public alternative such as [m.name for m in PoseLandmark].
BODY_POINTS = mp_holistic.PoseLandmark._member_names_

# Turn every POSE_CONNECTIONS pair into a tuple of plain ints.
BODY_LIMBS = [(int(a), int(b)) for a, b in mp_holistic.POSE_CONNECTIONS]
print(BODY_LIMBS)

[(15, 21), (16, 20), (18, 20), (3, 7), (14, 16), (23, 25), (28, 30), (11, 23), (27, 31), (6, 8), (15, 17), (24, 26), (16, 22), (4, 5), (5, 6), (29, 31), (12, 24), (23, 24), (0, 1), (9, 10), (1, 2), (0, 4), (11, 13), (30, 32), (28, 32), (15, 19), (16, 18), (25, 27), (26, 28), (12, 14), (17, 19), (2, 3), (11, 12), (27, 29), (13, 15)]


In [None]:
# Scratch list of pose index pairs; the cell was never executed.
# NOTE(review): (12, 24) appears twice and (11, 23) is absent; possibly the
# last pair was meant to be (11, 23). Confirm before reusing this list.
[(11, 12), (12, 14), (14, 16), (11, 13), (13, 15), (12, 24), (23, 24), (12, 24)]

In [9]:

# Names of the hand landmarks, taken from the HandLandmark enum.
# NOTE(review): _member_names_ is an underscore-prefixed enum attribute;
# consider a public alternative such as [m.name for m in HandLandmark].
HAND_POINTS = mp_holistic.HandLandmark._member_names_
# Turn every HAND_CONNECTIONS pair into a tuple of plain ints.
HAND_LIMBS = [(int(a), int(b)) for a, b in mp_holistic.HAND_CONNECTIONS]

In [10]:
# Display the hand landmark names (bare expression -> cell output).
HAND_POINTS

['WRIST',
 'THUMB_CMC',
 'THUMB_MCP',
 'THUMB_IP',
 'THUMB_TIP',
 'INDEX_FINGER_MCP',
 'INDEX_FINGER_PIP',
 'INDEX_FINGER_DIP',
 'INDEX_FINGER_TIP',
 'MIDDLE_FINGER_MCP',
 'MIDDLE_FINGER_PIP',
 'MIDDLE_FINGER_DIP',
 'MIDDLE_FINGER_TIP',
 'RING_FINGER_MCP',
 'RING_FINGER_PIP',
 'RING_FINGER_DIP',
 'RING_FINGER_TIP',
 'PINKY_MCP',
 'PINKY_PIP',
 'PINKY_DIP',
 'PINKY_TIP']

In [11]:
# Display the hand connection pairs (bare expression -> cell output).
HAND_LIMBS

[(3, 4),
 (0, 5),
 (17, 18),
 (0, 17),
 (13, 14),
 (13, 17),
 (18, 19),
 (5, 6),
 (5, 9),
 (14, 15),
 (0, 1),
 (9, 10),
 (1, 2),
 (9, 13),
 (10, 11),
 (19, 20),
 (6, 7),
 (15, 16),
 (2, 3),
 (11, 12),
 (7, 8)]

In [2]:
# Every landmark index that occurs in at least one FACEMESH_CONTOURS pair,
# de-duplicated and sorted in ascending order.
FACEMESH_CONTOURS_POINTS = sorted(
    {point for edge in mp_holistic.FACEMESH_CONTOURS for point in edge}
)
# Preview the first 20 indices.
FACEMESH_CONTOURS_POINTS[:20]

[0, 7, 10, 13, 14, 17, 21, 33, 37, 39, 40, 46, 52, 53, 54, 55, 58, 61, 63, 65]

In [2]:
# Display the face contour connections; the cell output shows a frozenset of index pairs.
mp_holistic.FACEMESH_CONTOURS

frozenset({(0, 267),
           (7, 163),
           (10, 338),
           (13, 312),
           (14, 317),
           (17, 314),
           (21, 54),
           (33, 7),
           (33, 246),
           (37, 0),
           (39, 37),
           (40, 39),
           (46, 53),
           (52, 65),
           (53, 52),
           (54, 103),
           (58, 132),
           (61, 146),
           (61, 185),
           (63, 105),
           (65, 55),
           (66, 107),
           (67, 109),
           (70, 63),
           (78, 95),
           (78, 191),
           (80, 81),
           (81, 82),
           (82, 13),
           (84, 17),
           (87, 14),
           (88, 178),
           (91, 181),
           (93, 234),
           (95, 88),
           (103, 67),
           (105, 66),
           (109, 10),
           (127, 162),
           (132, 93),
           (136, 172),
           (144, 145),
           (145, 153),
           (146, 91),
           (148, 176),
           (149, 150),
   

In [3]:
# Display the face tessellation connections; the cell output shows a frozenset of index pairs.
mp_holistic.FACEMESH_TESSELATION

frozenset({(18, 17),
           (82, 38),
           (8, 9),
           (456, 248),
           (167, 2),
           (303, 271),
           (69, 104),
           (253, 450),
           (41, 42),
           (315, 16),
           (73, 74),
           (339, 373),
           (258, 385),
           (105, 63),
           (219, 48),
           (304, 272),
           (236, 217),
           (231, 22),
           (85, 84),
           (365, 367),
           (237, 218),
           (243, 112),
           (436, 427),
           (409, 410),
           (248, 456),
           (330, 266),
           (136, 135),
           (88, 95),
           (394, 430),
           (107, 108),
           (118, 117),
           (439, 438),
           (442, 443),
           (260, 466),
           (115, 48),
           (342, 276),
           (121, 128),
           (200, 421),
           (113, 124),
           (132, 137),
           (23, 230),
           (343, 357),
           (443, 444),
           (62, 78),
           (26,