In [1]:
pip install pandas numpy opencv-python mediapipe

Collecting opencv-python
  Downloading opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl.metadata (19 kB)
Collecting mediapipe
  Downloading mediapipe-0.10.32-py3-none-manylinux_2_28_x86_64.whl.metadata (9.8 kB)
Collecting absl-py~=2.3 (from mediapipe)
  Downloading absl_py-2.4.0-py3-none-any.whl.metadata (3.3 kB)
Collecting sounddevice~=0.5 (from mediapipe)
  Downloading sounddevice-0.5.5-py3-none-any.whl.metadata (1.4 kB)
Collecting flatbuffers~=25.9 (from mediapipe)
  Downloading flatbuffers-25.12.19-py2.py3-none-any.whl.metadata (1.0 kB)
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl.metadata (19 kB)
Collecting cffi (from sounddevice~=0.5->mediapipe)
  Downloading cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.6 kB)
Collecting pycparser (from cffi->sounddevice~=0.5->mediapipe)
  Downloading pycparser-3.0-py3-none-any.whl.metadata (8.2 kB)
Downloading ope

In [5]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import time

In [6]:
# Short aliases for the MediaPipe drawing helpers and the hands solution module.
mp_draw = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Single shared tracker instance used by the capture loop: video (non-static)
# mode, at most two hands, 0.6 threshold for both detection and tracking.
hands = mp_hands.Hands(
    max_num_hands=2,
    static_image_mode=False,
    min_tracking_confidence=0.6,
    min_detection_confidence=0.6,
)

I0000 00:00:1772027845.177230   19340 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1772027845.198020   19950 gl_context.cc:369] GL version: 3.0 (OpenGL ES 3.0 Mesa 25.0.7-0ubuntu0.24.04.2), renderer: Mesa Intel(R) HD Graphics 4000 (IVB GT2)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1772027845.272575   19947 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1772027845.308520   19946 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [7]:
def extract_hand(hand_landmarks):
    """Flatten one hand's landmarks into a single list of coordinates.

    Args:
        hand_landmarks: object exposing an iterable ``landmark`` attribute
            whose items each have ``x``, ``y`` and ``z`` attributes.

    Returns:
        A flat list ``[x0, y0, z0, x1, y1, z1, ...]`` in landmark order.
        The list is empty when there are no landmarks.
    """
    return [
        coord
        for point in hand_landmarks.landmark
        for coord in (point.x, point.y, point.z)
    ]

In [None]:
# Interactive landmark collector.
#
# Reads frames from camera 0, runs the shared `hands` tracker on each frame and,
# while collection is toggled on, appends one row per frame to `records`:
#   [gesture label, L_exist, R_exist] + 63 left-hand values + 63 right-hand values
# A hand that is not detected contributes 63 zeros and an exist flag of 0.
cap = cv2.VideoCapture(0)

current_gesture = "none"
collecting = False
records = []

print("""
q : quit
c : toggle collect
1-9 : set gesture (gesture_1, gesture_2...)
""")

if not cap.isOpened():
    # Without this the loop below is skipped silently and the cell looks like
    # it succeeded while recording nothing.
    print("Could not open camera 0; nothing will be recorded.")

# try/finally guarantees the camera and the preview window are released even
# when the cell is stopped with a kernel interrupt or an exception is raised
# mid-loop; otherwise the device stays locked until the kernel is restarted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame horizontally, then convert BGR -> RGB for MediaPipe.
        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        res = hands.process(rgb)

        # Defaults for "hand not present": zero-filled features, exist flag 0.
        L_feat = [0.0] * 63
        R_feat = [0.0] * 63
        L_exist = 0
        R_exist = 0

        if res.multi_hand_landmarks:
            for hand_lm, handedness in zip(
                res.multi_hand_landmarks,
                res.multi_handedness
            ):
                label = handedness.classification[0].label
                feat = extract_hand(hand_lm)

                # Any label other than "Left" is stored in the right-hand slot.
                # If two detections share a label, the later one overwrites
                # the earlier one.
                if label == "Left":
                    L_feat = feat
                    L_exist = 1
                else:
                    R_feat = feat
                    R_exist = 1

                mp_draw.draw_landmarks(
                    frame, hand_lm, mp_hands.HAND_CONNECTIONS
                )

        if collecting:
            row = (
                [current_gesture, L_exist, R_exist]
                + L_feat
                + R_feat
            )
            records.append(row)

        # Status overlay: green text while collecting, red otherwise.
        cv2.putText(
            frame,
            f"Gesture: {current_gesture} | Collecting: {collecting}",
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0) if collecting else (0, 0, 255),
            2
        )

        cv2.imshow("Landmark Collector", frame)

        # Poll the keyboard for 1 ms; mask to the low byte for a plain key code.
        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            break
        elif key == ord("c"):
            collecting = not collecting
            print("Collecting:", collecting)
        elif ord("1") <= key <= ord("9"):
            current_gesture = f"gesture_{key - ord('0')}"
            print("Gesture set:", current_gesture)
finally:
    cap.release()
    cv2.destroyAllWindows()


q : quit
c : toggle collect
1-9 : set gesture (gesture_1, gesture_2...)



W0000 00:00:1772027855.466982   19946 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


KeyboardInterrupt: 

: 

In [8]:
# Column layout mirrors the rows built by the capture loop: label, the two
# presence flags, then x/y/z for each of the 21 left-hand landmarks followed by
# the same for the right hand (L_x0, L_y0, L_z0, L_x1, ...).
columns = ["gesture", "L_exist", "R_exist"] + [
    f"{side}_{axis}{idx}"
    for side in ("L", "R")
    for idx in range(21)
    for axis in ("x", "y", "z")
]

# Build the table in one shot from the accumulated rows and persist it as CSV
# without the index column.
df = pd.DataFrame(data=records, columns=columns)
df.to_csv("hand_landmarks.csv", index=False)
print("Saved:", df.shape)

Saved: (136, 129)
