In [11]:
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
from utils import draw_landmarks, calculate_landmarks
import time
import itertools
import copy

In [12]:
# Handle to the MediaPipe hands solution module, plus one shared `Hands`
# instance; `capture_pose` calls `mp_hands.process(...)` on every frame.
mp_hands_sol = mp.solutions.hands
mp_hands = mp_hands_sol.Hands(
    # presumably caps detection at one hand per frame — confirm against MediaPipe docs
    max_num_hands=1,
    # confidence thresholds passed straight to MediaPipe; values chosen by the author
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5,
)

In [13]:
def get_landmarks(image, keypoints):
    """Convert a MediaPipe hands result into a landmark list.

    Args:
        image: Frame the result was computed from; forwarded unchanged to
            `calculate_landmarks`.
        keypoints: Result object exposing `multi_hand_landmarks`
            (an iterable of per-hand landmarks, or None when nothing was
            detected).

    Returns:
        Whatever `calculate_landmarks` returns for the last hand in
        `keypoints.multi_hand_landmarks`, or an empty list when no hand was
        detected.
    """
    landmark_list = []
    if keypoints.multi_hand_landmarks is not None:
        # Handedness is not needed here, so only the landmarks are iterated;
        # this also avoids failing when `multi_handedness` is missing/None.
        # If several hands are present, the last one wins (as before).
        for hand_landmarks in keypoints.multi_hand_landmarks:
            landmark_list = calculate_landmarks(image, hand_landmarks)
    return landmark_list

In [14]:
def preprocess_landmarks(landmark_list):
    """Make landmarks relative to the first point and scale them to [-1, 1].

    Each landmark's first three components have the first landmark's
    components subtracted from them. The result is flattened into one list
    and divided by the largest absolute value in it.

    Args:
        landmark_list: Sequence of landmarks, each indexable with at least
            three numeric components. The input is not modified.

    Returns:
        A flat list of floats. An empty input yields an empty list. If every
        relative value is zero (e.g. a single landmark), a list of zeros is
        returned instead of dividing by zero.
    """
    if len(landmark_list) == 0:
        # Nothing to normalize; max() on an empty sequence would raise.
        return []

    origin = landmark_list[0]
    base_x, base_y, base_z = origin[0], origin[1], origin[2]

    flattened = []
    for point in landmark_list:
        flattened.append(point[0] - base_x)
        flattened.append(point[1] - base_y)
        flattened.append(point[2] - base_z)
        # Any components beyond the first three are carried over unchanged.
        flattened.extend(point[3:])

    max_value = max(abs(value) for value in flattened)
    if max_value == 0:
        # All points coincide with the first one: avoid ZeroDivisionError.
        return [0.0] * len(flattened)

    return [value / max_value for value in flattened]

In [15]:
def create_datapoint(pose, landmarks, datapoints):
    """Append one labelled sample to `datapoints` (mutated in place).

    The new record is a dict with three keys:
      - 'index': number of records already in `datapoints`
      - 'pose': the `pose` label as given
      - 'keypoints': result of `preprocess_landmarks(landmarks)`

    Returns None.
    """
    next_index = len(datapoints)
    normalized = preprocess_landmarks(landmarks)
    datapoints.append(dict(index=next_index, pose=pose, keypoints=normalized))

In [16]:
# Gesture labels that can be recorded. `capture_pose` uses the lower-cased
# first letter of each label as its keyboard shortcut.
poses = [
    'HOLD',
    'GRAB',
    'FIST',
    'INDEX',
    'PEACE',
    'OK',
]

# Accumulator for the records appended by `create_datapoint`.
data = list()

In [17]:
def capture_pose(length=5):
    """Interactively record hand-pose samples from the default webcam.

    Frames are read from camera 0, run through the shared `mp_hands`
    detector, annotated and shown in a window named 'frame'. While a pose is
    selected and a hand is detected, each frame adds one record to the
    module-level `data` list via `create_datapoint`.

    Keyboard controls (checked once per frame):
      - first letter (lower case) of a label in `poses`: start recording it
      - 'n': stop recording (no pose selected)
      - 'q': quit

    Args:
        length: Currently unused; kept for backward compatibility.

    Returns:
        None. The loop also ends when the camera stops delivering frames.
    """
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 800)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 600)

    # Key code -> pose label; built once instead of scanning `poses` per frame.
    hotkeys = {ord(pose[0].lower()): pose for pose in poses}
    current_pose = None

    try:
        while True:
            ret, image = cap.read()
            if not ret:
                # Check the read result before touching `image`: on failure
                # it is not a valid frame and must not be processed or shown.
                break

            # OpenCV delivers BGR frames, while MediaPipe's Hands.process is
            # documented to take RGB, so convert a copy for detection only.
            hand_kpts = mp_hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            landmarks = get_landmarks(image, hand_kpts)
            image = draw_landmarks(image, landmarks)

            if current_pose is not None and len(landmarks) > 0:
                create_datapoint(current_pose, landmarks, data)

            cv2.putText(image, f'Capturing pose {current_pose}', (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 1, cv2.LINE_AA)
            cv2.imshow('frame', image)

            key = cv2.waitKey(25) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('n'):
                current_pose = None
            elif key in hotkeys:
                current_pose = hotkeys[key]
    finally:
        # Always free the camera and close the window, even if a frame
        # raised part-way through the loop.
        cap.release()
        cv2.destroyAllWindows()

In [18]:
# Start the interactive capture session; records are appended to `data`
# until 'q' is pressed or the camera stops returning frames.
capture_pose()

In [19]:
# Build a DataFrame from the captured records (dicts with 'index', 'pose'
# and 'keypoints' keys) and preview the first rows.
df = pd.DataFrame(data)
df.head()

Unnamed: 0,index,pose,keypoints
0,0,HOLD,"[0.0, 0.0, 0.0, -0.2, -0.05652173913043478, -7..."
1,1,HOLD,"[0.0, 0.0, 0.0, -0.2, -0.05217391304347826, -7..."
2,2,HOLD,"[0.0, 0.0, 0.0, -0.20087336244541484, -0.04803..."
3,3,HOLD,"[0.0, 0.0, 0.0, -0.20087336244541484, -0.05240..."
4,4,HOLD,"[0.0, 0.0, 0.0, -0.2, -0.04782608695652174, -7..."


In [20]:
# Persist the captured samples to CSV.
# NOTE(review): to_csv writes the DataFrame's row index by default, which
# duplicates the explicit 'index' column — consider index=False if the
# consumers of this file allow it.
df.to_csv('poses_left.csv')