## Create Keypoint Dataset - Instructions
1. Prepare a video of the hand gesture
    - Ideally, the video contains only your target gesture. Cut the video to exclude moments before or after your gesture.
2. Use the final cell to specify your video path. 
3. Adjust the gesture id values to match your target dataset
4. Running the log_keypoints_from_video_or_folder() function uses mediapipe hand tracking to infer keypoint positions for each frame, and appends them, labeled with the selected gesture id, to keypoint.csv
5. Use train-keypoint-classifier\isolated_data\assembler.ipynb and train-keypoint-classifier\isolated_data\analysis.ipynb to combine gesture csvs into a single training dataset

In [None]:
# standard-library and third-party imports
import csv
import copy
import itertools
import os
from collections import deque
import cv2 as cv
import numpy as np
import mediapipe as mp

In [None]:
#import the runtime models
from runtime_models.keypoint_classifier.keypoint_classifier import KeyPointClassifier
from runtime_models.point_history_classifier.point_history_classifier import PointHistoryClassifier

In [None]:
# Tells mediapipe whether to use static image mode or video stream mode
# (video stream mode is used for temporal tracking of keypoints).
# Passed as static_image_mode when the Hands tracker is created.
USE_STATIC_IMAGE_MODE = False 
# Passed as min_detection_confidence when the Hands tracker is created.
MIN_DETECTION_CONFIDENCE = 0.6 
# Passed as min_tracking_confidence when the Hands tracker is created.
MIN_TRACKING_CONFIDENCE = 0.6

In [None]:
#helper methods
def read_keypoint_classifier_labels():
    """
    Read the keypoint classifier label names.

    Opens runtime_models/keypoint_classifier/keypoint_classifier_label.csv
    as utf-8-sig and takes the first column of every row.  Blank lines are
    skipped instead of raising IndexError.

    returns:
        list of label strings, in file order
    """
    with open(
        "runtime_models/keypoint_classifier/keypoint_classifier_label.csv",
        encoding="utf-8-sig",
        newline="",
    ) as f:
        # csv.reader yields an empty list for a blank line; `if row` drops it
        return [row[0] for row in csv.reader(f) if row]


def read_point_history_classifier_labels():
    """
    Read the point-history classifier label names.

    Opens
    runtime_models/point_history_classifier/point_history_classifier_label.csv
    as utf-8-sig and takes the first column of every row.  Blank lines are
    skipped instead of raising IndexError.

    returns:
        list of label strings, in file order
    """
    with open(
        "runtime_models/point_history_classifier/point_history_classifier_label.csv",
        encoding="utf-8-sig",
        newline="",
    ) as f:
        # csv.reader yields an empty list for a blank line; `if row` drops it
        return [row[0] for row in csv.reader(f) if row]


def avi_to_images(video_file):
    """
    Decode every frame of a video file into a list of images.

    video_file:
        path handed to cv.VideoCapture
    returns:
        list of the frames returned by cap.read(), in read order;
        empty when no frame could be read
    """
    images = []
    cap = cv.VideoCapture(video_file)
    try:
        while True:
            ret, image = cap.read()
            # ret becomes False once no further frame can be read
            if not ret:
                break
            images.append(image)
    finally:
        # release the capture even if reading raises part-way through
        cap.release()
    return images


def calc_landmark_list(image, landmarks):
    """
    Convert landmark coordinates into pixel coordinates.

    image:
        object whose .shape starts with (height, width)
    landmarks:
        object whose .landmark is an iterable of points with .x and .y,
        each multiplied by the image width / height here
    returns:
        list of [x, y] integer pairs, capped at width - 1 and height - 1
    """
    height, width = image.shape[:2]
    max_x, max_y = width - 1, height - 1

    return [
        [min(int(point.x * width), max_x), min(int(point.y * height), max_y)]
        for point in landmarks.landmark
    ]


def pre_process_landmark(landmark_list):
    """
    Turn a list of landmark points into a flat, normalized feature list.

    Every point is made relative to the first point, the points are
    flattened into one list, and each value is divided by the largest
    absolute value so the result lies in [-1, 1].

    landmark_list:
        sequence of [x, y] points; it is not modified
    returns:
        flat list [x0, y0, x1, y1, ...]; an empty list for empty input,
        and all zeros when every point coincides with the first one
        (previously both cases raised)
    """
    if not landmark_list:
        return []

    base_x, base_y = landmark_list[0][0], landmark_list[0][1]

    flat = []
    for point in landmark_list:
        flat.append(point[0] - base_x)
        flat.append(point[1] - base_y)
        # any components beyond x and y are carried over unshifted,
        # as they were before
        flat.extend(point[2:])

    max_value = max(map(abs, flat))
    if max_value == 0:
        # all points identical: nothing to scale, avoid dividing by zero
        return [0.0 for _ in flat]

    return [value / max_value for value in flat]


def pre_process_point_history(image, point_history):
    """
    Flatten a point history into coordinates relative to its first point.

    image:
        object whose .shape starts with (height, width)
    point_history:
        iterable of [x, y] points; it is not modified
    returns:
        flat list [x0, y0, x1, y1, ...] where x offsets are divided by the
        image width and y offsets by the image height
    """
    height, width = image.shape[0], image.shape[1]

    flattened = []
    origin = None
    for point in point_history:
        # the first point becomes the origin for all the others
        if origin is None:
            origin = (point[0], point[1])
        flattened.append((point[0] - origin[0]) / width)
        flattened.append((point[1] - origin[1]) / height)
        # components beyond x and y are passed through untouched
        flattened.extend(point[2:])

    return flattened


def count_zeros(list):
    """
    Count the items that compare equal to a zero-like value.

    Zero-like values are 0, 0.0, "0", "0.0" and [0.0, 0.0].

    list:
        iterable of items to inspect (the name shadows the builtin, but is
        kept because it is part of the signature)
    returns:
        number of zero-like items
    """
    zero_like = (0, 0.0, "0", "0.0", [0.0, 0.0])
    return sum(1 for item in list if item in zero_like)


def log(
    gesture_number,
    landmark_list,
    print_mode=True,
    write_mode=True,
    csv_path="keypoint.csv",
):
    """
    Print and/or append one labeled keypoint row.

    The row is the gesture number followed by every value of landmark_list.

    gesture_number:
        class id stored in the first column
    landmark_list:
        iterable of values stored in the remaining columns
    print_mode:
        when true, print the row
    write_mode:
        when true, append the row to csv_path
    csv_path:
        CSV file to append to; defaults to "keypoint.csv", the path that
        was previously hard-coded
    """
    row_content = [gesture_number, *landmark_list]

    if print_mode:
        print(row_content)

    if write_mode:
        # newline="" lets the csv module control line endings itself
        with open(csv_path, "a", newline="") as f:
            csv.writer(f).writerow(row_content)

In [None]:
#main logging method
def log_keypoints_from_video_or_folder(path, gesture_number=-1):
    """
    Run mediapipe hand tracking over every frame of a video file, or every
    image in a folder, and pass the pre-processed keypoints of each detected
    hand to log() together with gesture_number.

    path:
        a video file, or a folder of images (read in sorted filename order);
        if it is neither, a warning is printed and nothing is logged
    gesture_number:
        the number of the gesture that we're adding to the data file
    """

    # load the images depending if folder or video file
    if os.path.isdir(path):
        print(f"Processing this path as a folder: {path}")
        images = []
        for image_file in sorted(os.listdir(path)):
            image_path = os.path.join(path, image_file)
            image = cv.imread(image_path)
            # cv.imread returns None for entries it cannot decode
            # (sub-folders, non-image files); skip them instead of crashing
            # later in cv.flip
            if image is None:
                print(f"WARNING! Skipping unreadable image: {image_path}")
                continue
            images.append(image)
    elif os.path.isfile(path):
        print(f"Processing this path as a video file: {path}")
        images = avi_to_images(path)
    else:
        print(f"WARNING! This path is neither a folder nor a file: {path}")
        return

    keypoint_classifier = KeyPointClassifier()
    point_history_classifier = PointHistoryClassifier()
    history_length = 16
    point_history = deque(maxlen=history_length)
    finger_gesture_history = deque(maxlen=history_length)

    mp_hands = mp.solutions.hands
    # the with-block closes the tracker once all images are processed,
    # even if an error occurs part-way through
    with mp_hands.Hands(
        static_image_mode=USE_STATIC_IMAGE_MODE,
        max_num_hands=1,
        min_detection_confidence=MIN_DETECTION_CONFIDENCE,
        min_tracking_confidence=MIN_TRACKING_CONFIDENCE,
    ) as hands:
        # play video once
        for image in images:
            image = cv.flip(image, 1)
            debug_image = copy.deepcopy(image)

            image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True

            if results.multi_hand_landmarks is None:
                # no hand in this frame: record a placeholder point
                point_history.append([0, 0])
                continue

            for hand_landmarks in results.multi_hand_landmarks:
                landmark_list = calc_landmark_list(debug_image, hand_landmarks)

                pre_processed_landmark_list = pre_process_landmark(landmark_list)
                pre_processed_point_history_list = pre_process_point_history(
                    debug_image, point_history
                )

                log(
                    gesture_number,
                    pre_processed_landmark_list,
                )

                # NOTE(review): the classifier results below are not written
                # anywhere by this function; they are kept so the function
                # behaves as before.
                hand_sign_id = keypoint_classifier(pre_processed_landmark_list)

                if hand_sign_id == 2:
                    point_history.append(landmark_list[8])
                else:
                    point_history.append([0, 0])

                finger_gesture_id = 0
                point_history_len = len(pre_processed_point_history_list)
                # classify only once the history is full (two values per point)
                if point_history_len == (history_length * 2):
                    finger_gesture_id = point_history_classifier(
                        pre_processed_point_history_list
                    )

                finger_gesture_history.append(finger_gesture_id)

Here, input the labels for each icon

These labels will be used to tag each logged set of hand keypoints with its respective class

In [None]:
# One numeric class id per gesture.
open_id = 0
closed_id = 1
cam_point_id = 2
vert_point_id = 3
# The id passed as gesture_number in the final cell, so it becomes the first
# column of every logged row. Set it to the gesture shown in the video.
# NOTE(review): the example video path in the final cell is named
# "vertical_point_video..." while this selects open_id - confirm the label
# matches the video before running.
selected_id = open_id

In [None]:
# Path of the video (or folder of images) to process; replace with your own.
video_path = r"C:\my_files\data\matt_gesture\labeled_videos\vertical_point_video_2_V123.avi"
# Logs the keypoints found in the video, labeled with selected_id.
log_keypoints_from_video_or_folder(video_path,gesture_number=selected_id)
