## Dataset Generator

> @ Author: Chen Wei

In [2]:
import os, copy, itertools, csv
import cv2 as cv
import numpy as np
import mediapipe as mp

2024-04-23 18:48:33.097085: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [3]:
def get_bounding_rect_and_landmark_points(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]
    landmark_points = []
    
    for landmark in landmarks[0].landmark:
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        landmark_point = [landmark_x, landmark_y]
        landmark_points.append(landmark_point)
        
    x, y, w, h = cv.boundingRect(np.array(landmark_points))
    return [x, y, x+w, y+h], landmark_points

In [4]:
def pre_process_landmark(landmark_points):
    temp_landmark_points = copy.deepcopy(landmark_points)
    
    # Convert to relative coordinates
    base_x = temp_landmark_points[0][0]
    base_y = temp_landmark_points[0][1]
    
    index = 1
    for landmark_point in temp_landmark_points[1:]:
        temp_landmark_points[index][0] = temp_landmark_points[index][0] - base_x
        temp_landmark_points[index][1] = temp_landmark_points[index][1] - base_y
        index += 1
    
    # Convert to a one-dimensional list
    temp_landmark_points = list(itertools.chain.from_iterable(temp_landmark_points))
    
    # normalize list
    max_value = max(temp_landmark_points, key=abs)
    temp_landmark_points = list(map(lambda x: x/max_value, temp_landmark_points))
    
    return temp_landmark_points

In [5]:
os.chdir("..")
root_path = os.getcwd()
root_path

'/root/hand-gesture-recognition-mediapipe'

In [90]:
dataset_path = os.path.join(root_path, "dataset/ori_dataset")

# csv_data_path = os.path.join(root_path, "dataset/keypoint.csv")
# csv_data_file = open(csv_data_path, "w", newline="")
# csv_data_writer = csv.writer(csv_data_file)

csv_label_path = os.path.join(root_path, "dataset/keypoint_classifier_label.csv")
csv_label_file = open(csv_label_path, "w", newline="")
csv_label_writer = csv.writer(csv_label_file)

mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5)

class_name = None
class_index = -1
for root, dirs, files in os.walk(dataset_path, topdown=False):
    if root.endswith(".ipynb_checkpoints") or root.endswith("dataset"):
            continue
    for file in files:
        if file.startswith("."): continue
        new_class_name = root.split("/")[-1]
        if class_name != new_class_name:
            class_name = new_class_name
            print("class_name: {}".format(class_name))
            class_index += 1
            csv_label_writer.writerow([class_name])
        
        file_path = os.path.join(root, file)
        image = cv.imread(file_path)
        flip_image = cv.flip(image, 1)
        debug_image = copy.deepcopy(flip_image)
        rgb_image = cv.cvtColor(flip_image, cv.COLOR_BGR2RGB)
        results = hands.process(rgb_image)
        if results.multi_hand_landmarks is not None:
            hand_landmarks = results.multi_hand_landmarks
            handedness = results.multi_handedness
            brect, landmark_points = get_bounding_rect_and_landmark_points(debug_image, hand_landmarks) # [x, y, x+w, y+h], landmark_points
            pre_processed_landmark_points = pre_process_landmark(landmark_points)
            csv_data_writer.writerow([class_index, *pre_processed_landmark_points])
            
            

class_name: call
class_name: dislike
class_name: fist
class_name: four
class_name: like
class_name: mute
class_name: ok
class_name: one
class_name: stop_inverted
class_name: rock
class_name: peace_inverted
class_name: stop
class_name: palm
class_name: peace
class_name: three
class_name: three2
class_name: two_up
class_name: two_up_inverted


In [83]:
def dataset_loader():
    pass

In [84]:
print("done")

done
