# Data Preprocessing

In [1]:
import pandas as pd
import numpy as np
import ast

## Loading Data

In [2]:
# Automated dataset: typically contains a large number of captured frames.
df = pd.read_csv(filepath_or_buffer="../data/hand_landmarks_dataset.csv")

In [3]:
# If you are using the manual dataset (small number of frames captured)
# df = pd.read_csv("../data/hand_landmarks_dataset_manual.csv")

In [4]:
# Preview the loaded frame; a bare last expression is rendered as the cell's output.
df

Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y16,x17,y17,x18,y18,x19,y19,x20,y20,label
0,0.269497,1.038921,0.305227,1.040505,0.344397,1.013743,0.355642,0.981788,0.349772,0.951295,...,1.062047,0.234030,0.876983,0.228671,0.943635,0.233605,1.017466,0.233923,1.056983,1
1,0.288499,1.010693,0.318804,0.996517,0.346039,0.955908,0.338388,0.915551,0.326612,0.867486,...,0.957814,0.234958,0.814380,0.224109,0.844781,0.231364,0.918228,0.242048,0.953683,1
2,0.266102,0.972654,0.313219,0.951894,0.357090,0.887377,0.358399,0.805984,0.332014,0.747224,...,0.864877,0.233910,0.754484,0.238763,0.730673,0.247585,0.806240,0.253285,0.850902,1
3,0.260013,0.919207,0.312043,0.894442,0.353433,0.810146,0.349351,0.715753,0.320890,0.650174,...,0.791231,0.235647,0.684741,0.238130,0.649145,0.245539,0.729522,0.251274,0.772971,1
4,0.252596,0.876270,0.310088,0.833190,0.348971,0.746047,0.341028,0.646938,0.309173,0.581289,...,0.734093,0.223737,0.636978,0.226378,0.581608,0.236270,0.658376,0.242971,0.710453,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903,0.461355,0.655601,0.511157,0.703100,0.571751,0.712482,0.621753,0.717357,0.662093,0.741062,...,0.287558,0.527591,0.439341,0.542971,0.358903,0.553009,0.308101,0.563345,0.262567,5
904,0.453474,0.703381,0.505889,0.749684,0.566118,0.760033,0.614755,0.768418,0.655030,0.790713,...,0.326631,0.517873,0.488673,0.530760,0.410110,0.541695,0.357395,0.554194,0.310094,5
905,0.445783,0.808974,0.496933,0.860340,0.556681,0.870660,0.604514,0.874522,0.643183,0.892837,...,0.415971,0.512336,0.588080,0.528162,0.499725,0.541270,0.442909,0.555221,0.393930,5
906,0.479247,0.886815,0.528122,0.923171,0.574986,0.912352,0.610069,0.887019,0.635641,0.866625,...,0.594284,0.507271,0.733369,0.529004,0.655950,0.541441,0.603638,0.553429,0.555913,5


## Data Normalization

In [5]:
# Normalize relative to wrist position (x0, y0) AND scale normalization
NUM_LANDMARKS = 21  # Landmarks are numbered 0-20; landmark 0 is the wrist


def normalize_landmarks(frame, num_landmarks=NUM_LANDMARKS):
    """Return a copy of ``frame`` with wrist-centred, unit-scaled landmarks.

    For every row, the wrist (landmark 0) is subtracted from each landmark and
    the result is multiplied by ``1 / max_distance``, where ``max_distance`` is
    the largest wrist-to-landmark distance in that row. The farthest landmark
    therefore ends up at distance 1 from the origin. Rows whose maximum
    distance is not strictly positive (all landmarks on the wrist, or NaN
    coordinates) are only centred, not scaled.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain ``x0`` and ``y0``. Any other ``x{i}``/``y{i}`` pair is
        normalized when both columns are present; incomplete pairs and all
        non-landmark columns are left untouched.
    num_landmarks : int, default 21
        Number of landmark indices to look for (``0 .. num_landmarks - 1``).

    Returns
    -------
    pd.DataFrame
        A copy of ``frame`` in which only the landmark columns are modified.

    Raises
    ------
    KeyError
        If ``x0`` or ``y0`` is missing.
    """
    # Only landmarks with both coordinates present take part in the
    # normalization. Tracking their indices keeps the write-back aligned with
    # the right columns even when a pair in the middle is missing.
    present = [
        i
        for i in range(num_landmarks)
        if f"x{i}" in frame.columns and f"y{i}" in frame.columns
    ]
    if 0 not in present:
        raise KeyError("wrist columns 'x0' and 'y0' are required")

    coord_cols = [f"{axis}{i}" for i in present for axis in ("x", "y")]
    n_rows = len(frame)

    # Shape (n_rows, n_present_landmarks, 2); the wrist is at position 0.
    coords = (
        frame[coord_cols].to_numpy(dtype=float).reshape(n_rows, len(present), 2)
    )

    # Normalize relative to wrist
    centered = coords - coords[:, :1, :]

    # Scale normalization: largest wrist-to-landmark distance per row
    max_distance = np.linalg.norm(centered, axis=2).max(axis=1)

    # Avoid division by zero: the factor stays 1.0 wherever max_distance is
    # not > 0 (this also covers NaN, since NaN > 0 is False).
    scale_factor = np.ones_like(max_distance)
    np.divide(1.0, max_distance, out=scale_factor, where=max_distance > 0)

    scaled = centered * scale_factor[:, None, None]

    result = frame.copy()
    result[coord_cols] = scaled.reshape(n_rows, len(coord_cols))
    return result


normalized_df = normalize_landmarks(df)

# Normalization must not add, drop or reorder rows/columns.
assert normalized_df.shape == df.shape

## Saving Normalized Data

In [6]:
# Automated dataset: write the wrist-normalized landmarks without the index column.
normalized_df.to_csv(
    path_or_buf="../data/hand_landmarks_dataset_normalized_to_the_wrist.csv",
    index=False,
)

In [7]:
# If you are using the manual dataset
# normalized_df.to_csv(
#     "../data/hand_landmarks_dataset_normalized_to_the_wrist_manual.csv", index=False
# )