# Data Preprocessing

In [1]:
import pandas as pd
import numpy as np
import ast

## Loading Data

In [2]:
# Use this cell for the automated dataset (usually many captured frames).
# NOTE(review): the preview output shows an "Unnamed: 0" column, so the CSV
# appears to carry a saved row index that is loaded as ordinary data here —
# confirm whether downstream steps expect that column.
df = pd.read_csv(filepath_or_buffer="../data/hand_landmarks_dataset.csv")

In [3]:
# If you are using the manual dataset (small number of frames captured)
# df = pd.read_csv("../data/hand_landmarks_dataset_manual.csv")

In [4]:
# Show the loaded frame as the cell output to check its columns and labels.
df

Unnamed: 0,x0,y0,x1,y1,x2,y2,x3,y3,x4,y4,...,y16,x17,y17,x18,y18,x19,y19,x20,y20,label
0,0.112744,1.002886,0.139458,0.924262,0.179153,0.842563,0.196472,0.773636,0.183660,0.711905,...,0.982547,0.186452,0.992788,0.242192,1.001988,0.257459,1.010547,0.262986,1.011944,Thumb_Up
1,0.117105,0.978692,0.130759,0.909880,0.171385,0.823776,0.193299,0.752703,0.188956,0.691872,...,0.963713,0.208927,0.985687,0.266728,0.995892,0.250792,1.010948,0.227805,1.015338,Thumb_Up
2,0.123632,0.910718,0.143369,0.821527,0.185051,0.732386,0.210989,0.667876,0.210934,0.615956,...,0.874301,0.254248,0.924860,0.307976,0.924109,0.289740,0.929391,0.265845,0.926463,Thumb_Up
3,0.139660,0.873068,0.160446,0.782714,0.201882,0.692249,0.222892,0.619703,0.212347,0.558886,...,0.853130,0.270262,0.889033,0.323575,0.891329,0.301898,0.901422,0.276269,0.902476,Thumb_Up
4,0.146467,0.846866,0.167593,0.756787,0.206880,0.664376,0.225695,0.590964,0.215911,0.529350,...,0.828326,0.277677,0.863058,0.333720,0.865085,0.313120,0.875243,0.287548,0.875253,Thumb_Up
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2863,0.398795,0.715189,0.342019,0.689019,0.291966,0.711174,0.260679,0.746571,0.227816,0.771230,...,0.891790,0.429478,0.796487,0.431496,0.841918,0.420554,0.860515,0.405741,0.861701,you
2864,0.397781,0.714078,0.340224,0.687992,0.290015,0.710467,0.261254,0.746361,0.228428,0.769690,...,0.890767,0.431862,0.796274,0.435465,0.839924,0.425243,0.860321,0.409433,0.862526,you
2865,0.398220,0.712826,0.339487,0.686605,0.289583,0.708566,0.260327,0.744253,0.228137,0.767532,...,0.891343,0.431967,0.796463,0.435802,0.842501,0.426388,0.862676,0.411957,0.863276,you
2866,0.402291,0.713850,0.343470,0.686734,0.292650,0.708653,0.262577,0.744574,0.228841,0.767063,...,0.891870,0.433264,0.800198,0.435661,0.845247,0.426095,0.863592,0.412277,0.862907,you


## Data Normalization

In [5]:
# Normalize relative to wrist position (x0, y0) AND scale normalization
def normalize_landmarks(frame, num_landmarks=21):
    """Return a copy of ``frame`` with landmarks made wrist-relative and unit-scaled.

    For every row, the wrist coordinates (``x0``, ``y0``) are subtracted from each
    landmark, and the result is multiplied by ``1 / d_max``, where ``d_max`` is the
    largest Euclidean distance from the wrist to any landmark of that row. Rows
    whose ``d_max`` is not greater than zero keep a scale factor of 1.0, so they
    are translated but not scaled.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``x0`` and ``y0``. Any ``x{i}``/``y{i}`` pair with
        ``0 <= i < num_landmarks`` that is present is normalized; a landmark
        with only one of its two columns is left untouched.
    num_landmarks : int, default 21
        Number of landmark indices to look for (indices 0-20 by default).

    Returns
    -------
    pandas.DataFrame
        A new frame; ``frame`` itself is not modified. All other columns are
        copied through unchanged.

    Raises
    ------
    KeyError
        If ``x0`` or ``y0`` is missing.
    """
    # Resolve landmark columns by name once, so a missing pair can never shift
    # the values that get written back to the remaining columns.
    present = [
        i
        for i in range(num_landmarks)
        if f"x{i}" in frame.columns and f"y{i}" in frame.columns
    ]
    x_cols = [f"x{i}" for i in present]
    y_cols = [f"y{i}" for i in present]

    # Wrist coordinates as column vectors so they broadcast across landmarks.
    wrist_x = frame["x0"].to_numpy(dtype=float)[:, None]
    wrist_y = frame["y0"].to_numpy(dtype=float)[:, None]

    # Translate every landmark so the wrist sits at the origin.
    xs_rel = frame[x_cols].to_numpy(dtype=float) - wrist_x
    ys_rel = frame[y_cols].to_numpy(dtype=float) - wrist_y

    # Per-row maximum distance from the wrist to any landmark.
    max_distance = np.sqrt(xs_rel**2 + ys_rel**2).max(axis=1)

    # Avoid division by zero: rows where max_distance > 0 is False keep 1.0.
    scale = np.ones_like(max_distance)
    np.divide(1.0, max_distance, out=scale, where=max_distance > 0)

    # Positional whole-column assignment; index labels play no part, so
    # duplicate labels cannot cause one row to overwrite another.
    result = frame.copy()
    result[x_cols] = xs_rel * scale[:, None]
    result[y_cols] = ys_rel * scale[:, None]
    return result


normalized_df = normalize_landmarks(df)

## Saving Normalized Data

In [6]:
# Use this cell for the automated dataset: write the normalized frame to CSV
# without the DataFrame's row index.
normalized_df.to_csv(
    path_or_buf="../data/hand_landmarks_dataset_normalized_to_the_wrist.csv",
    index=False,
)

In [32]:
# If you are using the manual dataset
# normalized_df.to_csv(
#     "../data/hand_landmarks_dataset_normalized_to_the_wrist_manual.csv", index=False
# )