# Notebook to create the final dataframes

The purpose of this notebook is to create the final dataframes that will be used for training the model. In particular, the path corresponding to the images will be added as a new column

In [592]:
import os
import pandas as pd

In [593]:
def get_user_hand_properties_paths(root_dir: str, user_id: str, hand_pose: str):
    '''
    Function to read the hand_properties.csv file for the left hand from the horizontal camera viewpoint
    params:
    - root_dir: str: root directory of the dataset
    - user_id: str: user id, must be in the format '00X' being X the id of the user
    - hand_pose: str: hand pose, must be one of the 17 available hand poses
    return:
    - left_hand_horizontal_path: str: path to the hand_properties.csv file for the left hand from the horizontal camera viewpoint
    - rigth_hand_horizontal_path: str: path to the hand_properties.csv file for the right hand from the horizontal camera viewpoint
    - left_hand_vertical_path: str: path to the hand_properties.csv file for the left hand from the vertical camera viewpoint
    - rigth_hand_vertical_path: str: path to the hand_properties.csv file for the right hand from the vertical camera viewpoint
    '''
    left_hand_horizontal_path = os.path.join(root_dir, user_id, 'Left_Hand', hand_pose, 'Horizontal', 'hand_properties.csv')
    rigth_hand_horizontal_path = os.path.join(root_dir, user_id, 'Right_Hand', hand_pose, 'Horizontal', 'hand_properties.csv')
    left_hand_vertical_path = os.path.join(root_dir, user_id, 'Left_Hand', hand_pose, 'Vertical', 'hand_properties.csv')
    rigth_hand_vertical_path = os.path.join(root_dir, user_id, 'Right_Hand', hand_pose, 'Vertical', 'hand_properties.csv')
    return left_hand_horizontal_path, rigth_hand_horizontal_path, left_hand_vertical_path, rigth_hand_vertical_path

In [594]:
# root_dir = '/autofs/thau00a/home/sestebanro/thau01/Multiview_Leap2_Hand_Pose_Dataset'
root_dir = '/autofs/thau02b/sestebanro/MultiViewLeap2_Dataset/'
hand_pose = 'Spok'
user_id = '018'

In [595]:
left_hand_horizontal_path, rigth_hand_horizontal_path, left_hand_vertical_path, rigth_hand_vertical_path = get_user_hand_properties_paths(root_dir, user_id, hand_pose)
# Check if the paths exist
print("Left hand horizontal path exists: ", os.path.exists(left_hand_horizontal_path))
print("Right hand horizontal path exists: ", os.path.exists(rigth_hand_horizontal_path))
print("Left hand vertical path exists: ", os.path.exists(left_hand_vertical_path))
print("Right hand vertical path exists: ", os.path.exists(rigth_hand_vertical_path))

Left hand horizontal path exists:  True
Right hand horizontal path exists:  True
Left hand vertical path exists:  True
Right hand vertical path exists:  True


In [596]:
def get_user_hand_images_paths(root_dir: str, user_id: str, hand_pose: str):
    '''
    Function to read the hand_properties.csv file for the left hand from the horizontal camera viewpoint
    params:
    - root_dir: str: root directory of the dataset
    - user_id: str: user id, must be in the format '00X' being X the id of the user
    - hand_pose: str: hand pose, must be one of the 17 available hand poses
    return:
    - left_hand_horizontal_path: str: path to the hand_properties.csv file for the left hand from the horizontal camera viewpoint
    - rigth_hand_horizontal_path: str: path to the hand_properties.csv file for the right hand from the horizontal camera viewpoint
    - left_hand_vertical_path: str: path to the hand_properties.csv file for the left hand from the vertical camera viewpoint
    - rigth_hand_vertical_path: str: path to the hand_properties.csv file for the right hand from the vertical camera viewpoint
    '''
    left_hand_horizontal_image_path = os.path.join(root_dir, user_id, 'Left_Hand', hand_pose, 'Horizontal', 'images')
    rigth_hand_horizontal_image_path = os.path.join(root_dir, user_id, 'Right_Hand', hand_pose, 'Horizontal', 'images')
    left_hand_vertical_image_path = os.path.join(root_dir, user_id, 'Left_Hand', hand_pose, 'Vertical', 'images')
    rigth_hand_vertical_image_path = os.path.join(root_dir, user_id, 'Right_Hand', hand_pose, 'Vertical', 'images')
    return left_hand_horizontal_image_path, rigth_hand_horizontal_image_path, left_hand_vertical_image_path, rigth_hand_vertical_image_path
    

In [597]:
# root_dir = '/autofs/thau00a/home/sestebanro/thau01/Multiview_Leap2_Hand_Pose_Dataset'
root_dir = '/autofs/thau02b/sestebanro/MultiViewLeap2_Dataset/'
hand_pose = 'Spok'
# user_id = '005'

In [598]:
left_hand_horizontal_image_path, rigth_hand_horizontal_image_path, left_hand_vertical_image_path, rigth_hand_vertical_image_path = get_user_hand_images_paths(root_dir, user_id, hand_pose)
# Check if the paths exist
print("Left hand horizontal path exists: ", os.path.exists(left_hand_horizontal_image_path))
print("Right hand horizontal path exists: ", os.path.exists(rigth_hand_horizontal_image_path))
print("Left hand vertical path exists: ", os.path.exists(left_hand_vertical_image_path))
print("Right hand vertical path exists: ", os.path.exists(rigth_hand_vertical_image_path))

Left hand horizontal path exists:  True
Right hand horizontal path exists:  True
Left hand vertical path exists:  True
Right hand vertical path exists:  True


In [599]:
# Function to list all the images in a directory and take only those ending in _left. Also check that the final list length is 1000
def get_hand_images(image_path: str, side: str = 'left'):
    '''
    Function to list all the images in a directory and take only those ending in _left. Also check that the final list length is 1000
    params:
    - image_path: str: path to the directory containing the images
    return:
    - left_hand_images: list: list of images ending in _left
    '''
    if side == 'left':
        left_hand_images = [img for img in os.listdir(image_path) if img.endswith('_left.bmp')]
        # Sort the list by their image number being in the format 'XXXX_left.bmp'
        left_hand_images = sorted(left_hand_images, key=lambda x: int(x.split('_')[0]))
        assert len(left_hand_images) == 1000
        return left_hand_images
    elif side == 'right':
        right_hand_images = [img for img in os.listdir(image_path) if img.endswith('_right.bmp')]
        # Sort the list by their image number being in the format 'XXXX_right.bmp'
        right_hand_images = sorted(right_hand_images, key=lambda x: int(x.split('_')[0]))
        assert len(right_hand_images) == 1000
        return right_hand_images
    return left_hand_images

In [600]:
left_hand_horizontal_images = get_hand_images(left_hand_horizontal_image_path)
righ_hand_horizontal_images = get_hand_images(rigth_hand_horizontal_image_path)
left_hand_vertical_images = get_hand_images(left_hand_vertical_image_path)
righ_hand_vertical_images = get_hand_images(rigth_hand_vertical_image_path)

In [601]:
left_hand_horizontal_path

'/autofs/thau02b/sestebanro/MultiViewLeap2_Dataset/018/Left_Hand/Spok/Horizontal/hand_properties.csv'

## Read the dataframes and add the paths

In [602]:
# Read the hand_properties.csv file for the left hand from the horizontal camera viewpoint and check that their length is 1000
left_hand_horizontal_df = pd.read_csv(left_hand_horizontal_path)
rigth_hand_horizontal_df = pd.read_csv(rigth_hand_horizontal_path)
left_hand_vertical_df = pd.read_csv(left_hand_vertical_path)
rigth_hand_vertical_df = pd.read_csv(rigth_hand_vertical_path)

assert len(left_hand_horizontal_df) == 1000
assert len(rigth_hand_horizontal_df) == 1000
assert len(left_hand_vertical_df) == 1000
assert len(rigth_hand_vertical_df) == 1000

In [603]:
def get_all_poses(root_dir, user_id, hand):
    '''
    Function to get all the available poses for a specific hand
    '''
    hand_path = os.path.join(root_dir, user_id, hand)
    poses = os.listdir(hand_path)
    return poses

In [604]:
poses = get_all_poses(root_dir, user_id, 'Right_Hand')
poses

['Dislike',
 'Three',
 'Spiderman',
 'Spok',
 'OK',
 'ClosedFist',
 'Call',
 'L',
 'One',
 'Rock',
 'Four',
 'Stop',
 'Tiger',
 'OpenPalm',
 'Like',
 'C',
 'Two']

In [605]:
import json

# Create a dictionary with the hand poses as keys and the index of the pose as values
poses_images = {}
poses_dict = {pose: idx for idx, pose in enumerate(poses)}
# Write the dictionary to a file in data/poses_dict.json
with open('/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/poses_dict.json', 'w') as f:
    json.dump(poses_dict, f)
poses_dict

{'Dislike': 0,
 'Three': 1,
 'Spiderman': 2,
 'Spok': 3,
 'OK': 4,
 'ClosedFist': 5,
 'Call': 6,
 'L': 7,
 'One': 8,
 'Rock': 9,
 'Four': 10,
 'Stop': 11,
 'Tiger': 12,
 'OpenPalm': 13,
 'Like': 14,
 'C': 15,
 'Two': 16}

In [606]:
os.path.exists(os.path.join(root_dir, user_id, hand, pose, orientation, 'hand_properties.csv'))

True

In [607]:
# Create a folder for the user named train_"user_id" and inside create a folder for each hand and then for each pose
side = 'left'
save = True
data_dir = "/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/train_datasets"
train_dir = os.path.join(data_dir, 'train_' + user_id)
os.makedirs(train_dir, exist_ok=True)
for hand in os.listdir(os.path.join(root_dir, user_id)):
    hand_dir = os.path.join(train_dir, hand)
    os.makedirs(hand_dir, exist_ok=True)
    for pose in get_all_poses(root_dir, user_id, hand):
        pose_dir = os.path.join(hand_dir, pose)
        os.makedirs(pose_dir, exist_ok=True)
        for orientation in os.listdir(os.path.join(root_dir, user_id, hand, pose)):
            orientation_dir = os.path.join(pose_dir, orientation)
            df = pd.read_csv(os.path.join(root_dir, user_id, hand, pose, orientation, 'hand_properties.csv'), encoding='ISO-8859-1') # , encoding='ISO-8859-1'
            images = get_hand_images(os.path.join(root_dir, user_id, hand, pose, orientation, 'images'), side=side)
            # Add the images path to the dataframe in a new column called image_path checking that the path exists
            df['image_path'] = [os.path.join(root_dir, user_id, hand, pose, orientation, 'images', img) for img in images if os.path.exists(os.path.join(root_dir, user_id, hand, pose, orientation, 'images', img))]
            # Add the pose index to the dataframe in a new column called pose_index using the poses_dict and the value in pose column
            df['pose_index'] = df['pose'].map(poses_dict)
            # Save the dataframe to a csv file in the pose directory adding the orientation and the side of the camera used to capture the images
            if save:
                df.to_csv(os.path.join(pose_dir, f'hand_properties_{orientation}_{side}.csv'), index=False)

In [615]:
df

Unnamed: 0,frame_id,subject_id,which_hand,pose,device,pinch_distance,grab_angle,pinch_strength,grab_strength,palm_width,...,arm_prev_joint_z,arm_next_joint_x,arm_next_joint_y,arm_next_joint_z,arm_rotation_x,arm_rotation_y,arm_rotation_z,arm_rotation_w,image_path,pose_index
0,1,7,Left_Hand,Two,Vertical,56.100590,1.529733,1.000000,0.0,68.246132,...,323.088226,-83.533081,240.210724,139.002319,0.199012,-0.076714,0.266099,0.940053,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
1,2,7,Left_Hand,Two,Vertical,55.440536,1.048560,0.948462,0.0,69.891617,...,325.046478,-74.641525,237.464081,140.409058,0.231774,-0.033810,-0.090863,0.967927,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
2,3,7,Left_Hand,Two,Vertical,53.484028,0.915128,1.000000,0.0,72.480835,...,337.097168,-103.182388,234.940033,142.356155,0.213366,-0.041837,-0.139142,0.966108,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
3,4,7,Left_Hand,Two,Vertical,53.824696,0.916993,0.991644,0.0,72.528511,...,336.510376,-108.098389,234.573746,140.766235,0.209076,-0.039113,-0.137631,0.967375,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
4,5,7,Left_Hand,Two,Vertical,54.283726,0.942329,0.958459,0.0,72.181175,...,332.825256,-112.860596,233.630844,137.718903,0.207695,-0.037495,-0.106511,0.971654,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,7,Left_Hand,Two,Vertical,55.390244,1.688853,0.807673,0.0,71.273529,...,312.638672,-46.926861,281.591553,147.983200,0.294655,-0.154632,0.112864,0.936231,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
996,997,7,Left_Hand,Two,Vertical,53.918434,1.627899,0.825249,0.0,72.100014,...,312.816986,-54.328106,273.774872,148.508560,0.292743,-0.174092,0.233903,0.910650,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
997,998,7,Left_Hand,Two,Vertical,53.171921,1.580849,0.825372,0.0,72.735977,...,316.318451,-57.523540,268.775116,149.902740,0.284390,-0.183314,0.289341,0.895433,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
998,999,7,Left_Hand,Two,Vertical,49.033810,1.205065,0.844444,0.0,75.079140,...,336.853760,-66.224594,256.324402,157.680756,0.231286,-0.210687,0.557043,0.769298,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16


In [609]:
# Create a function to create the dataset for a specific user given the user_id, the root directory of the dataset and the data directory where the dataset will be saved
def create_user_dataset(user_id: str, root_dir: str, data_dir: str, side: str = 'left', save: bool = True):
    '''
    Function to create the dataset for a specific user given the user_id, the root directory of the dataset and the data directory where the dataset will be saved
    params:
    - user_id: str: user id, must be in the format '00X' being X the id of the user
    - root_dir: str: root directory of the dataset
    - data_dir: str: data directory where the dataset will be saved
    - side: str: side of the hand to be used, must be 'left' or 'right'
    - save: bool: flag to save the dataset to a csv file
    '''
    train_dir = os.path.join(data_dir, 'train_' + user_id)
    os.makedirs(train_dir, exist_ok=True)
    for hand in os.listdir(os.path.join(root_dir, user_id)):
        hand_dir = os.path.join(train_dir, hand)
        os.makedirs(hand_dir, exist_ok=True)
        for pose in get_all_poses(root_dir, user_id, hand):
            pose_dir = os.path.join(hand_dir, pose)
            os.makedirs(pose_dir, exist_ok=True)
            for orientation in os.listdir(os.path.join(root_dir, user_id, hand, pose)):
                orientation_dir = os.path.join(pose_dir, orientation)
                df = pd.read_csv(os.path.join(root_dir, user_id, hand, pose, orientation, 'hand_properties.csv'))
                images = get_hand_images(os.path.join(root_dir, user_id, hand, pose, orientation, 'images'), side=side)
                # Add the images path to the dataframe in a new column called image_path checking that the path exists
                df['image_path'] = [os.path.join(root_dir, user_id, hand, pose, orientation, 'images', img) for img in images if os.path.exists(os.path.join(root_dir, user_id, hand, pose, orientation, 'images', img))]
                # Add the pose index to the dataframe in a new column called pose_index using the poses_dict and the value in pose column
                df['pose_index'] = df['pose'].map(poses_dict)
                # Save the dataframe to a csv file in the pose directory adding the orientation and the side of the camera used to capture the images
                if save:
                    df.to_csv(os.path.join(pose_dir, f'hand_properties_{orientation}_{side}.csv'), index=False)

In [610]:
df.head()

Unnamed: 0,frame_id,subject_id,which_hand,pose,device,pinch_distance,grab_angle,pinch_strength,grab_strength,palm_width,...,arm_prev_joint_z,arm_next_joint_x,arm_next_joint_y,arm_next_joint_z,arm_rotation_x,arm_rotation_y,arm_rotation_z,arm_rotation_w,image_path,pose_index
0,1,18,Right_Hand,Two,Horizontal,62.836105,1.327276,0.977275,0.0,72.866531,...,304.985077,-74.77623,206.987198,102.128525,0.165273,-0.059914,-0.004334,0.984417,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
1,2,18,Right_Hand,Two,Horizontal,62.443985,1.236738,0.654641,0.0,72.634224,...,301.993835,-78.006332,255.677414,100.335312,0.170472,-0.055885,-0.014154,0.983675,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
2,3,18,Right_Hand,Two,Horizontal,60.275238,1.165541,0.547832,0.0,72.584778,...,301.368256,-78.809753,275.939453,100.13308,0.17229,-0.056252,-0.028434,0.983028,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
3,4,18,Right_Hand,Two,Horizontal,59.603382,1.319855,0.599667,0.0,73.895363,...,312.21991,-95.059784,333.024963,102.855629,0.144511,-0.041394,-0.066158,0.986421,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16
4,5,18,Right_Hand,Two,Horizontal,60.611687,1.309707,0.622524,0.0,73.838326,...,309.895355,-97.171097,325.162262,100.169159,0.140172,-0.041881,-0.062722,0.987251,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,16


In [611]:
df[['frame_id', 'image_path']]

Unnamed: 0,frame_id,image_path
0,1,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
1,2,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
2,3,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
3,4,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
4,5,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
...,...,...
995,996,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
996,997,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
997,998,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...
998,999,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...


# Fix the problems in train_007 and train_019

In [2]:
import os
import re
import string

import pandas as pd

In [4]:
df = pd.read_csv('/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/train_datasets/train_007/Left_Hand/Two/hand_properties_Vertical_left.csv')
# df.loc[df['frame_id'] == 905, 'index_metacarpal_rotation_z'] = float(0.61839)
encoding_error = df[df['frame_id']==905]['index_metacarpal_rotation_z'].values[0]
encoding_error = "-0.29466>"
# Step 1: Remove non-printable characters
for char in encoding_error:
    if char not in string.printable:
        # Remove the character from the string
        encoding_error = encoding_error.replace(char, '')
        print(f"Removed character {char}")

# Step 2: Remove non-ASCII characters (like Ã, Â) using regex
encoding_error = re.sub(r'[^\x00-\x7F]+', '', encoding_error)

# Step 3: Remove any element that is not a digit or a dot
encoding_error = ''.join([char for char in encoding_error if char.isdigit() or char == '.'])

# Output the final cleaned string
print(encoding_error)

Removed character 
Removed character 
0.29466


In [5]:
def clean_and_cast_to_float(value):
    if isinstance(value, (int, float)):
        return value
    # Remove non-printable characters using a regex, allow only digits, '.' and '-'
    printable = set(string.printable)
    clean_value = ''.join([ch for ch in str(value) if ch in printable])

    # Using regex to keep only digits, '.', and '-' at the start for negative numbers
    clean_value = re.sub(r'[^\d.-]', '', clean_value)

    # Check for proper formatting of the negative sign (at the beginning) and decimal point
    if clean_value.count('-') > 1 or (clean_value.count('-') == 1 and clean_value.index('-') != 0):
        clean_value = clean_value.replace('-', '')  # Remove all invalid '-' signs

    # Return as float
    try:
        return float(clean_value)
    except ValueError:
        raise ValueError(f"Cannot convert {value} to float after cleaning.")

# Example usage:
data = ["123.45", "12\x00.3\x01", "-123.45", "\x02-12abc", "12.-345"]
cleaned_data = [clean_and_cast_to_float(d) for d in data]
print(cleaned_data)

[123.45, 12.3, -123.45, -12.0, 12.345]


In [15]:
df = pd.read_csv('/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/hand_properties_horizontal.csv')
df2 = pd.read_csv('/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/hand_properties_vertical.csv')

  df = pd.read_csv('/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/hand_properties_horizontal.csv')
  df2 = pd.read_csv('/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/hand_properties_vertical.csv')


In [8]:
# Check the amount of nan values
df.isna().sum().sum()

29

In [16]:
# Check the amount of nan values
df2.isna().sum().sum()

55

In [14]:
nan_rows = df[df.isna().any(axis=1)]
nan_rows

Unnamed: 0,frame_id,subject_id,which_hand,pose,device,pinch_distance,grab_angle,pinch_strength,grab_strength,palm_width,...,arm_prev_joint_z,arm_next_joint_x,arm_next_joint_y,arm_next_joint_z,arm_rotation_x,arm_rotation_y,arm_rotation_z,arm_rotation_w,image_path,pose_index
625231,232,19,Left_Hand,OpenPalm,Horizontal,77.2491,0.101635,0.0,0.0,81.323349,...,,,,,,,,,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,13


In [17]:
nan_rows = df2[df.isna().any(axis=1)]
nan_rows

Unnamed: 0,frame_id,subject_id,which_hand,pose,device,pinch_distance,grab_angle,pinch_strength,grab_strength,palm_width,...,arm_prev_joint_z,arm_next_joint_x,arm_next_joint_y,arm_next_joint_z,arm_rotation_x,arm_rotation_y,arm_rotation_z,arm_rotation_w,image_path,pose_index
625231,232,19,Left_Hand,OpenPalm,Vertical,74.908966,0.0,0.0,0.0,74.00045,...,289.183167,-239.767899,232.699005,87.29586,0.176108,0.096364,0.620576,0.758014,/autofs/thau02b/sestebanro/MultiViewLeap2_Data...,13


In [None]:
df = pd.read_csv('/autofs/thau00a/home/sestebanro/thau03/Multi_View_Leap2/data/train_datasets/train_007/Left_Hand/Two/hand_properties_Vertical_left.csv')

In [614]:
type(df[df['frame_id']==905]['index_metacarpal_rotation_z'].values[0])

str