# Data preprocessing for 2D turning data

## Import libraries

In [3]:
import os
import pandas as pd

## Import labels and specify patient data directory

In [4]:
labs = pd.read_csv('/Users/suhrudp/Library/CloudStorage/OneDrive-Personal/Stats/REMAP Open Dataset PD/21h9f9e30v9cl2fapjggz4q1x7/Turning/Data/turning_human_labels/Turning_human_labels.csv')

dir = '/Users/suhrudp/Library/CloudStorage/OneDrive-Personal/Stats/REMAP Open Dataset PD/21h9f9e30v9cl2fapjggz4q1x7/Turning/Data/turning_2D3D_skeletons_coarsened/Turning_coarsen_CSV'

In [5]:
#column_names = [f'{coord}{i}' for i in range(18) for coord in ('x', 'y', 'z')]
column_names = [f'{["x", "y", "z"][i % 3]}{i // 3}' for i in range(75)]

In [8]:
df = pd.read_csv('/Users/suhrudp/Library/CloudStorage/OneDrive-Personal/Stats/REMAP Open Dataset PD/21h9f9e30v9cl2fapjggz4q1x7/Turning/Data/turning_2D3D_skeletons_coarsened/Turning_coarsen_CSV/Pt204_C_n_350/input_3D/keypoints3d.csv', sep=',', header=None, names=column_names, skiprows=1)

In [9]:
df

Unnamed: 0,x0,y0,z0,x1,y1,z1,x2,y2,z2,x3,...,z14,x15,y15,z15,x16,y16,z16,x17,y17,z17
0,0.0,0.0,0.556,-0.052,-0.056,0.540,-0.028,-0.024,0.252,0.070,...,1.046,-0.142,-0.362,1.124,0.072,-0.452,1.074,,,
1,0.0,0.0,0.554,-0.052,-0.056,0.538,-0.028,-0.024,0.250,0.072,...,1.042,-0.140,-0.362,1.118,0.076,-0.450,1.072,,,
2,0.0,0.0,0.556,-0.052,-0.056,0.540,-0.028,-0.024,0.252,0.074,...,1.046,-0.138,-0.362,1.120,0.078,-0.448,1.072,,,
3,0.0,0.0,0.556,-0.054,-0.056,0.538,-0.028,-0.024,0.252,0.076,...,1.044,-0.136,-0.364,1.118,0.082,-0.450,1.070,,,
4,0.0,0.0,0.552,-0.054,-0.054,0.536,-0.026,-0.024,0.252,0.080,...,1.042,-0.136,-0.364,1.112,0.084,-0.450,1.064,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,0.0,0.0,0.472,0.094,0.008,0.462,0.116,0.234,0.236,0.240,...,0.892,0.278,0.068,0.636,0.124,0.240,0.796,,,
86,0.0,0.0,0.470,0.096,0.006,0.462,0.120,0.238,0.236,0.242,...,0.892,0.278,0.066,0.636,0.126,0.238,0.798,,,
87,0.0,0.0,0.466,0.096,0.004,0.458,0.122,0.240,0.234,0.244,...,0.888,0.280,0.064,0.632,0.128,0.234,0.796,,,
88,0.0,0.0,0.464,0.098,0.002,0.454,0.126,0.244,0.234,0.246,...,0.886,0.280,0.062,0.628,0.130,0.232,0.794,,,


# Merge all the data

In [13]:
merged_data = pd.DataFrame()

for folder in os.listdir(dir):
    if folder.startswith("Pt"):
        input_3d_path = os.path.join(dir, folder, 'input_3D', 'keypoints3d.csv')
        if os.path.exists(input_3d_path):
            try:
                part_id_end_index = folder.index('_')
                part_id_str = folder[2:part_id_end_index]
                
                trans_id_start_index = folder.index('n_') + 2
                trans_id_str = folder[trans_id_start_index:]
                
                part_id = int(part_id_str)
                trans_id = int(trans_id_str)
            except ValueError as e:
                print(f"Skipping {folder}: Error extracting IDs - {str(e)}")
                continue
            except Exception as e:
                print(f"Skipping {folder}: Unexpected error - {str(e)}")
                continue

            if any((labs['Participant ID number'] == part_id) & (labs['Turn ID'] == trans_id)):
                data = pd.read_csv(input_3d_path, sep=',', header=None, names=column_names, skiprows=1)
                
                coordinate_columns = data.columns[2:]
                x_columns = [col for col in coordinate_columns if 'x' in col]
                y_columns = [col for col in coordinate_columns if 'y' in col]
                z_columns = [col for col in coordinate_columns if 'z' in col]

                new_x_names = [f'x{i}' for i, _ in enumerate(x_columns)]
                new_y_names = [f'y{i}' for i, _ in enumerate(y_columns)]
                new_z_names = [f'z{i}' for i, _ in enumerate(z_columns)]
                rename_mapping = dict(zip(x_columns + y_columns + z_columns, new_x_names + new_y_names + new_z_names))

                data.rename(columns=rename_mapping, inplace=True)

                # Correctly build the list of required columns
                required_columns = list(data.columns[:2]) + new_x_names + new_y_names + new_z_names
                data = data[required_columns]

                data['Participant ID number'] = part_id
                data['Turn ID'] = trans_id

                data = pd.merge(labs, data, on=['Participant ID number', 'Turn ID'])
                
                merged_data = pd.concat([merged_data, data], ignore_index=True)

In [14]:
merged_data.columns

Index(['Turn ID', 'Participant ID number', 'PD_or_C',
       'number_of_turning_steps', 'turning_angle', 'type_of_turn',
       'turning_duration', 'On_or_Off_medication', 'DBS_state',
       'clinical_assessment', 'x0', 'x0', 'y0', 'y0', 'x0', 'x0', 'x1', 'x2',
       'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x12', 'x13',
       'x14', 'x15', 'x16', 'y0', 'y0', 'y1', 'y2', 'y3', 'y4', 'y5', 'y6',
       'y7', 'y8', 'y9', 'y10', 'y11', 'y12', 'y13', 'y14', 'y15', 'y16', 'z0',
       'z1', 'z2', 'z3', 'z4', 'z5', 'z6', 'z7', 'z8', 'z9', 'z10', 'z11',
       'z12', 'z13', 'z14', 'z15', 'z16', 'z17'],
      dtype='object')

## Save the csv

In [15]:
merged_data.to_csv('merged_turn_3d_data.csv')