# Data preprocessing for 2D turning data

## Import libraries

In [1]:
import os
import pandas as pd

## Import labels and specify patient data directory

In [2]:
# Root folder of the turning data. Set the REMAP_TURNING_DATA environment
# variable to point at a local copy of the dataset; when it is unset, the
# original author's absolute path is used so existing behaviour is unchanged.
DATA_ROOT = os.environ.get(
    'REMAP_TURNING_DATA',
    '/Users/suhrudp/Library/CloudStorage/OneDrive-Personal/Stats/REMAP Open Dataset PD/21h9f9e30v9cl2fapjggz4q1x7/Turning/Data',
)

# Human-label table; the merge cell matches on its 'Participant ID number'
# and 'Turn ID' columns.
labs = pd.read_csv(os.path.join(DATA_ROOT, 'turning_human_labels', 'Turning_human_labels.csv'))

# Directory whose "Pt*" sub-folders are scanned for input_2D/keypoints.csv.
# NOTE: the name `dir` shadows the Python builtin; it is kept because later
# cells reference it.
dir = os.path.join(DATA_ROOT, 'turning_2D3D_skeletons_coarsened', 'Turning_coarsen_CSV')

In [3]:
# Column labels applied to keypoints.csv when it is read: 34 names interleaved
# as x0, y0, x1, y1, ..., x16, y16.
N_KEYPOINTS = 17
column_names = [f'{axis}{idx}' for idx in range(N_KEYPOINTS) for axis in ('x', 'y')]

## Merge all the data

In [4]:
def _parse_folder_ids(folder):
    """Return ``(participant_id, turn_id)`` parsed from a patient folder name.

    The participant ID is the text between the leading two characters ("Pt")
    and the first ``_``; the turn ID is everything after the first ``n_``.

    Raises:
        ValueError: if either marker is missing or a piece is not an integer.
    """
    part_id_str = folder[2:folder.index('_')]
    trans_id_str = folder[folder.index('n_') + 2:]
    return int(part_id_str), int(trans_id_str)


# One merged frame per labelled turn. The frames are concatenated once after
# the loop: calling pd.concat inside the loop re-copies every row gathered so
# far on each iteration.
turn_frames = []

for folder in os.listdir(dir):
    if not folder.startswith("Pt"):
        continue

    input_2d_path = os.path.join(dir, folder, 'input_2D', 'keypoints.csv')
    if not os.path.exists(input_2d_path):
        continue

    # str.index() and int() only raise ValueError here, so that is the only
    # exception caught; anything else is a real problem and should surface.
    try:
        part_id, trans_id = _parse_folder_ids(folder)
    except ValueError as e:
        print(f"Skipping {folder}: Error extracting IDs - {str(e)}")
        continue

    # Skip turns that have no row in the human-label table.
    has_label = ((labs['Participant ID number'] == part_id) & (labs['Turn ID'] == trans_id)).any()
    if not has_label:
        continue

    data = pd.read_csv(input_2d_path, sep=',', header=None, names=column_names, skiprows=1)

    # NOTE(review): with the labels in `column_names` (x0, y0, x1, y1, ...) the
    # first two columns are themselves x0/y0, so skipping them here renames
    # x1->x0 ... x16->x15 (same for y) and leaves duplicate 'x0'/'y0' labels.
    # The original behaviour is preserved; confirm whether the first two
    # columns were meant to be non-coordinate metadata.
    coordinate_columns = data.columns[2:]
    x_columns = [col for col in coordinate_columns if 'x' in col]
    y_columns = [col for col in coordinate_columns if 'y' in col]

    new_x_names = [f'x{i}' for i, _ in enumerate(x_columns)]
    new_y_names = [f'y{i}' for i, _ in enumerate(y_columns)]
    rename_mapping = dict(zip(x_columns + y_columns, new_x_names + new_y_names))

    data = data.rename(columns=rename_mapping)

    required_columns = list(data.columns[:2]) + new_x_names + new_y_names
    # .copy() gives an independent frame, so adding the ID columns below does
    # not trigger pandas' SettingWithCopyWarning.
    data = data[required_columns].copy()

    # Tag every row with its IDs so it can be joined to the labels.
    data['Participant ID number'] = part_id
    data['Turn ID'] = trans_id

    turn_frames.append(pd.merge(labs, data, on=['Participant ID number', 'Turn ID']))

# pd.concat raises on an empty list, so fall back to an empty frame (the
# original result when no folder matched).
merged_data = pd.concat(turn_frames, ignore_index=True) if turn_frames else pd.DataFrame()

In [7]:
# Preview the merged frame; the stored output shows pandas eliding the middle rows and columns with "...".
merged_data

Unnamed: 0,Turn ID,Participant ID number,PD_or_C,number_of_turning_steps,turning_angle,type_of_turn,turning_duration,On_or_Off_medication,DBS_state,clinical_assessment,...,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15
0,1049,412,PD,3,90_degrees,step_turn,1.591,Off medication,-,No,...,108.0,92.0,88.0,82.0,98.0,118.0,130.0,90.0,106.0,120.0
1,1049,412,PD,3,90_degrees,step_turn,1.591,Off medication,-,No,...,108.0,94.0,88.0,80.0,98.0,118.0,130.0,90.0,106.0,120.0
2,1049,412,PD,3,90_degrees,step_turn,1.591,Off medication,-,No,...,108.0,94.0,88.0,82.0,98.0,118.0,130.0,90.0,106.0,120.0
3,1049,412,PD,3,90_degrees,step_turn,1.591,Off medication,-,No,...,108.0,94.0,88.0,82.0,98.0,118.0,130.0,90.0,106.0,120.0
4,1049,412,PD,3,90_degrees,step_turn,1.591,Off medication,-,No,...,108.0,94.0,88.0,82.0,98.0,118.0,130.0,90.0,106.0,120.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121303,3181,744,PD,3,90_degrees,pivot_turn,1.868,On medication,-,No,...,114.0,92.0,86.0,78.0,92.0,118.0,112.0,92.0,112.0,112.0
121304,3181,744,PD,3,90_degrees,pivot_turn,1.868,On medication,-,No,...,114.0,88.0,84.0,80.0,92.0,116.0,114.0,94.0,114.0,108.0
121305,3181,744,PD,3,90_degrees,pivot_turn,1.868,On medication,-,No,...,114.0,90.0,84.0,82.0,94.0,116.0,112.0,94.0,112.0,110.0
121306,3181,744,PD,3,90_degrees,pivot_turn,1.868,On medication,-,No,...,110.0,88.0,82.0,78.0,90.0,116.0,104.0,92.0,112.0,104.0


## Save the CSV

In [5]:
# Write the merged frame to the current working directory. to_csv keeps its
# default index=True, so the row index is written as the first, unnamed column.
output_csv = 'merged_turn_2d_data1.csv'
merged_data.to_csv(output_csv)