In [1]:
import os
import pandas as pd

In [12]:
def combine_joint_data_for_subjects(base_dir):
    # List all subject directories
    subjects = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]
    
    for subject in subjects:
        subject_dir = os.path.join(base_dir, subject)
        combined_df = pd.DataFrame()
        
        # List all joint files within the subject directory
        joint_files = [f for f in os.listdir(subject_dir) if f.endswith('.csv')]
        
        for joint_file in joint_files:
            joint_name = joint_file.split('.')[0]  # Extract joint name from file name
            file_path = os.path.join(subject_dir, joint_file)
            
            # Read the CSV file with explicit delimiter
            df = pd.read_csv(file_path, delimiter=',', dtype=str)  # Read as strings for safer processing
            
            # Debug: Print first few rows to inspect format
            print(f"Inspecting {file_path}:")
            print(df.head())

            # Check if the first column contains improperly split data
            first_column = df.columns[0]
            if df[first_column].str.contains(',', na=False).any():
                print(f"Detected unsplit data in {file_path}, applying fix.")
                df = df[first_column].str.split(',', expand=True)
                df.columns = ['Time', 'Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz']
            
            # Drop the "Time" column if it exists
            if 'Time' in df.columns:
                df = df.drop(columns=['Time'])
            
            # Rename columns to include the joint name
            df.columns = [f"{col}_{joint_name}" for col in df.columns]
            
            # Debugging prints
            print(f"Processing {file_path}")
            print(f"Renamed columns: {df.columns}")
            
            # Concatenate horizontally
            if combined_df.empty:
                combined_df = df
            else:
                combined_df = pd.concat([combined_df, df], axis=1)
        
        # Save the combined DataFrame to a CSV file
        combined_csv_path = os.path.join(base_dir, f'{subject}_combined_joint_data.csv')
        combined_df.to_csv(combined_csv_path, index=False)
        print(f"CSV file for {subject} created successfully.")

# Define the base directory where the subject folders are located
base_directory = 'New_Dataset'  # Replace with the actual path to your base directory

# Combine joint data for each subject
combine_joint_data_for_subjects(base_directory)


Inspecting New_Dataset/test1_new_recorded/RS.csv:
    Time     Ax     Ay    Az    Gx    Gy     Gz
0  14865  -0.00   0.01  1.00  0.64  0.01  -0.45
1  14870  -0.00   0.01  1.01  0.53  0.05  -0.34
2  14876   0.00   0.00  1.00  0.49  0.11  -0.36
3  14881  -0.00  -0.00  1.01  0.47  0.02  -0.53
4  14886   0.00   0.00  1.00  0.26  0.18  -0.56
Processing New_Dataset/test1_new_recorded/RS.csv
Renamed columns: Index(['Ax_RS', 'Ay_RS', 'Az_RS', 'Gx_RS', 'Gy_RS', 'Gz_RS'], dtype='object')
Inspecting New_Dataset/test1_new_recorded/RA.csv:
    Time     Ax    Ay    Az    Gx     Gy     Gz
0  14867  -0.00  0.00  1.01  0.22  -0.10   0.19
1  14872   0.00  0.00  1.00  0.25  -0.05   0.04
2  14878   0.00  0.00  1.00  0.26  -0.01   0.28
3  14883  -0.01  0.01  1.00  0.27  -0.08   0.30
4  14888   0.00  0.01  0.99  0.15  -0.11  -0.06
Processing New_Dataset/test1_new_recorded/RA.csv
Renamed columns: Index(['Ax_RA', 'Ay_RA', 'Az_RA', 'Gx_RA', 'Gy_RA', 'Gz_RA'], dtype='object')
Inspecting New_Dataset/test1_new_rec

In [13]:
# Define the base directory where the subject folders are located
base_directory = 'New_Dataset'  # Replace with the actual path to your base directory

# Combine joint data for each subject
combine_joint_data_for_subjects(base_directory)

Inspecting New_Dataset/test1_new_recorded/RS.csv:
    Time     Ax     Ay    Az    Gx    Gy     Gz
0  14865  -0.00   0.01  1.00  0.64  0.01  -0.45
1  14870  -0.00   0.01  1.01  0.53  0.05  -0.34
2  14876   0.00   0.00  1.00  0.49  0.11  -0.36
3  14881  -0.00  -0.00  1.01  0.47  0.02  -0.53
4  14886   0.00   0.00  1.00  0.26  0.18  -0.56
Processing New_Dataset/test1_new_recorded/RS.csv
Renamed columns: Index(['Ax_RS', 'Ay_RS', 'Az_RS', 'Gx_RS', 'Gy_RS', 'Gz_RS'], dtype='object')
Inspecting New_Dataset/test1_new_recorded/RA.csv:
    Time     Ax    Ay    Az    Gx     Gy     Gz
0  14867  -0.00  0.00  1.01  0.22  -0.10   0.19
1  14872   0.00  0.00  1.00  0.25  -0.05   0.04
2  14878   0.00  0.00  1.00  0.26  -0.01   0.28
3  14883  -0.01  0.01  1.00  0.27  -0.08   0.30
4  14888   0.00  0.01  0.99  0.15  -0.11  -0.06
Processing New_Dataset/test1_new_recorded/RA.csv
Renamed columns: Index(['Ax_RA', 'Ay_RA', 'Az_RA', 'Gx_RA', 'Gy_RA', 'Gz_RA'], dtype='object')
Inspecting New_Dataset/test1_new_rec