In [2]:
import pandas as pd
import os

In [4]:
# Define correct column names (from PAMAP2 documentation)
columns = ['timestamp', 'activity_id', 'heart_rate',
           #IMU Hand
           'hand_temp', 'acc_hand_16g_x','acc_hand_16g_y', 'acc_hand_16g_z', 'acc_hand_6g_x','acc_hand_6g_y', 'acc_hand_6g_z', 
           'gyro_hand_x', 'gyro_hand_y', 'gyro_hand_z', 'mag_hand_x', 'mag_hand_y', 'mag_hand_z', 
           'orient_hand_w', 'orient_hand_x','orient_hand_y', 'orient_hand_z', 
           #IMU Chest
           'chest_temp', 'acc_chest_16g_x', 'acc_chest_16g_y', 'acc_chest_16g_z', 'acc_chest_6g_x', 'acc_chest_6g_y', 'acc_chest_6g_z',
           'gyro_chest_x', 'gyro_chest_y', 'gyro_chest_z', 'mag_chest_x', 'mag_chest_y', 'mag_chest_z',
           'orient_chest_w', 'orient_chest_x','orient_chest_y', 'orient_chest_z', 
           #IMU Ankle
           'ankle_temp', 'acc_ankle_16g_x','acc_ankle_16g_y', 'acc_ankle_16g_z', 'acc_ankle_6g_x','acc_ankle_6g_y', 'acc_ankle_6g_z',
           'gyro_ankle_x', 'gyro_ankle_y', 'gyro_ankle_z', 'mag_ankle_x', 'mag_ankle_y', 'mag_ankle_z',
           'orient_ankle_w', 'orient_ankle_x', 'orient_ankle_y', 'orient_ankle_z']


data_folder = 'datafiles/'  

In [6]:
combined_data = []

for filename in os.listdir(data_folder):
    if filename.endswith('.dat'):
        subject_id = int(filename.replace('subject', '').replace('.dat', ''))
        file_path = os.path.join(data_folder, filename)
        
        # Read using whitespace delimiter
        df = pd.read_csv(file_path, sep=r'\s+', header=None, names=columns, engine='python')

        df['subject_id'] = subject_id
        combined_data.append(df)
        print(f"Loaded subject {subject_id} | First activity: {df['activity_id'].iloc[0]}")

Loaded subject 108 | First activity: 0
Loaded subject 109 | First activity: 0
Loaded subject 107 | First activity: 0
Loaded subject 106 | First activity: 0
Loaded subject 104 | First activity: 0
Loaded subject 105 | First activity: 0
Loaded subject 101 | First activity: 0
Loaded subject 102 | First activity: 0
Loaded subject 103 | First activity: 0


In [8]:
# Combine all subjects into one DataFrame
full_data = pd.concat(combined_data, ignore_index=True)
print("Combined shape:", full_data.shape)

Combined shape: (2872533, 55)


In [10]:
full_data.head()

Unnamed: 0,timestamp,activity_id,heart_rate,hand_temp,acc_hand_16g_x,acc_hand_16g_y,acc_hand_16g_z,acc_hand_6g_x,acc_hand_6g_y,acc_hand_6g_z,...,gyro_ankle_y,gyro_ankle_z,mag_ankle_x,mag_ankle_y,mag_ankle_z,orient_ankle_w,orient_ankle_x,orient_ankle_y,orient_ankle_z,subject_id
0,5.89,0,,33.3125,-9.7976,-1.4567,1.01438,-9.70702,-1.60337,1.32098,...,0.010364,0.010731,-35.455,38.3829,-17.9083,0.546211,0.370903,0.695334,0.283892,108
1,5.9,0,,33.3125,-9.91204,-1.41822,0.97405,-9.78262,-1.60285,1.29085,...,-0.005926,-0.006704,-35.4508,38.2543,-17.7663,0.545806,0.370867,0.69568,0.283872,108
2,5.91,0,,33.3125,-9.87531,-1.57027,0.975113,-9.78237,-1.57265,1.29079,...,-0.002265,0.014646,-35.6984,38.2688,-17.058,0.545484,0.371607,0.695437,0.284117,108
3,5.92,0,,33.3125,-9.72175,-1.6087,1.05452,-9.70677,-1.60334,1.35118,...,0.035314,0.010982,-34.9492,38.7447,-17.6233,0.545474,0.372077,0.694859,0.284935,108
4,5.93,0,,33.3125,-9.7992,-1.49497,0.975999,-9.58657,-1.57408,1.27549,...,-0.001065,-0.00041,-35.2105,38.8857,-17.908,0.54581,0.372349,0.694122,0.285732,108


In [12]:
full_data.to_csv('combined_data.csv', index=False)

In [14]:
print("Unique subjects:", full_data['subject_id'].unique())
print("Total subjects:", full_data['subject_id'].nunique())

Unique subjects: [108 109 107 106 104 105 101 102 103]
Total subjects: 9


In [16]:
subject_101_data = full_data[full_data['subject_id'] == 101]
print(f"Shape of data: {subject_101_data.shape}")
print(subject_101_data.head())

Shape of data: (376417, 55)
         timestamp  activity_id  heart_rate  hand_temp  acc_hand_16g_x  \
1796283       8.38            0       104.0       30.0         2.37223   
1796284       8.39            0         NaN       30.0         2.18837   
1796285       8.40            0         NaN       30.0         2.37357   
1796286       8.41            0         NaN       30.0         2.07473   
1796287       8.42            0         NaN       30.0         2.22936   

         acc_hand_16g_y  acc_hand_16g_z  acc_hand_6g_x  acc_hand_6g_y  \
1796283         8.60074         3.51048        2.43954        8.76165   
1796284         8.56560         3.66179        2.39494        8.55081   
1796285         8.60107         3.54898        2.30514        8.53644   
1796286         8.52853         3.66021        2.33528        8.53622   
1796287         8.83122         3.70000        2.23055        8.59741   

         acc_hand_6g_z  ...  gyro_ankle_y  gyro_ankle_z  mag_ankle_x  \
1796283        3

In [None]:
#columns = ['timestamp', 'activity_id', 'heart_rate'] + \
#         [f'imu_hand_{i}' for i in range(1, 18)] + \
#          [f'imu_chest_{i}' for i in range(1, 18)] + \
#         [f'imu_ankle_{i}' for i in range(1, 18)]
