In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [60]:
# The data directory is looked up as a sibling of the current working
# directory, i.e. <cwd>/../data.
current_path = os.getcwd()
parent_path = os.path.split(current_path)[0]
data_folder = os.path.join(parent_path, 'data')

# Sub-folders of the data directory that hold the CSV files.
acc_folder, grf_folder, gyr_folder = (
    os.path.join(data_folder, sub_dir)
    for sub_dir in ('Acceleration', 'GRF', 'Gyroscope')
)

In [61]:
def _load_trials(folder, cols, prefix):
    """Load every CSV file in ``folder`` into one long DataFrame.

    Each file whose name ends in ``.csv`` is read with only the columns in
    ``cols`` and tagged with a ``trial_id`` column (the file name without
    its ``.csv`` suffix). The per-file frames are stacked, the per-file row
    number is kept as an ``index`` column (via ``reset_index``), and the
    columns listed in ``cols`` are renamed positionally to ``<prefix>_1``,
    ``<prefix>_2``, ...

    Parameters
    ----------
    folder : str
        Directory to scan for CSV files.
    cols : list of str
        Column names to read from every file (passed to ``usecols``).
    prefix : str
        Prefix for the renamed columns, e.g. ``'acc'`` -> ``acc_1``.

    Returns
    -------
    pandas.DataFrame
        Columns: ``index``, the renamed ``cols`` and ``trial_id``. Files
        are stacked in sorted file-name order.

    Raises
    ------
    FileNotFoundError
        If ``folder`` contains no ``.csv`` files.
    """
    # endswith() rather than a substring test, so names such as
    # 'trial.csv.bak' are skipped; sorted() makes the row order independent
    # of the arbitrary order os.listdir() returns.
    file_names = sorted(
        name for name in os.listdir(folder) if name.endswith('.csv')
    )
    if not file_names:
        # Fail here with a clear message instead of producing an empty frame
        # that only breaks later, when it is merged on 'trial_id'.
        raise FileNotFoundError(f"No .csv files found in {folder!r}")

    frames = [
        pd.read_csv(os.path.join(folder, name), usecols=cols)
        .assign(trial_id=name.removesuffix('.csv'))
        for name in file_names
    ]
    # Concatenate once: growing a DataFrame with concat inside the loop
    # re-copies all previously loaded rows on every iteration.
    stacked = pd.concat(frames).reset_index()
    renamed = {col: f'{prefix}_{i}' for i, col in enumerate(cols, start=1)}
    return stacked.rename(columns=renamed)


rshank_cols = ['r shank_1', 'r shank_2', 'r shank_3']
grf_cols = ['GRF_re_1', 'GRF_re_2', 'GRF_re_3']

# Read Acc data
acc_df = _load_trials(acc_folder, rshank_cols, 'acc')

# Read gyro data
gyr_df = _load_trials(gyr_folder, rshank_cols, 'gyr')

# Read grf data
grf_df = _load_trials(grf_folder, grf_cols, 'grf')


In [62]:
# Join the three frames row by row: a row survives only if the same
# ('trial_id', 'index') pair is present in all of them (inner joins).
combined_df = (
    acc_df.copy()
    .merge(gyr_df, on=['trial_id', 'index'], how='inner')
    .merge(grf_df, on=['trial_id', 'index'], how='inner')
)

In [63]:
# Number of missing values in each column of the merged frame.
combined_df.isna().sum()

index       0
acc_1       0
acc_2       0
acc_3       0
trial_id    0
gyr_1       0
gyr_2       0
gyr_3       0
grf_1       0
grf_2       0
grf_3       0
dtype: int64

In [64]:
# Bare expression: as the last line of the cell it shows the merged frame
# as the cell's output.
combined_df

Unnamed: 0,index,acc_1,acc_2,acc_3,trial_id,gyr_1,gyr_2,gyr_3,grf_1,grf_2,grf_3
0,0,-3.068552,10.062727,-3.552338,WBDS05walkT02,-2.093580,2.166469,5.542162,5.004481,3.263469,-5.057525
1,1,-3.132334,10.657120,-3.872218,WBDS05walkT02,-2.070621,2.232922,5.397465,4.181824,2.634771,-3.850198
2,2,-3.140384,11.315799,-4.192135,WBDS05walkT02,-2.047895,2.295712,5.246922,3.393344,2.037612,-2.840496
3,3,-2.647038,11.591384,-4.096701,WBDS05walkT02,-2.024882,2.356410,5.090984,2.667465,1.492733,-2.074690
4,4,-2.209601,11.850034,-4.009583,WBDS05walkT02,-2.001589,2.409800,4.929500,2.021463,1.011743,-1.581231
...,...,...,...,...,...,...,...,...,...,...,...
1347168,4495,-0.549873,9.441714,-1.522437,WBDS20walkT03,0.653604,0.779570,-0.732108,0.486657,-33.389697,532.867478
1347169,4496,-0.590934,9.433526,-1.560083,WBDS20walkT03,0.659699,0.757854,-0.727324,1.720236,-33.222900,531.943837
1347170,4497,-0.629784,9.424232,-1.600065,WBDS20walkT03,0.665773,0.734899,-0.722472,2.758790,-33.093568,531.339881
1347171,4498,-0.643288,9.460828,-1.715508,WBDS20walkT03,0.671948,0.712611,-0.717865,3.585996,-32.997009,530.985490


In [65]:
# Save the merged frame as <data_folder>/parquet/raw/raw.parquet.
output_path = os.path.join(data_folder, 'parquet', 'raw', 'raw.parquet')
# to_parquet does not create missing parent directories, so make sure the
# target folder exists; exist_ok keeps re-running the cell harmless.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
combined_df.to_parquet(output_path, engine='pyarrow')