In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
# Directory (relative to this notebook) that holds the raw REALDISP files.
DATA_DIR = '../data/raw/realdisp'

# Names of every entry in DATA_DIR, ordered lexicographically so that the
# processing order does not depend on the filesystem's listing order.
DATA_FILES = os.listdir(DATA_DIR)
DATA_FILES.sort()

In [3]:
# Placement codes, in the order their column groups appear in each data row.
SENSOR_PLACEMENT = ['RLA', 'RUA', 'BACK', 'LUA', 'LLA', 'RC', 'RT', 'LT', 'LC']

# Channel names recorded for every placement, in column order.
SENSOR_LIST = [
    'ACC_X', 'ACC_Y', 'ACC_Z',
    'GYR_X', 'GYR_Y', 'GYR_Z',
    'MAG_X', 'MAG_Y', 'MAG_Z',
    'QUAT_1', 'QUAT_2', 'QUAT_3', 'QUAT_4',
]

# One '<placement>_<channel>' name per reading; placements vary slowest.
SENSOR_READINGS = [
    f'{placement}_{channel}'
    for placement in SENSOR_PLACEMENT
    for channel in SENSOR_LIST
]

# Full header of a data row: two timestamp fields, all readings, then the label.
DATA_COLUMNS = ['TIME_SECOND', 'TIME_MICROSECOND', *SENSOR_READINGS, 'LABEL']

In [4]:
# Display the generated '<placement>_<channel>' column names as a sanity check.
SENSOR_READINGS

['RLA_ACC_X',
 'RLA_ACC_Y',
 'RLA_ACC_Z',
 'RLA_GYR_X',
 'RLA_GYR_Y',
 'RLA_GYR_Z',
 'RLA_MAG_X',
 'RLA_MAG_Y',
 'RLA_MAG_Z',
 'RLA_QUAT_1',
 'RLA_QUAT_2',
 'RLA_QUAT_3',
 'RLA_QUAT_4',
 'RUA_ACC_X',
 'RUA_ACC_Y',
 'RUA_ACC_Z',
 'RUA_GYR_X',
 'RUA_GYR_Y',
 'RUA_GYR_Z',
 'RUA_MAG_X',
 'RUA_MAG_Y',
 'RUA_MAG_Z',
 'RUA_QUAT_1',
 'RUA_QUAT_2',
 'RUA_QUAT_3',
 'RUA_QUAT_4',
 'BACK_ACC_X',
 'BACK_ACC_Y',
 'BACK_ACC_Z',
 'BACK_GYR_X',
 'BACK_GYR_Y',
 'BACK_GYR_Z',
 'BACK_MAG_X',
 'BACK_MAG_Y',
 'BACK_MAG_Z',
 'BACK_QUAT_1',
 'BACK_QUAT_2',
 'BACK_QUAT_3',
 'BACK_QUAT_4',
 'LUA_ACC_X',
 'LUA_ACC_Y',
 'LUA_ACC_Z',
 'LUA_GYR_X',
 'LUA_GYR_Y',
 'LUA_GYR_Z',
 'LUA_MAG_X',
 'LUA_MAG_Y',
 'LUA_MAG_Z',
 'LUA_QUAT_1',
 'LUA_QUAT_2',
 'LUA_QUAT_3',
 'LUA_QUAT_4',
 'LLA_ACC_X',
 'LLA_ACC_Y',
 'LLA_ACC_Z',
 'LLA_GYR_X',
 'LLA_GYR_Y',
 'LLA_GYR_Z',
 'LLA_MAG_X',
 'LLA_MAG_Y',
 'LLA_MAG_Z',
 'LLA_QUAT_1',
 'LLA_QUAT_2',
 'LLA_QUAT_3',
 'LLA_QUAT_4',
 'RC_ACC_X',
 'RC_ACC_Y',
 'RC_ACC_Z',
 'RC_GYR_X',
 'RC_

In [4]:
def get_metadata(filename:str):
    """Extract the subject number and displacement tag from a data file name.

    Only the part of ``filename`` before the first ``'.'`` is considered. That
    stem is split on ``'_'``: the digits of the first token form the subject
    number, and the non-digit characters of the second token form the
    displacement tag. For example, a name shaped like ``'subject12_mutual4.log'``
    yields ``(12, 'mutual')``.

    Args:
        filename: File name to parse (not a full path).

    Returns:
        A ``(subject, disp)`` tuple of ``(int, str)``.

    Raises:
        ValueError: If the first token contains no digits.
        IndexError: If the stem contains no ``'_'`` separator.
    """
    stem = filename.split('.')[0]
    tokens = stem.split('_')

    # Keep only the digits of the first token and read them as one integer.
    subject_digits = [ch for ch in tokens[0] if ch.isdigit()]
    subject = int(''.join(subject_digits))

    # Strip any digits from the second token, leaving the bare tag.
    disp_chars = [ch for ch in tokens[1] if not ch.isdigit()]
    disp = ''.join(disp_chars)

    return subject, disp

In [5]:
def get_merged_REALDISP_data(data_dir=None, data_files=None):
    """Load the REALDISP files and merge them into one labelled DataFrame.

    Each file is parsed with ``np.loadtxt`` and given the ``DATA_COLUMNS``
    header. Rows whose ``LABEL`` equals 0 are dropped (presumably the
    unlabelled / null class; confirm against the dataset documentation).
    Every remaining row is tagged with the ``SUBJECT`` and ``DISPLACEMENT``
    values that ``get_metadata`` parses from the file name.

    The per-file frames are concatenated once, sorted by
    ``(TIME_SECOND, TIME_MICROSECOND)``, and rows where both timestamp fields
    are 0 are removed.

    NOTE(review): the sort is global, so rows from different files that share
    a timestamp end up interleaved. Group by ``SUBJECT`` / ``DISPLACEMENT``
    downstream if per-recording order matters.

    Parameters
    ----------
    data_dir : str, optional
        Directory containing the files. Defaults to the module-level
        ``DATA_DIR``.
    data_files : iterable of str, optional
        File names inside ``data_dir`` to load. Defaults to the module-level
        ``DATA_FILES``.

    Returns
    -------
    pandas.DataFrame
        Columns are ``DATA_COLUMNS`` followed by ``SUBJECT`` and
        ``DISPLACEMENT``, with a fresh 0..n-1 index. If no files are given,
        an empty frame with those columns is returned.
    """
    # Resolve defaults at call time so a re-run of the config cell is honoured.
    if data_dir is None:
        data_dir = DATA_DIR
    if data_files is None:
        data_files = DATA_FILES

    # Collect per-file frames and concatenate once: growing a DataFrame with
    # pd.concat inside the loop re-copies all accumulated rows each iteration.
    frames = []
    for d_file in tqdm(data_files):
        subject, disp = get_metadata(d_file)

        # ndmin=2 keeps a single-row file two-dimensional so that the
        # 120-column header still lines up.
        data = np.loadtxt(os.path.join(data_dir, d_file), ndmin=2)
        df = pd.DataFrame(data, columns=DATA_COLUMNS)

        # Filter on the named column rather than the positional index 119.
        labelled = df.loc[df['LABEL'] != 0.0].reset_index(drop=True)
        frames.append(labelled.assign(SUBJECT=subject, DISPLACEMENT=disp))

    if not frames:
        return pd.DataFrame(columns=[*DATA_COLUMNS, 'SUBJECT', 'DISPLACEMENT'])

    merged_df = pd.concat(frames, ignore_index=True)
    merged_df = merged_df.sort_values(by=['TIME_SECOND', 'TIME_MICROSECOND'], ignore_index=True)

    # Discard rows whose timestamp is exactly (0 s, 0 us).
    zero_time = (merged_df['TIME_SECOND'] == 0.0) & (merged_df['TIME_MICROSECOND'] == 0.0)
    merged_df = merged_df.loc[~zero_time].reset_index(drop=True)

    return merged_df

In [6]:
# Parse and merge every file in DATA_FILES. This is slow: the recorded run
# below shows roughly 15 minutes for 46 files.
merged_df = get_merged_REALDISP_data()

100%|██████████| 46/46 [15:05<00:00, 19.68s/it]


In [5]:
# Show the merged frame using the notebook's rich display.
# NOTE(review): the recorded output of this cell is a NameError, i.e. it was
# executed in a kernel where the merge cell had not been run. Re-run that
# cell first so the notebook survives Restart & Run All.
merged_df

NameError: name 'merged_df' is not defined

In [8]:
# Persist the merged frame as CSV; index=False keeps the row index out of
# the file so no extra index column appears when it is read back.
merged_df.to_csv('../data/clean_realdisp_data.csv', index=False)

In [6]:
# Load the cleaned dataset from the CSV file on disk, so later cells do not
# require the slow merge step to have been run in this kernel.
realdisp_data = pd.read_csv('../data/clean_realdisp_data.csv')

In [10]:
# Show the reloaded frame (pandas truncates the rendering for large frames).
realdisp_data

Unnamed: 0,TIME_SECOND,TIME_MICROSECOND,RLA_ACC_X,RLA_ACC_Y,RLA_ACC_Z,RLA_GYR_X,RLA_GYR_Y,RLA_GYR_Z,RLA_MAG_X,RLA_MAG_Y,...,LC_MAG_X,LC_MAG_Y,LC_MAG_Z,LC_QUAT_1,LC_QUAT_2,LC_QUAT_3,LC_QUAT_4,LABEL,SUBJECT,DISPLACEMENT
0,11.0,520000.0,-7.4613,-0.66154,4.6290,-3.20340,0.75196,-0.17525,0.74363,-0.007458,...,0.72301,0.20873,0.263760,0.59156,0.427050,0.60380,-0.321110,29.0,15,ideal
1,11.0,540000.0,-7.4412,0.37053,5.3020,-1.77090,0.58809,-0.16867,0.74298,-0.002966,...,0.72131,0.20452,0.274680,0.59036,0.426510,0.60812,-0.315830,29.0,15,ideal
2,11.0,560000.0,-7.3393,2.65950,4.8126,-1.27190,0.46421,-0.21316,0.74316,0.000257,...,0.71868,0.20100,0.283350,0.58836,0.426760,0.61236,-0.311010,29.0,15,ideal
3,11.0,580000.0,-7.6996,2.54520,5.0558,-1.56090,0.39034,-0.31989,0.74633,0.007229,...,0.71682,0.19971,0.294170,0.58555,0.427580,0.61665,-0.306680,29.0,15,ideal
4,11.0,600000.0,-7.9549,1.78220,5.0668,-1.66070,0.28158,-0.37719,0.74740,0.012301,...,0.71371,0.19864,0.304010,0.58176,0.429380,0.62100,-0.302580,29.0,15,ideal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1906938,3959.0,680000.0,-5.8689,-7.23520,-15.0790,-1.23210,2.89100,2.68700,0.35368,0.238270,...,0.44676,-0.27431,0.003387,0.73100,-0.048641,0.68040,0.018247,3.0,15,mutual
1906939,3959.0,700000.0,-5.0028,-10.73400,-18.8810,-1.29650,3.47120,2.46690,0.33366,0.189950,...,0.46424,-0.26598,0.076237,0.67964,-0.049279,0.73173,-0.014970,3.0,15,mutual
1906940,3959.0,720000.0,-4.5963,-16.24600,-25.8080,-1.67470,3.39150,2.50380,0.30211,0.141550,...,0.45156,-0.24547,0.131790,0.63765,-0.054429,0.76704,-0.045624,3.0,15,mutual
1906941,3959.0,740000.0,-5.5831,-18.21200,-26.2500,-0.32121,2.90110,2.20520,0.28251,0.101010,...,0.43868,-0.23517,0.166120,0.60844,-0.074795,0.78873,-0.045984,3.0,15,mutual
