In [13]:
import pandas as pd
from pathlib import Path

In [14]:
def raw_to_dataframe(session):
    def split_to_cycles(rec, lims):
        return [rec[lims[idx]:lims[idx + 1]] for idx in range(0, len(lims) - 2)]

    df = pd.DataFrame(columns=range(3 * 128 + 1))
    
    i = 0
    for subj in Path('../data/gait_raw/' + session).glob('subj_*'):
        for rec in subj.glob('*'):
            with open(rec.joinpath('3.txt')) as f:
                # read the content of the file
                lines = list(map(lambda line: [float(x) for x in line.strip().split(',')], f.readlines()))

                # select only relevant portion of the recording and generate cycles
                cycles = range(128, len(lines[0]), 128)
                lines = list(map(lambda x: split_to_cycles(x, cycles), lines))

                # select label of measurement
                label = int(subj.name.split('_')[1])

                for cycle in range(len(lines[0])):
                    l = []
                    for line in range(len(lines)):
                        l.extend(lines[line][cycle])
                    l.append(label)
                    df.loc[i] = l
                    i += 1
                    
    return df

In [15]:
df_s0 = raw_to_dataframe('session_0')
df_s1 = raw_to_dataframe('session_1')
df_s2 = raw_to_dataframe('session_2')

In [16]:
df_s0.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,375,376,377,378,379,380,381,382,383,384
0,0.2,0.24,0.24,0.32,0.4,0.4,0.36,0.32,0.32,0.24,...,-0.2,-0.2,-0.16,-0.08,-0.08,-0.08,-0.08,-0.04,-0.04,10.0
1,-0.04,-0.08,-0.12,-0.12,-0.12,-0.16,-0.16,-0.16,-0.16,-0.16,...,-0.16,-0.04,0.44,0.44,0.44,0.44,0.44,0.04,-0.16,10.0
2,0.08,0.12,0.16,0.16,0.16,0.12,0.08,0.04,-0.04,-0.12,...,-0.24,-0.24,-0.24,-0.28,-0.24,-0.24,-0.16,-0.16,-0.2,10.0
3,0.04,0.04,-0.28,-0.28,-0.32,0.0,0.0,-0.04,-0.04,-0.12,...,-0.04,-0.08,-0.12,-0.2,-0.2,-0.28,-0.28,-0.48,-0.52,10.0
4,0.0,0.08,0.08,0.08,0.0,-0.08,-0.2,-0.16,-0.04,-0.04,...,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.0,10.0


In [17]:
df_s0.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,375,376,377,378,379,380,381,382,383,384
count,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,...,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0,1117.0
mean,0.013648,0.012968,0.009466,0.007317,0.00732,0.00749,0.012825,0.011368,0.014023,0.012462,...,-0.131519,-0.132174,-0.134988,-0.132474,-0.129105,-0.126783,-0.129273,-0.129048,-0.13108,11.625783
std,0.263504,0.264223,0.268276,0.268199,0.266659,0.262062,0.260868,0.260832,0.256726,0.253932,...,0.240577,0.237431,0.23844,0.242895,0.243835,0.241672,0.241819,0.236346,0.236801,6.591483
min,-1.24,-1.16,-1.04,-1.28,-1.2,-1.2,-1.08,-1.12,-1.04,-1.08,...,-1.2,-1.0,-1.0,-1.04,-1.0,-1.0,-1.0,-0.96,-0.96,1.0
25%,-0.16,-0.16,-0.16,-0.16,-0.16,-0.16,-0.16,-0.125,-0.12,-0.125,...,-0.24,-0.25,-0.28,-0.25,-0.25,-0.24,-0.24,-0.24,-0.25,5.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.12,-0.12,-0.12,-0.12,-0.12,-0.12,-0.12,-0.12,-0.12,12.0
75%,0.16,0.16,0.16,0.16,0.16,0.16,0.16,0.16,0.16,0.16,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0
max,1.04,1.04,1.04,1.04,1.04,1.0,1.0,1.0,1.0,1.04,...,0.75,0.791667,0.75,0.75,0.708333,0.64,0.708333,0.75,0.875,22.0


In [22]:
df_s0.to_csv('../data/zju_raw_session_0_128.csv', sep='\t', encoding='utf-8', index=False)
df_s1.to_csv('../data/zju_raw_session_1_128.csv', sep='\t', encoding='utf-8', index=False)
df_s2.to_csv('../data/zju_raw_session_2_128.csv', sep='\t', encoding='utf-8', index=False)

In [27]:
df = pd.read_csv('../data/zju_raw_session_0_128.csv', sep='\t')

In [28]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,375,376,377,378,379,380,381,382,383,384
0,0.2,0.24,0.24,0.32,0.4,0.4,0.36,0.32,0.32,0.24,...,-0.2,-0.2,-0.16,-0.08,-0.08,-0.08,-0.08,-0.04,-0.04,10.0
1,-0.04,-0.08,-0.12,-0.12,-0.12,-0.16,-0.16,-0.16,-0.16,-0.16,...,-0.16,-0.04,0.44,0.44,0.44,0.44,0.44,0.04,-0.16,10.0
2,0.08,0.12,0.16,0.16,0.16,0.12,0.08,0.04,-0.04,-0.12,...,-0.24,-0.24,-0.24,-0.28,-0.24,-0.24,-0.16,-0.16,-0.2,10.0
3,0.04,0.04,-0.28,-0.28,-0.32,0.0,0.0,-0.04,-0.04,-0.12,...,-0.04,-0.08,-0.12,-0.2,-0.2,-0.28,-0.28,-0.48,-0.52,10.0
4,0.0,0.08,0.08,0.08,0.0,-0.08,-0.2,-0.16,-0.04,-0.04,...,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.04,0.0,10.0


In [31]:
df = pd.read_csv('../data/zju_gaitaccel_session_0_128.csv', header=None)

In [32]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
0,-1.0,-1.56,-1.04,0.471593,-0.310938,-0.924687,-0.26875,1.049456,0.03978,0.076241,...,0.0,0.0625,0.203125,0.179688,0.242188,0.171875,0.078125,0.0625,0.0,u001
1,-1.16,-1.36,-1.04,0.461303,-0.323437,-0.895938,-0.1925,1.016195,0.057276,0.081358,...,0.0,0.15625,0.140625,0.164062,0.226562,0.195312,0.078125,0.039062,0.0,u001
2,-0.88,-1.6,-0.92,0.493153,-0.3125,-0.962188,-0.239375,1.076075,0.039594,0.078883,...,0.0,0.078125,0.148438,0.171875,0.21875,0.226562,0.101562,0.054688,0.0,u001
3,-1.12,-1.36,-1.0,0.461303,-0.275938,-0.877812,-0.243125,0.985793,0.033071,0.086183,...,0.0,0.1875,0.148438,0.140625,0.234375,0.1875,0.070312,0.03125,0.0,u001
4,-1.28,-1.52,-1.08,0.401995,-0.3275,-0.9575,-0.2425,1.093631,0.067544,0.084819,...,0.0,0.101562,0.15625,0.148438,0.203125,0.1875,0.101562,0.078125,0.023438,u001
