In [1]:
import numpy as np
import pandas as pd
import os
import sys
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
# Root folder that is scanned for recording CSV files.
directory = "../datasets/realWorld"

# Sensor placements (`obls`, presumably "on-body locations" -- confirm) and
# activity names. The position of a name in its list is its integer code.
obls = ["chest", "forearm", "head", "shin", "upperarm", "waist", "thigh"]
activities = ["running", "jumping", "climbingdown", "climbingup", "walking", "lying", "sitting", "standing"]

# name -> integer code lookups, derived from the lists above so that the two
# representations cannot drift apart.
obls_map = {name: code for code, name in enumerate(obls)}
activities_map = {name: code for code, name in enumerate(activities)}

def nameToIndex(obl, act):
    """Return the flat class index for a (placement, activity) pair.

    The index is ``obls_map[obl] * len(activities_map) + activities_map[act]``,
    i.e. all activities of one placement occupy a contiguous range.

    Parameters
    ----------
    obl : key of ``obls_map``
    act : key of ``activities_map``

    Raises
    ------
    KeyError
        If either name is missing from its lookup table.
    """
    activity_count = len(activities_map)
    placement_offset = obls_map[obl] * activity_count
    return placement_offset + activities_map[act]

def indexToName(index):
    """Return the ``"<placement>_<activity>"`` name for a flat class index.

    Reverses the flattening used by ``nameToIndex``: the quotient of ``index``
    by ``len(activities_map)`` selects the entry of ``obls`` and the remainder
    selects the entry of ``activities``.

    Raises
    ------
    IndexError
        If the derived placement position is outside ``obls``.
    """
    obl_pos, act_pos = divmod(index, len(activities_map))
    return "_".join([obls[obl_pos], activities[act_pos]])


# Collect every file whose name contains ".csv" from the leaf directories
# (directories without sub-directories) below `directory`.
file_names = list()
for subdir, dirs, files in os.walk(directory):
    if not dirs:
        csv_files = [os.path.join(subdir, f) for f in files if ".csv" in f]
        file_names.extend(csv_files)

# os.walk yields entries in whatever order the filesystem returns them, so the
# raw list differs between machines. Sorting makes the order of the generated
# samples reproducible (the previous `file_names = (file_names)` was a no-op).
file_names = sorted(file_names)

# Build two parallel lists: the files grouped by (placement, activity) and the
# class index of each file. Matching is a substring test on the full path, so
# a path containing several placement or activity names is listed once per
# match.
parsed_files = list()
parsed_targets = list()
for obl in obls:
    obl_files = [f for f in file_names if obl in f]
    for activity in activities:
        obl_activity_files = [f for f in obl_files if activity in f]
        target_classes = [nameToIndex(obl, activity)] * len(obl_activity_files)
        parsed_files.extend(obl_activity_files)
        parsed_targets.extend(target_classes)
        



# Every recording is assumed to be sampled at 50 Hz and is not resampled; the
# error this causes is minimal.
SAMPLE_RATE_HZ = 50
WINDOW_SECONDS = 2
samples_len = SAMPLE_RATE_HZ * WINDOW_SECONDS  # samples per window

# Cut each recording into consecutive, non-overlapping windows of
# `samples_len` rows. Trailing rows that do not fill a whole window are
# dropped. Each axis is collected as one (n_windows, samples_len) array per
# file instead of one pandas object per window.
axis_columns = ("attr_x", "attr_y", "attr_z")
axis_windows = {column: [] for column in axis_columns}
window_labels = []
for csv_path, target in tqdm(zip(parsed_files, parsed_targets), total=len(parsed_targets)):
    csv_part = pd.read_csv(csv_path)
    n_windows = len(csv_part) // samples_len
    usable_rows = n_windows * samples_len
    for column in axis_columns:
        values = csv_part[column].to_numpy()[:usable_rows]
        axis_windows[column].append(values.reshape(n_windows, samples_len))
    # One label per window, all equal to the class index of the source file.
    window_labels.extend([target] * n_windows)

if not window_labels:
    # Fail loudly instead of producing empty outputs further down.
    raise ValueError(
        "no complete windows of %d samples were found in %d file(s)"
        % (samples_len, len(parsed_files))
    )

# One row per window: `samples_len` columns for each axis, one column for the
# label.
Acc_x = pd.DataFrame(np.concatenate(axis_windows["attr_x"], axis=0))
Acc_y = pd.DataFrame(np.concatenate(axis_windows["attr_y"], axis=0))
Acc_z = pd.DataFrame(np.concatenate(axis_windows["attr_z"], axis=0))
Label = pd.DataFrame(window_labels)

100%|██████████| 880/880 [00:37<00:00, 23.53it/s]


In [3]:
# Human-readable name of every flat class index, in index order, stored as a
# single-column frame named "Label".
class_count = len(obls_map) * len(activities_map)
joint_names = [indexToName(class_index) for class_index in range(class_count)]
Label_names = pd.DataFrame({"Label": joint_names})

In [4]:
# Destination path -> frame to write. Every file is space separated and has
# neither a header row nor an index column.
output_frames = {
    "../datasets/RW16/train/Acc_x.txt": Acc_x,
    "../datasets/RW16/train/Acc_y.txt": Acc_y,
    "../datasets/RW16/train/Acc_z.txt": Acc_z,
    "../datasets/RW16/train/Label.txt": Label,
    "../datasets/RW16/Label_names.txt": Label_names,
}
for output_path, frame in output_frames.items():
    # to_csv does not create missing parent directories, so make sure they
    # exist; this lets the notebook run on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    frame.to_csv(output_path, index=False, header=False, sep=" ")