In [1]:
import glob
import gzip
import os
import shutil
from collections import Counter
from pathlib import Path
import re
from datetime import datetime

In [2]:
# Root folder that holds both the raw recordings and the processed output.
DATASETS_ROOT = Path("E:/Datasets")

# Destination for this run; the folder name is stamped with today's date.
output_dir = DATASETS_ROOT / "0-Processed_Datasets" / f'{datetime.now().strftime("%Y-%m-%d")}_imu-vicon-data'

# Path.glob() returns a one-shot generator. Materialise the matches as sorted
# lists so the cells that loop over them can be re-run (and run in a
# deterministic order) without having to re-execute this cell first.
imu_filepaths = sorted((DATASETS_ROOT / "Smartwatch_NAR").glob('**/*.gz'))
mocap_filepaths = sorted((DATASETS_ROOT / "Mocap_Data").glob('*.csv'))

In [3]:
# Decompress every smartwatch recording into <output_dir>/<subject>/<name>_imu.csv.
#
# The subject folder is path component 3 of each file (this index is tied to the
# depth of the dataset root used to build imu_filepaths). Folders whose name
# contains "(<digits>B)" are treated as repeat-subject folders: the text before
# the "(" becomes the subject ID and replaces the first five characters of the
# file name (presumably an "SxxB_" prefix -- TODO confirm against the raw data).
pattern = re.compile(r'\((\d+)B\)')
for f in imu_filepaths:
    # Decide whether to skip before touching the archive, so pilot files are
    # never opened at all.
    if 'Pilots' in str(f):  # ignore pilot files
        continue

    subject_folder = f.parts[3]
    if pattern.search(subject_folder):  # folder carries a "(..B)" repeat marker
        sub_name = subject_folder.split('(')[0]
        save_name = f.name.replace(".csv.gz", "_imu.csv")
        save_name = f'{sub_name}_{save_name[5:]}'
    else:
        # Use the real file name rather than a fixed path index: the recursive
        # glob may match files nested deeper than the subject folder, where a
        # fixed index would pick up a directory name instead.
        sub_name = f.name.split('_')[0]
        save_name = f.name.replace(".csv.gz", "_imu.csv")

    dest = Path(output_dir).joinpath(sub_name)
    dest.mkdir(parents=True, exist_ok=True)

    # Stream the decompressed bytes straight to disk; both handles are closed
    # even if the copy fails part-way.
    with gzip.open(f, 'rb') as f_in, open(dest.joinpath(save_name), 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

In [4]:
# Repeat subjects: maps the "B"-suffixed ID that prefixes a recording's file
# name to the subject ID the recording is stored under in the output folder.
repeat_subs = dict(
    S02B='S43',
    S11B='S50',
    S06B='S51',
    S03B='S56',
    S07B='S57',
    S01B='S58',
    S12B='S59',
    S30B='S60',
    S04B='S61',
    S29B='S62',
    S32B='S63',
    S16B='S65',
    S08B='S66',
    S73B='S79',
    S82B='S83',
    S70B='S84',
)

In [5]:
# Display the "B"-suffixed IDs that have a remapping entry.
repeat_subs.keys()

dict_keys(['S02B', 'S11B', 'S06B', 'S03B', 'S07B', 'S01B', 'S12B', 'S30B', 'S04B', 'S29B', 'S32B', 'S16B', 'S08B', 'S73B', 'S82B', 'S70B'])

In [6]:
# Copy each mocap CSV into <output_dir>/<subject>/ with a "_mocap" suffix.
# Files whose leading ID appears in repeat_subs are stored under the mapped
# subject ID, and their five-character name prefix is swapped for that ID.
for mocap_path in mocap_filepaths:
    with open(mocap_path, "rb") as f_in:
        original_name = mocap_path.name
        save_name = original_name.replace(".csv", "_mocap.csv")
        sub_name = original_name.split('_')[0]

        if sub_name in repeat_subs:
            sub_name = repeat_subs[sub_name]
            save_name = f'{sub_name}_{save_name[5:]}'

        dest = Path(output_dir).joinpath(sub_name)
        dest.mkdir(parents=True, exist_ok=True)

        target = dest.joinpath(save_name)
        with open(target, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

In [7]:
# Find every mocap recording that has a matching IMU recording in the same
# subject folder, collect the matched stems in valid_files, and tally the
# underscore-separated fields of each matched file name.
#
# Raises ValueError if a matched file name does not split into exactly five
# "_"-separated fields (subject, exercise, arm, movement class, "mocap.csv").

# Pre-create every counter so the summary cells work even when nothing matched.
counters = {
    field: Counter()
    for field in ("subject", "exercise", "arm", "movement_class")
}
_sub_files = []  # every CSV seen so far; kept as a global for inspection
valid_files = []

# Sorted iteration makes the order of valid_files independent of the file system.
for sub_dir in sorted(output_dir.glob('*')):
    if not sub_dir.is_dir():
        # Skip stray files in the output folder (e.g. a previously written
        # valid_files.txt).
        continue

    sub_id = sub_dir.name
    csv_files = list(sub_dir.glob('*.csv'))
    _sub_files.extend(csv_files)
    # Per-folder set: constant-time lookup instead of scanning an ever-growing list.
    csv_lookup = set(csv_files)

    for mocap_file in sorted(sub_dir.glob('*mocap.csv')):
        filename = mocap_file.name
        # Swap the suffix in the file name only, never in the directory part.
        imu_file = mocap_file.with_name(filename.replace("mocap", "imu"))
        if imu_file not in csv_lookup:
            continue  # no IMU counterpart for this mocap recording

        subject, exercise, arm, movement_class, _ = filename.split("_")
        valid_files.append(sub_id + "/" + filename.replace("_mocap.csv", ""))
        counters["subject"][subject] += 1
        counters["exercise"][exercise] += 1
        counters["arm"][arm] += 1
        counters["movement_class"][movement_class] += 1

In [8]:
# Number of matched IMU/mocap pairs per exercise field of the file name.
counters["exercise"]

Counter({'E2': 140,
         'E3': 140,
         'E5': 140,
         'E6': 140,
         'E9': 140,
         'E4': 139,
         'E7': 139,
         'E1': 138,
         'E8': 138})

In [9]:
# Number of matched IMU/mocap pairs per subject field of the file name.
counters["subject"]

Counter({'S01': 18,
         'S02': 18,
         'S03': 18,
         'S04': 18,
         'S05': 18,
         'S06': 18,
         'S07': 18,
         'S08': 18,
         'S09': 18,
         'S10': 18,
         'S11': 18,
         'S12': 18,
         'S13': 18,
         'S14': 18,
         'S15': 18,
         'S16': 18,
         'S17': 18,
         'S19': 18,
         'S22': 18,
         'S23': 18,
         'S26': 18,
         'S27': 18,
         'S29': 18,
         'S30': 18,
         'S31': 18,
         'S32': 18,
         'S34': 18,
         'S35': 18,
         'S38': 18,
         'S39': 18,
         'S40': 18,
         'S41': 18,
         'S42': 18,
         'S43': 18,
         'S44': 18,
         'S45': 18,
         'S46': 18,
         'S47': 18,
         'S48': 18,
         'S49': 18,
         'S50': 18,
         'S51': 18,
         'S53': 18,
         'S54': 18,
         'S55': 18,
         'S56': 18,
         'S57': 18,
         'S58': 18,
         'S59': 18,
         'S60': 18,


In [10]:
# Matched-pair tallies for the arm and movement-class fields of the file name.
counters['arm'], counters["movement_class"]

(Counter({'R': 1254}),
 Counter({'1': 644, '2': 577, '1B': 18, '2B': 11, '2b': 2, '1C': 1, '1b': 1}))

In [11]:
# "<subject folder>/<file stem>" entries for every mocap file with a matching IMU file.
valid_files

['S01/S01_E1_R_1',
 'S01/S01_E1_R_2',
 'S01/S01_E2_R_1',
 'S01/S01_E2_R_2',
 'S01/S01_E3_R_1',
 'S01/S01_E3_R_2',
 'S01/S01_E4_R_1',
 'S01/S01_E4_R_2',
 'S01/S01_E5_R_1',
 'S01/S01_E5_R_2',
 'S01/S01_E6_R_1',
 'S01/S01_E6_R_2',
 'S01/S01_E7_R_1B',
 'S01/S01_E7_R_2',
 'S01/S01_E8_R_1',
 'S01/S01_E8_R_2',
 'S01/S01_E9_R_1',
 'S01/S01_E9_R_2',
 'S02/S02_E1_R_1',
 'S02/S02_E1_R_2',
 'S02/S02_E2_R_1B',
 'S02/S02_E2_R_2',
 'S02/S02_E3_R_1',
 'S02/S02_E3_R_2',
 'S02/S02_E4_R_1',
 'S02/S02_E4_R_2',
 'S02/S02_E5_R_1',
 'S02/S02_E5_R_2B',
 'S02/S02_E6_R_1',
 'S02/S02_E6_R_2',
 'S02/S02_E7_R_1',
 'S02/S02_E7_R_2',
 'S02/S02_E8_R_1',
 'S02/S02_E8_R_2',
 'S02/S02_E9_R_1',
 'S02/S02_E9_R_2',
 'S03/S03_E1_R_1',
 'S03/S03_E1_R_2',
 'S03/S03_E2_R_1',
 'S03/S03_E2_R_2',
 'S03/S03_E3_R_1',
 'S03/S03_E3_R_2',
 'S03/S03_E4_R_1',
 'S03/S03_E4_R_2',
 'S03/S03_E5_R_1',
 'S03/S03_E5_R_2',
 'S03/S03_E6_R_1',
 'S03/S03_E6_R_2',
 'S03/S03_E7_R_1',
 'S03/S03_E7_R_2',
 'S03/S03_E8_R_1',
 'S03/S03_E8_R_2',
 'S03/S03

In [12]:
# Save the matched recording stems to valid_files.txt in the output folder,
# one entry per line, in sorted order.
with open(f"{output_dir}/valid_files.txt", "w") as listing:
    listing.writelines(stem + '\n' for stem in sorted(valid_files))