In [1]:
import glob
import gzip
import os
import shutil
from collections import Counter

In [2]:
# Root folder that receives one sub-directory per subject (filled by the copy
# cells below) as well as the final valid_files.txt listing.
output_dir = "/root/data/smartwatch/subjects_2023-06-27"

In [3]:
# Collect every *.gz file that sits exactly one directory level below the imu
# folder. glob returns the matches in arbitrary (unsorted) order.
imu_filepaths = glob.glob("/root/data/smartwatch/imu/*/*.gz")
# Echo the list so the discovered paths show up in the cell output.
imu_filepaths

['/root/data/smartwatch/imu/S82/S82_E5_R_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E6_L_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E5_L_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E2_R_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E4_R_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E5_L_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E1_L_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E1_R_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E8_L_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E1_R_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E3_L_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E7_L_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E5_R_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E9_R_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E8_R_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E3_R_2.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E7_L_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E9_R_1.csv.gz',
 '/root/data/smartwatch/imu/S82/S82_E8_L_2.csv.gz',
 '/root/data

In [5]:
# Decompress every gzipped IMU recording into
# <output_dir>/<subject>/<recording>_imu.csv.
for imu_path in imu_filepaths:
    # Take the file name via basename instead of a fixed "/"-split index, so
    # the cell keeps working when the data root lives at a different depth.
    imu_name = os.path.basename(imu_path)
    # The subject ID is the first "_"-separated field of the file name; the
    # previously computed (and never used) parent-directory name was dropped.
    new_subject = imu_name.split("_")[0]
    save_name = imu_name.replace(".csv.gz", "_imu.csv")
    dest = f"{output_dir}/{new_subject}"
    # exist_ok=True replaces the separate exists() check and makes re-runs safe.
    os.makedirs(dest, exist_ok=True)
    # Stream the decompressed bytes straight to the destination file.
    with gzip.open(imu_path, "rb") as f_in, open(os.path.join(dest, save_name), "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)

In [6]:
# Collect every *.csv file directly inside the mocap folder. glob returns the
# matches in arbitrary (unsorted) order.
mocap_filepaths = glob.glob("/root/data/smartwatch/mocap_2023-06-27/*.csv")
# Echo the list so the discovered paths show up in the cell output.
mocap_filepaths

['/root/data/smartwatch/mocap_2023-06-27/S26_E1_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S32B_E8_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S04_E9_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S02_E1_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S80_E5_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S64_E5_R_2.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S82B_E3_R_2.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S49_E4_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S70_E5_R_2.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S32B_E4_R_2.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S31_E8_R_2.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S15_E8_R_2.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S09_E7_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S03B_E5_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S50_E5_R_1.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S80_E4_R_2.csv',
 '/root/data/smartwatch/mocap_2023-06-27/S04_E6_R_2.csv',
 '/root/da

In [8]:
# Copy every mocap CSV into <output_dir>/<subject>/<recording>_mocap.csv.
for mocap_path in mocap_filepaths:
    # Take the file name via basename instead of a fixed "/"-split index, so
    # the cell keeps working when the data root lives at a different depth.
    mocap_name = os.path.basename(mocap_path)
    # The subject ID is the first "_"-separated field of the file name.
    subject = mocap_name.split("_")[0]
    save_name = mocap_name.replace(".csv", "_mocap.csv")
    dest = f"{output_dir}/{subject}"
    # exist_ok=True replaces the separate exists() check and makes re-runs safe.
    os.makedirs(dest, exist_ok=True)
    # copyfile copies the file contents only, which is all the manual
    # open/open/copyfileobj sequence did.
    shutil.copyfile(mocap_path, os.path.join(dest, save_name))

In [10]:
# Find every recording that has both a mocap and an IMU file, remember its
# "<subject_dir>/<recording>" identifier, and tally the file-name fields.
#
# All four counters are created up front so the summary cells below still work
# (showing empty counters) when no pair is found.
counters = {key: Counter() for key in ("subject", "exercise", "arm", "movement_class")}
valid_files = []
mocap_suffix = "_mocap.csv"
imu_suffix = "_imu.csv"
for subject_dir in glob.glob(f"{output_dir}/*"):
    # Skip plain files that live next to the subject folders (for example the
    # valid_files.txt written at the end of this notebook on a previous run).
    if not os.path.isdir(subject_dir):
        continue
    subject_id = os.path.basename(subject_dir)
    # There are fewer mocap files, just check if we have corresponding IMU files to those
    for mocap_file in glob.glob(os.path.join(subject_dir, "*" + mocap_suffix)):
        # Swap only the trailing suffix; replacing "mocap" anywhere in the path
        # would also rewrite a directory or subject name that contains it.
        stem = mocap_file[: -len(mocap_suffix)]
        if not os.path.isfile(stem + imu_suffix):
            continue
        recording = os.path.basename(stem)
        # File names follow <subject>_<exercise>_<arm>_<movement_class>; any
        # other number of fields raises ValueError here.
        subject, exercise, arm, movement_class = recording.split("_")
        valid_files.append(subject_id + "/" + recording)
        counters["subject"][subject] += 1
        counters["exercise"][exercise] += 1
        counters["arm"][arm] += 1
        counters["movement_class"][movement_class] += 1

In [11]:
# Number of matched mocap/IMU recordings per exercise field of the file name.
counters["exercise"]

Counter({'E7': 135,
         'E9': 136,
         'E1': 130,
         'E5': 135,
         'E2': 136,
         'E8': 133,
         'E6': 136,
         'E4': 135,
         'E3': 135})

In [12]:
# Number of matched mocap/IMU recordings per subject field of the file name.
counters["subject"]

Counter({'S82': 18,
         'S47': 18,
         'S24': 17,
         'S13': 18,
         'S19': 18,
         'S80': 18,
         'S52': 17,
         'S01B': 18,
         'S21': 9,
         'S02B': 18,
         'S30': 18,
         'S31': 17,
         'S30B': 18,
         'S08': 18,
         'S34': 18,
         'S32B': 18,
         'S04': 18,
         'S44': 18,
         'S25': 9,
         'S27': 17,
         'S06': 18,
         'S40': 18,
         'S32': 16,
         'S45': 18,
         'S17': 18,
         'S76': 9,
         'S03B': 18,
         'S35': 17,
         'S10': 18,
         'S49': 18,
         'S70B': 18,
         'S70': 18,
         'S42': 18,
         'S46': 18,
         'S48': 18,
         'S16': 18,
         'S18': 9,
         'S22': 18,
         'S29B': 17,
         'S03': 18,
         'S06B': 18,
         'S33': 17,
         'S53': 18,
         'S05': 18,
         'S38': 18,
         'S09': 18,
         'S08B': 18,
         'S15': 18,
         'S20': 17,
         'S55':

In [13]:
# Matched-recording counts per arm field and per movement-class field.
# NOTE(review): the recorded output contains mixed labels such as '1B', '1b',
# '1C' and '2b' next to '1' and '2' -- confirm whether these are intentional
# or should be normalised before use.
counters['arm'], counters["movement_class"]

(Counter({'R': 1211}),
 Counter({'2': 559, '1': 627, '1B': 12, '2B': 9, '1b': 1, '1C': 1, '2b': 2}))

In [14]:
# Echo the "<subject_dir>/<recording>" identifiers of all matched pairs.
valid_files

['S82/S82_E7_R_2',
 'S82/S82_E9_R_2',
 'S82/S82_E1_R_1',
 'S82/S82_E5_R_1',
 'S82/S82_E2_R_1',
 'S82/S82_E8_R_2',
 'S82/S82_E9_R_1',
 'S82/S82_E2_R_2',
 'S82/S82_E8_R_1',
 'S82/S82_E6_R_1',
 'S82/S82_E4_R_1',
 'S82/S82_E7_R_1',
 'S82/S82_E3_R_2',
 'S82/S82_E6_R_2',
 'S82/S82_E4_R_2',
 'S82/S82_E3_R_1',
 'S82/S82_E1_R_2',
 'S82/S82_E5_R_2',
 'S47/S47_E1_R_2',
 'S47/S47_E8_R_2',
 'S47/S47_E9_R_1',
 'S47/S47_E4_R_1',
 'S47/S47_E1_R_1',
 'S47/S47_E3_R_2',
 'S47/S47_E4_R_2',
 'S47/S47_E6_R_2',
 'S47/S47_E7_R_2',
 'S47/S47_E9_R_2',
 'S47/S47_E5_R_1',
 'S47/S47_E6_R_1',
 'S47/S47_E8_R_1',
 'S47/S47_E2_R_1',
 'S47/S47_E7_R_1',
 'S47/S47_E5_R_2',
 'S47/S47_E3_R_1',
 'S47/S47_E2_R_2',
 'S24/S24_E8_R_1',
 'S24/S24_E8_R_2',
 'S24/S24_E1_R_2',
 'S24/S24_E3_R_1',
 'S24/S24_E2_R_1',
 'S24/S24_E7_R_1',
 'S24/S24_E2_R_2',
 'S24/S24_E4_R_2',
 'S24/S24_E1_R_1B',
 'S24/S24_E4_R_1',
 'S24/S24_E6_R_2',
 'S24/S24_E5_R_2',
 'S24/S24_E5_R_1',
 'S24/S24_E9_R_1',
 'S24/S24_E3_R_2',
 'S24/S24_E9_R_2',
 'S24/S24_E

In [15]:
# Persist the matched recording identifiers in sorted order, one per line.
with open(f"{output_dir}/valid_files.txt", "w") as f:
    f.writelines(f"{entry}\n" for entry in sorted(valid_files))