In [1]:
import copy
import json
import os
import sys
import time

import numpy as np
import pandas as pd

## Loading in the data
> Note: cannot use DataFrames here, since this data is NOT resampled and the gestures all have different lengths!

In [2]:
# Alternate data location, kept commented out for reference:
#brc_path = "C:\\Users\\YamagamiLab\\Box\\Yamagami Lab\\Data\\$M data segmented\\segmented_filtered_data"
# Root folder of the segmented raw data that is passed to load_data as data_dir_path.
laptop_path = "C:\\Users\\kdmen\\Box\\Yamagami Lab\\Data\\2024_UIST_dataset\\upload\\segmented_raw_data"

In [3]:
# Participant ID 101 doesn't exist.
# Each participant has 100 experimenter-defined files and 50 user-defined files
# (10 experimenter-defined gestures and 5 user-defined gestures).

file_types = ["IMU_extract", "movavg_files"]
expt_types = ["experimenter-defined"]  # alternatives noted by the author: user-defined, standardized, rehab

# Note: "rehab" is variations!
pIDs_impaired = [
    'P102', 'P103', 'P104', 'P105', 'P106', 'P107', 'P108', 'P109',
    'P110', 'P111', 'P112', 'P114', 'P115', 'P116', 'P118', 'P119',
    'P121', 'P122', 'P123', 'P124', 'P125', 'P126', 'P127', 'P128',
    'P131', 'P132',
]
# Participants P001 and P003 are removed because they don't have duplicate or open gestures.
pIDs_unimpaired = [
    'P004', 'P005', 'P006', 'P008', 'P010', 'P011',
]

# Impaired participants first, then unimpaired.
pIDs_both = [*pIDs_impaired, *pIDs_unimpaired]

In [None]:
def load_data(pIDs, data_dir_path="C:\\Users\\kdmen\\Box\\Yamagami Lab\\Data\\2024_UIST_dataset\\upload\\segmented_raw_data", modalities=["E"], expt_types=["experimenter-defined"], num_emg_channels=16, num_imu_sensors=15):
    """Load segmented gesture CSV files into a nested dictionary.

    Each participant folder ``<data_dir_path>/<pid>`` is scanned for files whose
    name contains one of ``expt_types``. Filenames are split on ``"_"`` and must
    have at least six fields: field 3 is read as the modality (``"EMG"`` or
    ``"IMU"``), field 4 as the gesture ID and field 5 (extension stripped) as the
    trial number.

    Parameters
    ----------
    pIDs : iterable of str
        Participant folder names to load.
    data_dir_path : str
        Directory containing one sub-folder per participant.
    modalities : container of str
        ``"E"`` requests EMG files, ``"I"`` requests IMU files.
    expt_types : iterable of str
        Substrings a filename must contain to be loaded.
    num_emg_channels : int
        EMG columns ``EMG1`` .. ``EMG<n>`` are selected from each EMG file.
    num_imu_sensors : int
        IMU columns ``IMU<j>_{ax,ay,az,vx,vy,vz}`` for ``j`` in ``1..n`` are
        selected from each IMU file.

    Returns
    -------
    dict
        ``result[pid][gestureID][gestureNum][modality]`` is a list of
        per-channel lists (channels x timepoints). Participants with no
        successfully loaded file do not appear in the result.

    Notes
    -----
    Missing participant folders and unreadable, empty, malformed or
    wrong-schema files are reported with ``print`` and skipped; they never
    abort the load.
    """
    # Expected column names per requested modality, built once rather than per file.
    expected_headers = {}
    if "E" in modalities:
        expected_headers["EMG"] = [f'EMG{i}' for i in range(1, num_emg_channels + 1)]
    if "I" in modalities:
        # TODO (from original author): the sensor range below might not cover every IMU column.
        expected_headers["IMU"] = [
            f'IMU{j}_{axis}'
            for j in range(1, num_imu_sensors + 1)
            for axis in ('ax', 'ay', 'az', 'vx', 'vy', 'vz')
        ]

    nested_dict = {}  # Avoid defaultdict for clarity and efficiency
    for expt_type in expt_types:
        for pid in pIDs:
            print(pid)
            pid_path = os.path.join(data_dir_path, pid)
            if not os.path.isdir(pid_path):
                print(f"Pidpath does not exist: {pid_path}")
                continue

            for file in os.listdir(pid_path):
                if expt_type not in file:
                    continue  # Skip files that don't match the expt_type substring
                # Cheap substring pre-filter: when at least one modality was requested,
                # silently ignore files that mention none of the requested tags.
                if expected_headers and not any(tag in file for tag in expected_headers):
                    continue

                split_filename = file.split("_")
                if len(split_filename) < 6:
                    print(f"Unexpected filename format: {file}")
                    continue
                modality = split_filename[3]
                gestureID = split_filename[4]
                gestureNum = split_filename[5].rsplit(".", 1)[0]  # Remove .csv extension

                # Decide from the parsed modality field *before* touching the disk, so that
                # unrequested/unknown modalities are neither read nor misreported as
                # "missing columns".
                headers = expected_headers.get(modality)
                if headers is None:
                    print(f"Skipping {file}: modality '{modality}' was not requested or is not supported.")
                    continue

                file_path = os.path.join(pid_path, file)

                try:
                    df = pd.read_csv(file_path, header=0)
                    # Selecting by name keeps only the expected channels, in order; any
                    # header-less index/timestamp column (which pandas labels
                    # "Unnamed: N") is dropped by this selection as well.
                    df = df[headers]
                except FileNotFoundError:
                    print(f"File does not exist: {file_path}")
                    continue
                except pd.errors.EmptyDataError:
                    print(f"DataFrame is empty for file: {file_path}")
                    continue
                except pd.errors.ParserError:
                    # A malformed CSV should not abort the whole load.
                    print(f"Could not parse file: {file_path}")
                    continue
                except KeyError:
                    print(f"File {file} does not contain expected columns. Skipping.")
                    continue

                # Transpose so the stored data is channels x timepoints, as plain lists.
                reshaped_data = df.T.values.tolist()

                # Store the reshaped data by modality, creating intermediate levels on demand.
                trial_entry = (
                    nested_dict
                    .setdefault(pid, {})
                    .setdefault(gestureID, {})
                    .setdefault(gestureNum, {})
                )
                trial_entry[modality] = reshaped_data

    return nested_dict


# EMG Only Dataset

In [6]:
# Load every participant with load_data's default modality/experiment settings
# and report the wall-clock time the load took.
start_time = time.time()
nested_dict = load_data(pIDs_both, data_dir_path=laptop_path)
end_time = time.time()

print(f"\nCompleted in {end_time - start_time}s")

P102
P103
P104
P105
P106
P107
P108
P109
P110
P111
P112
P114
P115
P116
P118
P119
P121
P122
P123
P124
P125
P126
P127
P128
P131
P132
P004
P005
P006
P008
P010
P011

Completed in 298.0591824054718s


In [7]:
# Top-level keys are the participant IDs that had at least one file loaded.
nested_dict.keys()

dict_keys(['P102', 'P103', 'P104', 'P105', 'P106', 'P107', 'P108', 'P109', 'P110', 'P111', 'P112', 'P114', 'P115', 'P116', 'P118', 'P119', 'P121', 'P122', 'P123', 'P124', 'P125', 'P126', 'P127', 'P128', 'P131', 'P132', 'P004', 'P005', 'P006', 'P008', 'P010', 'P011'])

In [8]:
# Spot-check one trial: participant P118, gesture "pan", trial "5", EMG modality.
gesture = nested_dict["P118"]["pan"]["5"]["EMG"]

In [9]:
# Check the container type used to store a single trial.
print(type(gesture))

<class 'list'>


In [10]:
# Outer length of the trial; load_data stores one inner list per channel.
print(len(gesture))

16


In [11]:
# Sanity check: the spot-checked EMG trial must have 16 channel rows.
assert len(gesture) == 16

## Save Nested Dict

In [12]:
# Alternate save location, kept commented out for reference:
#brc_data_save_path = 'D:\\Kai_MetaGestureClustering_24\\saved_datasets\\filtered_datasets\\'
# Folder where the serialized dataset is written.
laptop_data_save_path = 'C:\\Users\\kdmen\\Box\\Meta_Gesture_2024\\saved_datasets'

# Full path of the output JSON file (Windows-style separator is hard-coded here).
full_filesave_path = laptop_data_save_path+'\\segraw_EMG_allgestures_allusers.json'

In [13]:
# Save the dict to a JSON file. This works directly because load_data stores
# plain nested dicts with string keys and lists of numbers.
# (`json` is imported in the notebook's top imports cell.)
with open(full_filesave_path, 'w') as f:
    json.dump(nested_dict, f)

In [14]:
# Read the saved JSON back in so the round trip can be inspected below.
with open(full_filesave_path, 'r') as f:
    loaded_dict = json.load(f)

In [15]:
# Participant IDs present in the reloaded file.
loaded_dict.keys()

dict_keys(['P102', 'P103', 'P104', 'P105', 'P106', 'P107', 'P108', 'P109', 'P110', 'P111', 'P112', 'P114', 'P115', 'P116', 'P118', 'P119', 'P121', 'P122', 'P123', 'P124', 'P125', 'P126', 'P127', 'P128', 'P131', 'P132', 'P004', 'P005', 'P006', 'P008', 'P010', 'P011'])

In [16]:
# Gesture IDs stored for participant P118.
loaded_dict['P118'].keys()

dict_keys(['pan', 'delete', 'close', 'select-single', 'rotate', 'zoom-in', 'zoom-out', 'open', 'move', 'duplicate'])

In [17]:
# Trial numbers stored for P118's "pan" gesture (string keys).
loaded_dict['P118']['pan'].keys()

dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'])

In [18]:
# Spot-check one trial from the reloaded data: P118, "pan", trial "1", EMG modality.
gesture = loaded_dict['P118']['pan']['1']["EMG"]

In [19]:
# Container type and outer length (one inner list per channel) of the reloaded trial.
print(type(gesture))
print(len(gesture))

<class 'list'>
16
