In [1]:
import os
import torch
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
def _B_normalize_block(block_np, demean=True, eps=1e-8):
    """
    block_np: (T, D_block) numpy array for one biosignal (e.g., all IMU channels)
    Returns: (T, D_block) normalized per $B (demean per channel, divide by shared std over block)
    """
    if demean:
        block_np = block_np - block_np.mean(axis=0, keepdims=True)  # per-channel demean
    sigma = block_np.ravel().std(dtype=np.float64)
    if sigma < eps:
        return block_np  # flat signal; leave as-is
    return block_np / sigma

def preprocess_df_B_by_gesture(
    data_df: pd.DataFrame,
    biosignal_switch_ix: int = 72,   # [:switch) = IMU, [switch:] = EMG
    trial_length: int = 64,
    demean: bool = True,
    eps: float = 1e-8,
) -> pd.DataFrame:
    """
    Run the "$B" normalization on every trial of a sensor-only DataFrame.

    Expected layout of `data_df`:
      - sensor columns only (no metadata), shape (num_trials*trial_length, num_channels)
      - columns [0, biosignal_switch_ix) are IMU, the remaining columns are EMG
      - rows are grouped so that each consecutive run of `trial_length` rows is one trial

    Within each trial the IMU block and the EMG block are normalized
    independently by `_B_normalize_block`.

    Returns a new DataFrame with the same shape, columns and index as the input;
    the input is not modified.

    Raises ValueError when the row count is not a multiple of `trial_length`
    or when `biosignal_switch_ix` does not split the columns into two
    non-empty groups.
    """
    has_missing = data_df.isna().any().any()
    if has_missing:
        print("Warning: NaNs detected in input; consider cleaning first.")

    num_rows, num_cols = data_df.shape
    if num_rows % trial_length != 0:
        raise ValueError(f"Rows ({num_rows}) not divisible by trial_length ({trial_length}).")

    if biosignal_switch_ix <= 0 or biosignal_switch_ix >= num_cols:
        raise ValueError(f"biosignal_switch_ix {biosignal_switch_ix} must be in (0, {num_cols}).")

    # Work on a private float64 copy so the caller's frame stays untouched.
    values = data_df.to_numpy(dtype=np.float64, copy=True)  # (N, D)

    # Step through the rows one trial at a time.
    for start in range(0, num_rows, trial_length):
        rows = slice(start, start + trial_length)

        # The two column groups are disjoint, so each can be normalized and
        # written back independently of the other.
        values[rows, :biosignal_switch_ix] = _B_normalize_block(
            values[rows, :biosignal_switch_ix], demean=demean, eps=eps
        )
        values[rows, biosignal_switch_ix:] = _B_normalize_block(
            values[rows, biosignal_switch_ix:], demean=demean, eps=eps
        )

    return pd.DataFrame(values, columns=data_df.columns, index=data_df.index)


In [3]:
def return_full_yX_timeseries_df(config):
    """
    Load the raw EMG/IMU recordings and return them as one normalized DataFrame.

    Steps:
      1. Read the pickled DataFrame at ``config["emg_imu_pkl_full_path"]``.
      2. Split off the metadata columns ('Participant', 'Gesture_ID',
         'Gesture_Num'), renaming 'Participant' to 'PID'.
      3. Add label-encoded 'Enc_Gesture_ID' and 'Enc_PID' columns.
      4. Normalize the sensor columns trial-by-trial with
         ``preprocess_df_B_by_gesture`` (default arguments).
      5. Concatenate metadata and normalized signals column-wise.

    Nothing is converted to tensors and nothing is written to disk here. The
    demographics spreadsheets are not needed for the returned frame, so they
    are not read.

    Parameters
    ----------
    config : mapping
        Only the key "emg_imu_pkl_full_path" is read.

    Returns
    -------
    pd.DataFrame
        Columns PID, Gesture_ID, Gesture_Num, Enc_Gesture_ID, Enc_PID followed
        by the normalized sensor columns; one row per row of the input pickle.
    """
    # Progress message; the text is kept as-is so existing logs/outputs still match.
    print("Processing raw data into pure Tensor dictionaries...")

    # NOTE: pd.read_pickle can execute arbitrary code; only load trusted files.
    data_df = pd.read_pickle(config["emg_imu_pkl_full_path"])

    metadata_cols = ['Participant', 'Gesture_ID', 'Gesture_Num']
    # .rename returns a new frame, so the column assignments below do not
    # write into data_df.
    metadata_cols_df = data_df[metadata_cols].rename(columns={"Participant": "PID"})
    metadata_cols_df['Gesture_Num'] = metadata_cols_df['Gesture_Num'].astype(int)

    # Integer codes for gesture and participant (column order matters to callers).
    metadata_cols_df['Enc_Gesture_ID'] = LabelEncoder().fit_transform(metadata_cols_df['Gesture_ID'])
    PID_encoder = LabelEncoder().fit(metadata_cols_df['PID'].unique())
    metadata_cols_df['Enc_PID'] = PID_encoder.transform(metadata_cols_df['PID'])

    # Everything that is not metadata is treated as a sensor channel.
    X_df = data_df.drop(metadata_cols, axis=1)
    ppd_B_X_df = preprocess_df_B_by_gesture(X_df)

    # Both frames keep data_df's index, so the column-wise concat aligns row for row.
    return pd.concat([metadata_cols_df, ppd_B_X_df], axis=1)

In [4]:
# ==========================================
# DATA PREPARATION (Run Once & Save)
# ==========================================
def _load_scaled_demographics(xlsx_path, n_trailing_rows_to_drop):
    """
    Load one questionnaire workbook and return its scaled demographic columns.

    Keeps the eight demographic columns used downstream, drops the last
    `n_trailing_rows_to_drop` rows of the sheet (presumably non-participant
    rows -- TODO confirm against the workbook), coerces "time disabled" to
    numeric (unparseable entries become NaN), divides every numeric column
    except BMI by 100 and divides BMI by 70.

    Returns
    -------
    (demo_df, percent_cols)
        The standalone scaled DataFrame and the list of columns that were
        divided by 100 (in DataFrame column order).
    """
    demo_columns = [
        "PID", "disability coding", "time disabled", "Actual handedness",
        "What is your age?", "What is your gender?", "BMI", "DASH score",
    ]
    # .copy() makes this a standalone frame so the assignments below do not
    # trigger SettingWithCopy warnings.
    demo_df = pd.read_excel(xlsx_path)[demo_columns].iloc[:-n_trailing_rows_to_drop].copy()

    demo_df["time disabled"] = pd.to_numeric(
        demo_df["time disabled"].astype(str).str.strip(),
        errors='coerce'
    )

    # BMI has its own scale factor (70). It must be left out of the /100 pass,
    # otherwise it would be divided by 7000 whenever pandas happens to infer
    # the BMI column as numeric.
    percent_cols = [
        col for col in demo_df.select_dtypes(include='number').columns if col != "BMI"
    ]
    demo_df[percent_cols] = demo_df[percent_cols] / 100.0
    demo_df["BMI"] = demo_df["BMI"] / 70.0
    return demo_df, percent_cols


def process_and_save_tensor_dict(config):
    """
    Build the per-participant / per-gesture tensor dictionary and pickle it.

    Reads the raw EMG/IMU pickle and both demographics workbooks, normalizes
    the signals per trial, one-hot encodes the demographics, and stores pure
    PyTorch tensors so that no pandas objects have to travel through
    DataLoader worker processes.

    Saved structure::

        data_dict[PID][Gesture_Num] = {
            'emg':  Tensor(10, 64, 16),   # (trials, timesteps, channels)
            'imu':  Tensor(10, 64, 72),
            'demo': Tensor(D,),           # demographics without PID / Enc_PID
        }

    A participant without a demographics row keeps an empty entry. A gesture
    whose rows cannot be reshaped to the expected layout is reported and left out.

    Parameters
    ----------
    config : mapping
        Keys read: "emg_imu_pkl_full_path", "pwmd_xlsx_filepath",
        "pwoutmd_xlsx_filepath", "dfs_save_path", "timestamp".

    Returns
    -------
    str
        Path of the written pickle, "<dfs_save_path>/<timestamp>_tensor_dict.pkl".
    """
    print("Processing raw data into pure Tensor dictionaries...")

    # Fixed layout of the recordings: every (participant, gesture) pair is
    # expected to hold 10 trials x 64 timesteps = 640 rows.
    trials_per_gesture, trial_length = 10, 64
    n_emg_channels, n_imu_channels = 16, 72

    emg_imu_pkl_full_path = config["emg_imu_pkl_full_path"]
    pwmd_xlsx_filepath = config["pwmd_xlsx_filepath"]
    pwoutmd_xlsx_filepath = config["pwoutmd_xlsx_filepath"]

    # NOTE: pd.read_pickle can execute arbitrary code; only load trusted files.
    data_df = pd.read_pickle(emg_imu_pkl_full_path)

    metadata_cols = ['Participant', 'Gesture_ID', 'Gesture_Num']
    metadata_cols_df = data_df[metadata_cols].rename(columns={"Participant": "PID"})
    metadata_cols_df['Gesture_Num'] = metadata_cols_df['Gesture_Num'].astype(int)

    # PID encoder (also used below to encode the demographics tables, which
    # raises if a questionnaire PID never appears in the recordings).
    all_PIDs = metadata_cols_df['PID']
    unique_PIDs = all_PIDs.unique()
    PID_encoder = LabelEncoder().fit(unique_PIDs)

    # Gesture encoder
    gesture_ID_label_encoder = LabelEncoder()
    metadata_cols_df['Enc_Gesture_ID'] = gesture_ID_label_encoder.fit_transform(metadata_cols_df['Gesture_ID'])
    metadata_cols_df['Enc_PID'] = PID_encoder.transform(metadata_cols_df['PID'])

    # Signals
    X_df = data_df.drop(metadata_cols, axis=1)
    ppd_B_X_df = preprocess_df_B_by_gesture(X_df)

    # Demographics: participants with motor disabilities
    pwmd_demo_df, numeric_cols = _load_scaled_demographics(pwmd_xlsx_filepath, 8)
    pwmd_demo_df['Enc_PID'] = PID_encoder.transform(pwmd_demo_df["PID"])

    ##########################################################################
    # --- Verification Prints ---
    print("--- Data Integrity Check ---")
    print(f"Total rows after slice: {len(pwmd_demo_df)}")
    # Check how many values failed to convert to numeric (resulted in NaN)
    nans_created = pwmd_demo_df["time disabled"].isna().sum()
    print(f"Rows that couldn't be converted (set to NaN): {nans_created}")
    # Show a sample of the 'time disabled' column to verify values
    print("\nSample of 'time disabled' after conversion:")
    print(pwmd_demo_df["time disabled"].head())
    # Verify the division worked on numeric columns
    print("\nMax value in numeric columns (should be small if divided by 100):")
    print(pwmd_demo_df[numeric_cols].max())
    ##########################################################################

    ##########################################################
    print("\n--- pwmd_demo_df: First Row Entry ---")
    print(f"Unique disability codings: {pwmd_demo_df['disability coding'].unique()}")
    print(pwmd_demo_df.head(1))
    ##########################################################

    # Demographics: participants without motor disabilities
    pwoutmd_demo_df, _ = _load_scaled_demographics(pwoutmd_xlsx_filepath, 5)
    # P001 and P003 are excluded before encoding; .copy() keeps the filtered
    # frame standalone so adding Enc_PID does not warn.
    pwoutmd_demo_df = pwoutmd_demo_df[~pwoutmd_demo_df['PID'].isin(['P001', 'P003'])].copy()
    pwoutmd_demo_df['Enc_PID'] = PID_encoder.transform(pwoutmd_demo_df["PID"])

    ##########################################################
    print("\n--- pwoutmd_demo_df: First Row Entry ---")
    print(f"Unique disability codings: {pwoutmd_demo_df['disability coding'].unique()}")
    print(pwoutmd_demo_df.head(1))
    ##########################################################

    combined_demo_df = pd.concat([pwmd_demo_df, pwoutmd_demo_df])

    # One-hot encode the categorical columns; 'PID' is kept for the lookup below.
    demoENC_df = pd.get_dummies(
        combined_demo_df,
        columns=["disability coding", "Actual handedness", "What is your gender?"],
        drop_first=True
    )

    # Convert everything except PID to float (True -> 1.0, False -> 0.0).
    cols_to_convert = demoENC_df.columns.difference(['PID'])
    demoENC_df[cols_to_convert] = demoENC_df[cols_to_convert].astype(float)

    ###############################################################################
    ###   DEBUGGING DEMOGRAPHICS   ###
    # TODO: Decide whether (Enc_)PID should be part of the demographic vector.
    ## For ML, no -- and both ID columns are dropped before the tensor is built below.
    ## Open question: is this the only place where PID could be passed through
    ## (useful for MOE / conditioning)?
    # 1. Print the shape (Rows, Columns)
    print(f"Shape of DataFrame: {demoENC_df.shape}")
    # 2. Print the column names as a list (so you can count them easily)
    print(f"\n--- Names of the {len(demoENC_df.columns.tolist())} Columns ---")
    print(demoENC_df.columns.tolist())
    # 3. Print the first row to see the data types and values
    print("\n--- First Row Entry ---")
    print(demoENC_df.head(1))
    ###############################################################################

    full_yX_timeseries_df = pd.concat([metadata_cols_df, ppd_B_X_df], axis=1)

    # Sensor columns are identified by name. The column set is the same for
    # every participant, so detect (and log) it once instead of once per PID.
    all_cols = full_yX_timeseries_df.columns.tolist()
    emg_cols = [c for c in all_cols if 'EMG' in str(c).upper()]
    imu_cols = [c for c in all_cols if 'IMU' in str(c).upper()]
    print(f"Detected {len(emg_cols)} EMG columns and {len(imu_cols)} IMU columns.")

    data_dict = {}
    for pid in full_yX_timeseries_df['PID'].unique():
        # The entry is created up front, so a skipped participant stays
        # present in the saved dictionary with no gestures.
        data_dict[pid] = {}
        pid_data = full_yX_timeseries_df[full_yX_timeseries_df['PID'] == pid]

        matching_demo = demoENC_df[demoENC_df['PID'] == pid]
        if matching_demo.empty:
            print(f"Skipping {pid}: No demographic match found.")
            continue

        # Drop the ID columns so they aren't part of the feature vector.
        features = matching_demo.drop(columns=['PID', 'Enc_PID']).values[0]
        demo_tensor = torch.tensor(features, dtype=torch.float32)

        for g in pid_data['Gesture_Num'].unique():
            # All rows of this gesture (expected: 640 = 10 trials x 64 timesteps).
            g_df = pid_data[pid_data['Gesture_Num'] == g]

            emg_raw = g_df[emg_cols].values  # expected shape: (640, 16)
            imu_raw = g_df[imu_cols].values  # expected shape: (640, 72)

            # Reshape to (Trials, Time, Channels). The target shape is fixed on
            # purpose: a gesture with a different row or column count raises
            # and is skipped rather than silently stored in another layout.
            try:
                emg_tensor = torch.tensor(emg_raw, dtype=torch.float32).view(
                    trials_per_gesture, trial_length, n_emg_channels
                )
                imu_tensor = torch.tensor(imu_raw, dtype=torch.float32).view(
                    trials_per_gesture, trial_length, n_imu_channels
                )
            except RuntimeError as e:
                print(f"Error reshaping PID {pid} Gesture {g}: {e}")
                print(f"Actual shape was {emg_raw.shape}. Expected 640 rows.")
                continue

            data_dict[pid][g] = {
                'emg': emg_tensor,  # (10, 64, 16)
                'imu': imu_tensor,  # (10, 64, 72)
                'demo': demo_tensor
            }

    save_dir = config["dfs_save_path"]
    if save_dir:
        # Create the output directory on first use instead of failing in open().
        os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, f"{config['timestamp']}_tensor_dict.pkl")
    with open(save_path, 'wb') as f:
        pickle.dump(data_dict, f)
    print(f"Saved clean Tensor dictionary to {save_path}")
    return save_path


In [5]:
from datetime import datetime

# Minute-resolution run identifier (YYYYMMDD_HHMM), taken once when this cell
# runs; it is used below to name this run's output files and directories.
timestamp = f"{datetime.now():%Y%m%d_%H%M}"

In [6]:
# Run configuration: every path and identifier the pipeline reads lives here.
config = {
    # Identifier of this run (set in the previous cell).
    "timestamp": timestamp,
    # Inputs: raw recordings and the two demographics questionnaires.
    "emg_imu_pkl_full_path": 'C:\\Users\\kdmen\\Box\\Yamagami Lab\\Data\\Meta_Gesture_Project\\filtered_datasets\\metadata_IMU_EMG_allgestures_allusers.pkl',
    "pwmd_xlsx_filepath": "C:\\Users\\kdmen\\Repos\\pers-gest-cls\\dataset\\Biosignal gesture questionnaire for participants with disabilities.xlsx",
    "pwoutmd_xlsx_filepath": "C:\\Users\\kdmen\\Repos\\pers-gest-cls\\dataset\\Biosignal gesture questionnaire for participants without disabilities.xlsx",
    # Where processed datasets are written to / read back from.
    "dfs_save_path": "C:\\Users\\kdmen\\Repos\\pers-gest-cls\\dataset\\meta-learning-sup-que-ds\\",
    "dfs_load_path": "C:\\Users\\kdmen\\Repos\\pers-gest-cls\\dataset\\meta-learning-sup-que-ds\\",
    "saved_df_timestamp": '20250917_1217',
    # Fixed participant split definition.
    "user_split_json_filepath": "C:\\Users\\kdmen\\Repos\\pers-gest-cls\\system\\fixed_user_splits\\4kfcv_splits_shared_test.json",
    # Per-run output directories, suffixed with this run's timestamp.
    "results_save_dir": f"C:\\Users\\kdmen\\Repos\\pers-gest-cls\\system\\results\\local_{timestamp}",
    "models_save_dir": f"C:\\Users\\kdmen\\Repos\\pers-gest-cls\\system\\models\\local_{timestamp}",
}


In [7]:
# Run the full preprocessing pipeline; the return value is the path of the
# pickle it wrote ("<dfs_save_path>/<timestamp>_tensor_dict.pkl").
save_location = process_and_save_tensor_dict(config)

Processing raw data into pure Tensor dictionaries...
--- Data Integrity Check ---
Total rows after slice: 26
Rows that couldn't be converted (set to NaN): 0

Sample of 'time disabled' after conversion:
0    0.35
1    0.20
2    0.10
3    0.52
4    0.31
Name: time disabled, dtype: float64

Max value in numeric columns (should be small if divided by 100):
time disabled        0.52
What is your age?    0.77
DASH score           0.82
dtype: float64

--- pwmd_demo_df: First Row Entry ---
Unique disability codings: ['SCI' 'other' 'MD' 'PN' 'ET']
    PID disability coding  time disabled Actual handedness  What is your age?   
0  P102               SCI           0.35             Right               0.61  \

  What is your gender?      BMI  DASH score  Enc_PID  
0                Woman  0.42188        0.39        6  

--- pwoutmd_demo_df: First Row Entry ---
Unique disability codings: ['No Disability']
    PID disability coding  time disabled Actual handedness  What is your age?   
2  P004     No

In [8]:
import pickle
import torch

# Tensor dictionary to inspect.
# NOTE(review): this hard-coded file name differs from the
# "<timestamp>_tensor_dict.pkl" that process_and_save_tensor_dict writes --
# confirm this is the file you intend to check.
file_path = "C:\\Users\\kdmen\\Repos\\pers-gest-cls\\dataset\\meta-learning-sup-que-ds\\maml_tensor_dict.pkl"

# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(file_path, 'rb') as f:
    data_dict = pickle.load(f)

# 1. See how many PIDs (users) are in there
pids = list(data_dict.keys())
print(f"Total Users: {len(pids)}")

# 2. Pick the first user and the first gesture to inspect shapes
sample_pid = pids[0]
sample_gesture = list(data_dict[sample_pid].keys())[0]
sample_data = data_dict[sample_pid][sample_gesture]

# The tensors are stored as (Trials, Timesteps, Channels), so the expected
# shapes printed next to the actual ones use that layout.
print(f"\n--- Inspecting User: {sample_pid}, Gesture: {sample_gesture} ---")
print(f"EMG Shape:  {sample_data['emg'].shape}  (Expected: [Trials, Timesteps, 16])")
print(f"IMU Shape:  {sample_data['imu'].shape}  (Expected: [Trials, Timesteps, 72])")
print(f"Demo Shape: {sample_data['demo'].shape}")

# 3. Check for NaNs (important for training stability).
# Every tensor of the sample is checked, so "clean" really covers EMG, IMU
# and demographics rather than EMG alone.
nan_fields = [
    name.upper() for name in ("emg", "imu", "demo")
    if torch.isnan(sample_data[name]).any()
]
if nan_fields:
    print(f"\nWARNING: Found NaNs in {', '.join(nan_fields)} data!")
else:
    print("\nData looks clean (no NaNs).")

Total Users: 32

--- Inspecting User: P102, Gesture: 1 ---
EMG Shape:  torch.Size([10, 64, 16])  (Expected: [Timesteps, 16])
IMU Shape:  torch.Size([10, 64, 72])  (Expected: [Timesteps, 72])
Demo Shape: torch.Size([11])

Data looks clean (no NaNs).


In [9]:
import pandas as pd
import torch
import pickle
import numpy as np

def verify_segmentation(original_df, tensor_dict_path, pid='P102', gesture_num=1):
    """
    Spot-check that the saved tensors line up with the source DataFrame.

    For one participant and gesture, compares the first 10 timepoints of the
    first EMG channel of trial 0 in the pickled tensor dictionary against the
    first 10 rows of the first EMG column of the matching DataFrame rows, and
    prints a side-by-side table plus an overall verdict.

    Parameters
    ----------
    original_df : DataFrame with 'PID', 'Gesture_Num' and EMG columns.
    tensor_dict_path : path of the pickled dictionary laid out as
        data_dict[pid][gesture_num]['emg'] -> Tensor(trials, time, channels).
    pid, gesture_num : which participant / gesture to check.

    Returns None; the result is reported via print only.
    """
    # Saved side: trial 0, timepoints 0-9, channel 0.
    with open(tensor_dict_path, 'rb') as fh:
        saved = pickle.load(fh)
    tensor_vals = saved[pid][gesture_num]['emg'][0, :10, 0].numpy()

    # DataFrame side: the first EMG-named column, restricted to this
    # participant and gesture; its first 10 rows should be the start of trial 0.
    first_emg_col = [c for c in original_df.columns if 'EMG' in str(c).upper()][0]
    is_target = (original_df['PID'] == pid) & (original_df['Gesture_Num'] == gesture_num)
    df_vals = original_df.loc[is_target, first_emg_col].iloc[:10].values

    print(f"--- Verification for User {pid}, Gesture {gesture_num} ---")
    print(f"EMG Column tested: {first_emg_col}")
    print(f"{'Index':<8} | {'Original DF Value':<20} | {'Tensor (Trial 0) Value':<20}")
    print("-" * 60)

    n_matching = 0
    for idx in range(10):
        expected, actual = df_vals[idx], tensor_vals[idx]
        # Tolerance absorbs the float64 -> float32 rounding of the saved tensor.
        verdict = "MATCH" if np.isclose(expected, actual, atol=1e-5) else "MISMATCH!"
        n_matching += verdict == "MATCH"
        print(f"{idx:<8} | {expected:<20.6f} | {actual:<20.6f} | {verdict}")

    if n_matching != 10:
        print("\n❌ ERROR: Values do not match. The slicing or reshaping logic is offset.")
    else:
        print("\n✅ SUCCESS: The first 10 rows of the DataFrame mapped perfectly to Trial 0, Time 0-9.")
        

In [10]:
# Rebuild the normalized timeseries DataFrame in memory so it can be compared
# against the tensors that were saved to disk.
full_yX_timeseries_df = return_full_yX_timeseries_df(config)
# Path of the tensor dictionary written by process_and_save_tensor_dict in an
# earlier cell (that cell must have been run first).
save_path = save_location

# Quick look at the size and the first rows of the rebuilt frame.
print(full_yX_timeseries_df.shape)
full_yX_timeseries_df.head()

Processing raw data into pure Tensor dictionaries...
(204800, 93)


Unnamed: 0,PID,Gesture_ID,Gesture_Num,Enc_Gesture_ID,Enc_PID,IMU1_ax,IMU1_ay,IMU1_az,IMU1_vx,IMU1_vy,...,EMG7,EMG8,EMG9,EMG10,EMG11,EMG12,EMG13,EMG14,EMG15,EMG16
0,P102,pan,1,5,6,-0.551109,-0.738972,-0.985439,0.181924,0.059616,...,-0.276292,-0.026736,-0.87387,-1.036152,-0.58093,-0.719494,-0.502255,-1.750091,-0.127847,-0.094192
1,P102,pan,1,5,6,-0.571115,-0.821726,-0.975036,0.242607,0.067375,...,-0.125822,0.089679,-0.816215,-2.082635,-0.006283,-0.139439,-0.367764,-0.208084,-0.111811,-0.039009
2,P102,pan,1,5,6,-0.509305,-0.823575,-0.947221,0.550111,0.013848,...,-0.068451,0.117076,-0.668221,-3.403064,-0.52603,-0.478294,-0.300443,0.203266,0.1133,0.004728
3,P102,pan,1,5,6,-0.511788,-0.77581,-0.947939,0.417919,0.087222,...,-0.058907,0.080977,-0.424416,-3.709413,-0.570894,-0.775155,-0.14471,-0.619539,0.146499,0.199975
4,P102,pan,1,5,6,-0.441369,-0.921726,-0.882652,1.25497,0.108993,...,-0.003929,0.041526,-0.01653,-4.07515,-0.12771,2.682791,-0.14175,-0.208404,-0.035642,0.172662


In [11]:
# Spot-check: participant P102, gesture 1 (first 10 EMG samples of trial 0).
verify_segmentation(full_yX_timeseries_df, save_path, pid='P102', gesture_num=1)

--- Verification for User P102, Gesture 1 ---
EMG Column tested: EMG1
Index    | Original DF Value    | Tensor (Trial 0) Value
------------------------------------------------------------
0        | -0.362743            | -0.362743            | MATCH
1        | -0.351553            | -0.351553            | MATCH
2        | -0.380825            | -0.380825            | MATCH
3        | -0.366795            | -0.366795            | MATCH
4        | -0.245578            | -0.245578            | MATCH
5        | -0.171063            | -0.171063            | MATCH
6        | -0.201667            | -0.201667            | MATCH
7        | -0.165951            | -0.165951            | MATCH
8        | -0.014897            | -0.014897            | MATCH
9        | 0.034381             | 0.034381             | MATCH

✅ SUCCESS: The first 10 rows of the DataFrame mapped perfectly to Trial 0, Time 0-9.


In [12]:
# Spot-check: participant P112, gesture 3 (first 10 EMG samples of trial 0).
verify_segmentation(full_yX_timeseries_df, save_path, pid='P112', gesture_num=3)

--- Verification for User P112, Gesture 3 ---
EMG Column tested: EMG1
Index    | Original DF Value    | Tensor (Trial 0) Value
------------------------------------------------------------
0        | -0.043198            | -0.043198            | MATCH
1        | -0.030466            | -0.030466            | MATCH
2        | -0.049066            | -0.049066            | MATCH
3        | -0.070309            | -0.070309            | MATCH
4        | -0.069989            | -0.069989            | MATCH
5        | -0.036132            | -0.036132            | MATCH
6        | -0.037199            | -0.037199            | MATCH
7        | -0.048633            | -0.048633            | MATCH
8        | -0.054633            | -0.054633            | MATCH
9        | -0.034075            | -0.034075            | MATCH

✅ SUCCESS: The first 10 rows of the DataFrame mapped perfectly to Trial 0, Time 0-9.


In [13]:
# Spot-check: participant P124, gesture 7 (first 10 EMG samples of trial 0).
verify_segmentation(full_yX_timeseries_df, save_path, pid='P124', gesture_num=7)

--- Verification for User P124, Gesture 7 ---
EMG Column tested: EMG1
Index    | Original DF Value    | Tensor (Trial 0) Value
------------------------------------------------------------
0        | -0.221235            | -0.221235            | MATCH
1        | -0.215883            | -0.215883            | MATCH
2        | -0.209682            | -0.209682            | MATCH
3        | -0.150030            | -0.150030            | MATCH
4        | 0.104091             | 0.104091             | MATCH
5        | 0.501215             | 0.501215             | MATCH
6        | 0.570865             | 0.570865             | MATCH
7        | 0.292914             | 0.292914             | MATCH
8        | 0.324206             | 0.324206             | MATCH
9        | 0.342103             | 0.342103             | MATCH

✅ SUCCESS: The first 10 rows of the DataFrame mapped perfectly to Trial 0, Time 0-9.
