In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report, accuracy_score

from utils.ae_eval_and_vis import *
from utils.ae_torch_classes import *
from utils.LSTM_gesture_classifier import *

In [2]:
import warnings
warnings.filterwarnings('ignore')
# C:\Users\kdmen\miniconda3\envs\fl_torch\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning warnings.warn(

# Load in Data

In [3]:
print("Loading")

# Kai's laptop
#data_path = "C:\\Users\\kdmen\\Desktop\\Research\\Data\\$M\\PCA_40D\\"
#model_dir_path = 'C:\\Users\\kdmen\\Desktop\\Research\\Repos\\fl-gestures\\models\\Embedding\\Autoencoders\\'
# BRC Desktop
data_path = "D:\\Kai_MetaGestureClustering_24\\saved_datasets\\"
model_dir_path = 'C:\\Users\\YamagamiLab\\Desktop\\Dev\\fl-gestures\\models\\Embedding\\Autoencoders\\'

emg_dir = "filtered_datasets\\EMG_PPD\\"
both_dir = "filtered_datasets\\Both_PPD\\"

metadata_cols = ['Participant', 'Gesture_ID', 'Gesture_Num']
metadata_cols_df = pd.read_pickle('D:\\Kai_MetaGestureClustering_24\\saved_datasets\\metadata_cols_df.pkl')

emg_training_users_df = pd.read_pickle(data_path+emg_dir+'training_users_df.pkl').drop(metadata_cols, axis=1)
emg_test_users_df = pd.read_pickle(data_path+emg_dir+'test_users_df.pkl').drop(metadata_cols, axis=1)

both_training_users_df = pd.read_pickle(data_path+both_dir+'training_users_df.pkl').drop(metadata_cols, axis=1)
both_test_users_df = pd.read_pickle(data_path+both_dir+'test_users_df.pkl').drop(metadata_cols, axis=1)

Loading


In [4]:
num_rows_per_gesture = 64 # From the interp
batch_size = 32

In [5]:
print(emg_training_users_df.shape)
emg_training_users_df.head()

(160000, 16)


Unnamed: 0,EMG1,EMG2,EMG3,EMG4,EMG5,EMG6,EMG7,EMG8,EMG9,EMG10,EMG11,EMG12,EMG13,EMG14,EMG15,EMG16
0,-0.362743,-0.801651,-0.383077,-0.195299,-0.203047,-0.464472,-0.276292,-0.026736,-0.87387,-1.036152,-0.58093,-0.719494,-0.502255,-1.750091,-0.127847,-0.094192
1,-0.351553,-0.775334,-0.382545,-0.154773,-0.131977,-0.295204,-0.125822,0.089679,-0.816215,-2.082635,-0.006283,-0.139439,-0.367764,-0.208084,-0.111811,-0.039009
2,-0.380825,-0.762588,-0.398388,-0.085411,0.017528,-0.205675,-0.068451,0.117076,-0.668221,-3.403064,-0.52603,-0.478294,-0.300443,0.203266,0.1133,0.004728
3,-0.366795,-0.765464,-0.374423,-0.073225,0.183172,0.009277,-0.058907,0.080977,-0.424416,-3.709413,-0.570894,-0.775155,-0.14471,-0.619539,0.146499,0.199975
4,-0.245578,-0.761283,-0.303976,-0.081947,0.224996,0.103319,-0.003929,0.041526,-0.01653,-4.07515,-0.12771,2.682791,-0.14175,-0.208404,-0.035642,0.172662


In [6]:
print(both_training_users_df.shape)
both_training_users_df.head()

(160000, 88)


Unnamed: 0,IMU1_ax,IMU1_ay,IMU1_az,IMU1_vx,IMU1_vy,IMU1_vz,IMU2_ax,IMU2_ay,IMU2_az,IMU2_vx,...,EMG7,EMG8,EMG9,EMG10,EMG11,EMG12,EMG13,EMG14,EMG15,EMG16
0,-0.551109,-0.738972,-0.985439,0.181924,0.059616,0.087024,1.055804,-0.883268,-0.327978,-0.221822,...,-0.276292,-0.026736,-0.87387,-1.036152,-0.58093,-0.719494,-0.502255,-1.750091,-0.127847,-0.094192
1,-0.571115,-0.821726,-0.975036,0.242607,0.067375,-0.0249,1.007074,-0.944195,-0.363602,-0.149833,...,-0.125822,0.089679,-0.816215,-2.082635,-0.006283,-0.139439,-0.367764,-0.208084,-0.111811,-0.039009
2,-0.509305,-0.823575,-0.947221,0.550111,0.013848,-0.485765,0.993332,-0.944029,-0.357476,-0.164447,...,-0.068451,0.117076,-0.668221,-3.403064,-0.52603,-0.478294,-0.300443,0.203266,0.1133,0.004728
3,-0.511788,-0.77581,-0.947939,0.417919,0.087222,-0.229441,1.026003,-0.911993,-0.335594,-0.006214,...,-0.058907,0.080977,-0.424416,-3.709413,-0.570894,-0.775155,-0.14471,-0.619539,0.146499,0.199975
4,-0.441369,-0.921726,-0.882652,1.25497,0.108993,-0.933639,1.001748,-0.939228,-0.357917,0.009543,...,-0.003929,0.041526,-0.01653,-4.07515,-0.12771,2.682791,-0.14175,-0.208404,-0.035642,0.172662


Make the dataloaders for the autoencoders

In [7]:
# CREATE THE TRAINING SET
num_gestures = len(emg_training_users_df) // num_rows_per_gesture
num_full_emg_features = emg_training_users_df.shape[1]
emg_train_loader = create_data_loader(emg_training_users_df, GestureDatasetAE)
# CREATE THE TEST SET
num_test_gestures = len(emg_test_users_df) // num_rows_per_gesture
emg_test_loader = create_data_loader(emg_test_users_df, GestureDatasetAE, shuffle_bool=False)

# CREATE THE TRAINING SET
num_gestures = len(both_training_users_df) // num_rows_per_gesture
num_full_both_features = both_training_users_df.shape[1]
both_train_loader = create_data_loader(both_training_users_df, GestureDatasetAE)
# CREATE THE TEST SET
num_test_gestures = len(both_test_users_df) // num_rows_per_gesture
both_test_loader = create_data_loader(both_test_users_df, GestureDatasetAE, shuffle_bool=False)

In [8]:
hidden_dim_lst = [64, 22]
input_dim = 88
RNNAE_64_22mir_BothFull = RNNAutoencoder(input_dim, hidden_dim_lst, num_layers=None, seq_len=num_rows_per_gesture, progressive=False, mirror=True)
RNNAE_64_22mir_BothFull.load_state_dict(torch.load(model_dir_path+'RNNAE_64_22mir_vallossp262_BothFull.pth'))

average_test_loss, sample_data_lst, reconstructions_lst, latent_representations_lst = eval_on_testset_and_return_original_and_reconstructed(RNNAE_64_22mir_BothFull, both_test_loader)

latent_representations = np.concatenate(latent_representations_lst)
latent_representations_reshaped = latent_representations.reshape(latent_representations.shape[0], -1)


Average testing loss across the entire test_loader: 0.2686733954719135


In [9]:
hidden_dim_lst = [9]
input_dim = 16
RNNAE_9mir_EMGFull = RNNAutoencoder(input_dim, hidden_dim_lst, num_layers=None, seq_len=num_rows_per_gesture, progressive=False, mirror=True)
RNNAE_9mir_EMGFull.load_state_dict(torch.load(model_dir_path+'RNNAE_9mir_vallossp277_EMGFull.pth'))

average_test_loss, sample_data_lst, reconstructions_lst, latent_representations_lst = eval_on_testset_and_return_original_and_reconstructed(RNNAE_9mir_EMGFull, emg_test_loader)

latent_representations = np.concatenate(latent_representations_lst)
latent_representations_reshaped = latent_representations.reshape(latent_representations.shape[0], -1)


Average testing loss across the entire test_loader: 0.2833991277785528


One hot encode
> All datasets have the same number of rows and thus the same label sets (they only differ in the number of columns!)

In [10]:
emg_test_users_df.head()

Unnamed: 0,EMG1,EMG2,EMG3,EMG4,EMG5,EMG6,EMG7,EMG8,EMG9,EMG10,EMG11,EMG12,EMG13,EMG14,EMG15,EMG16
6400,-0.590579,-0.287949,-0.30621,-0.286026,-0.067811,-0.022725,-0.030116,-0.301059,-5.375536,-0.672272,0.00355,-0.034412,-0.015434,-0.004368,-0.132297,-0.127927
6401,-0.538529,-0.31662,-0.312567,-0.225234,-0.040725,-0.002531,-0.050021,-0.234298,-5.365962,-0.618651,0.022224,-0.002252,-0.007844,-0.018952,0.016946,-0.09342
6402,-0.521673,-0.321927,-0.321917,-0.383057,-0.02638,0.003411,-0.021233,-0.146613,-5.357741,-0.599404,-0.006983,-0.036363,-0.015657,-0.009868,0.270363,0.30181
6403,-0.27455,-0.28336,-0.290955,-0.607226,-0.048867,-0.0058,0.01106,0.036846,-5.534573,-0.600492,-0.006797,0.041818,0.001948,0.004594,0.287805,0.519149
6404,-0.203965,-0.305826,-0.301109,-0.372556,-0.062966,0.004885,-0.009311,-0.093618,-5.317473,-0.602479,0.040034,0.045638,0.013134,0.015332,-0.016407,-0.127118


In [11]:
emg_train_df = pd.read_pickle(data_path+emg_dir+'training_users_df.pkl')
emg_test_df = pd.read_pickle(data_path+emg_dir+'test_users_df.pkl')
emg_train_gesture_labels_df = emg_train_df['Gesture_ID']
emg_test_gesture_labels_df = emg_test_df['Gesture_ID']

both_train_df = pd.read_pickle(data_path+both_dir+'training_users_df.pkl')
both_test_df = pd.read_pickle(data_path+both_dir+'test_users_df.pkl')
both_train_gesture_labels_df = both_train_df['Gesture_ID']
both_test_gesture_labels_df = both_test_df['Gesture_ID']


In [12]:
all_train_test_gestures_df = pd.concat([emg_train_gesture_labels_df, emg_test_gesture_labels_df], axis=0)
unique_gestures = all_train_test_gestures_df.unique()
print(len(unique_gestures))
print(unique_gestures)

10
['pan' 'duplicate' 'zoom-out' 'zoom-in' 'move' 'rotate' 'select-single'
 'delete' 'close' 'open']


Create OneHotEncoder

In [13]:
label_per_gesture_df = emg_train_gesture_labels_df.iloc[::num_rows_per_gesture]

# TRAIN LABELS (OHE)
labels_array = label_per_gesture_df.to_numpy()
gesture_encoder = OneHotEncoder(sparse=False)
labels_reshaped = labels_array.reshape(-1, 1)
labels_encoded = gesture_encoder.fit_transform(labels_reshaped)
encoded_labels_df = pd.DataFrame(labels_encoded, columns=gesture_encoder.categories_[0])

# TEST LABELS (OHE)
label_per_test_gesture_df = emg_test_gesture_labels_df.iloc[::num_rows_per_gesture]
gesture_encoder = OneHotEncoder(sparse=False)
test_labels_encoded = gesture_encoder.fit_transform(label_per_test_gesture_df.to_numpy().reshape(-1, 1))
encoded_test_labels_df = pd.DataFrame(test_labels_encoded, columns=gesture_encoder.categories_[0])

print(encoded_labels_df.shape)
encoded_labels_df.head()

(2500, 10)


Unnamed: 0,close,delete,duplicate,move,open,pan,rotate,select-single,zoom-in,zoom-out
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [14]:
X_3DTensor_full_emg = torch.tensor(emg_training_users_df.to_numpy().reshape(num_gestures, num_rows_per_gesture, num_full_emg_features), dtype=torch.float32)
Xtest_3DTensor_full_emg = torch.tensor(emg_test_users_df.to_numpy().reshape(num_test_gestures, num_rows_per_gesture, num_full_emg_features), dtype=torch.float32)

X_3DTensor_full_both = torch.tensor(both_training_users_df.to_numpy().reshape(num_gestures, num_rows_per_gesture, num_full_both_features), dtype=torch.float32)
Xtest_3DTensor_full_both = torch.tensor(both_test_users_df.to_numpy().reshape(num_test_gestures, num_rows_per_gesture, num_full_both_features), dtype=torch.float32)


Get latent embeddings

In [15]:
X_full_emg_train_embeddings = RNNAE_9mir_EMGFull.encode(X_3DTensor_full_emg)
X_full_emg_test_embeddings = RNNAE_9mir_EMGFull.encode(Xtest_3DTensor_full_emg)

X_full_both_train_embeddings = RNNAE_64_22mir_BothFull.encode(X_3DTensor_full_both)
X_full_both_test_embeddings = RNNAE_64_22mir_BothFull.encode(Xtest_3DTensor_full_both)

print(f"X_full_emg_train_embeddings shape: {X_full_emg_train_embeddings.shape}")
print(f"X_full_emg_test_embeddings shape: {X_full_emg_test_embeddings.shape}")
print(f"X_full_both_train_embeddings shape: {X_full_both_train_embeddings.shape}")
print(f"X_full_both_test_embeddings shape: {X_full_both_test_embeddings.shape}")
print()
print(f"encoded_labels_df shape: {encoded_labels_df.shape}")
print(f"encoded_test_labels_df shape: {encoded_test_labels_df.shape}")

X_full_emg_train_embeddings shape: torch.Size([2500, 64, 9])
X_full_emg_test_embeddings shape: torch.Size([700, 64, 9])
X_full_both_train_embeddings shape: torch.Size([2500, 64, 22])
X_full_both_test_embeddings shape: torch.Size([700, 64, 22])

encoded_labels_df shape: (2500, 10)
encoded_test_labels_df shape: (700, 10)


Combine latent embedding train/test into a single dataset and pass that to split_users below

In [16]:
print(metadata_cols_df.shape)
metadata_cols_df.head()

(204800, 3)


Unnamed: 0,Participant,Gesture_ID,Gesture_Num
0,P102,pan,1
1,P102,pan,1
2,P102,pan,1
3,P102,pan,1
4,P102,pan,1


In [17]:
X_full_emg_all_embeddings = torch.cat((X_full_emg_train_embeddings, X_full_emg_test_embeddings), dim=0)

new_shape = (X_full_emg_all_embeddings.shape[0] * X_full_emg_all_embeddings.shape[1], X_full_emg_all_embeddings.shape[2])
X_full_emg_all_embeddings_reshaped = X_full_emg_all_embeddings.view(*new_shape)

X_all_emg_embeddings_df = pd.DataFrame(X_full_emg_all_embeddings_reshaped.detach().numpy())

In [18]:
print(X_all_emg_embeddings_df.shape)
X_all_emg_embeddings_df.head()

(204800, 9)


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.819932,-0.195209,0.203993,-0.662652,-0.23271,-0.234532,-0.755304,0.312021,0.358541
1,0.186615,-0.630549,0.403146,-0.625808,-0.373283,0.037659,-0.597726,-0.038134,0.337289
2,0.62109,-0.235736,0.686262,-0.814283,-0.408508,0.180361,-0.816356,-0.40681,0.6911
3,0.669751,-0.404265,0.714247,-0.679241,-0.377924,0.098625,-0.793641,-0.485138,0.673027
4,0.574261,-0.820558,0.278629,-0.868722,-0.799502,0.629095,-0.596288,-0.84063,0.061046


In [19]:
labeled_emg_embeddings_df = pd.concat([metadata_cols_df, X_all_emg_embeddings_df], axis=1)

In [20]:
print(labeled_emg_embeddings_df.shape)
labeled_emg_embeddings_df.head()

(204800, 12)


Unnamed: 0,Participant,Gesture_ID,Gesture_Num,0,1,2,3,4,5,6,7,8
0,P102,pan,1,0.819932,-0.195209,0.203993,-0.662652,-0.23271,-0.234532,-0.755304,0.312021,0.358541
1,P102,pan,1,0.186615,-0.630549,0.403146,-0.625808,-0.373283,0.037659,-0.597726,-0.038134,0.337289
2,P102,pan,1,0.62109,-0.235736,0.686262,-0.814283,-0.408508,0.180361,-0.816356,-0.40681,0.6911
3,P102,pan,1,0.669751,-0.404265,0.714247,-0.679241,-0.377924,0.098625,-0.793641,-0.485138,0.673027
4,P102,pan,1,0.574261,-0.820558,0.278629,-0.868722,-0.799502,0.629095,-0.596288,-0.84063,0.061046


In [21]:
labeled_emg_embeddings_df.columns

Index(['Participant', 'Gesture_ID', 'Gesture_Num', 0, 1, 2, 3, 4, 5, 6, 7, 8], dtype='object')

## Train/test/val Split and K Fold Dataloader Making

Split users into train, val, and test sets

In [22]:
train_users_lst, val_users_lst, test_users_lst = split_users(labeled_emg_embeddings_df)


In [23]:
train_users_lst

array(['P008', 'P004', 'P118', 'P127', 'P123', 'P115', 'P107', 'P132',
       'P128', 'P119', 'P126', 'P114', 'P102', 'P116', 'P011', 'P006',
       'P010', 'P005', 'P108', 'P112', 'P103', 'P111'], dtype=object)

In [24]:
val_users_lst

array(['P124', 'P109', 'P110', 'P122'], dtype=object)

In [25]:
test_users_lst

array(['P125', 'P106', 'P105', 'P131', 'P104', 'P121'], dtype=object)

K Fold - Cross Validation (KF-CV) on Training Set

In [26]:
from sklearn.model_selection import GroupKFold

def prepare_KFold_data_loaders(dataset, users, batch_size, seq_len=64, num_folds=5):
    data_loaders = []
    
    # Convert Gesture_ID column to integer labels
    label_encoder = LabelEncoder()
    dataset['Gesture_ID'] = label_encoder.fit_transform(dataset['Gesture_ID'])
    num_gesture_classes = len(label_encoder.classes_)
    num_features = dataset.shape[-1] - 3 # -3 for the 3 rows of metadata
    
    # Assuming you have a list of user IDs for each row of your dataset
    user_ids = dataset['Participant']

    # Initialize GroupKFold with the number of desired folds
    gkf = GroupKFold(n_splits=num_folds)

    for train_index, test_index in gkf.split(dataset, groups=user_ids):
        train_dataset = dataset.iloc[train_index]
        test_dataset = dataset.iloc[test_index]

        train_input = torch.tensor(train_dataset.drop(columns=['Participant','Gesture_ID','Gesture_Num']).values.reshape(-1, seq_len, num_features), dtype=torch.float32)
        train_labels = torch.tensor(train_dataset['Gesture_ID'].iloc[::seq_len].values, dtype=torch.float32).long()
        test_input = torch.tensor(test_dataset.drop(columns=['Participant','Gesture_ID','Gesture_Num']).values.reshape(-1, seq_len, num_features), dtype=torch.float32)
        test_labels = torch.tensor(test_dataset['Gesture_ID'].iloc[::seq_len].values, dtype=torch.float32).long()

        # Create TensorDataset
        train_dataset = TensorDataset(train_input, train_labels)
        test_dataset = TensorDataset(test_input, test_labels)
        
        # Create DataLoader
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
        
        data_loaders.append((train_loader, test_loader))
    
    return data_loaders


Validate split...

In [27]:
def analyze_split(train_index, test_index, user_ids):
    train_users = set(user_ids[train_index])
    test_users = set(user_ids[test_index])

    print("Train Users:", train_users)
    print("Test Users:", test_users)

    print("\nTrain User Counts:")
    for user in train_users:
        user_count = sum(user_ids[train_index] == user)
        print(f"User {user}: {user_count} rows")

    print("\nTest User Counts:")
    for user in test_users:
        user_count = sum(user_ids[test_index] == user)
        print(f"User {user}: {user_count} rows")

dataset=labeled_emg_embeddings_df
users=train_users_lst
batch_size=32
seq_len=64
num_folds=5

data_loaders = []
    
# Convert Gesture_ID column to integer labels
label_encoder = LabelEncoder()
dataset['Gesture_ID'] = label_encoder.fit_transform(dataset['Gesture_ID'])
num_gesture_classes = len(label_encoder.classes_)
num_features = dataset.shape[-1] - 3 # -3 for the 3 rows of metadata
# Assuming you have a list of user IDs for each row of your dataset
user_ids = dataset['Participant']
# Initialize GroupKFold with the number of desired folds
gkf = GroupKFold(n_splits=num_folds)
for train_index, test_index in gkf.split(dataset, groups=user_ids):
    analyze_split(train_index, test_index, user_ids)
    print("\n\n\n")


Train Users: {'P121', 'P114', 'P127', 'P104', 'P131', 'P125', 'P109', 'P102', 'P126', 'P112', 'P108', 'P004', 'P107', 'P118', 'P106', 'P006', 'P124', 'P111', 'P103', 'P005', 'P115', 'P119', 'P011', 'P122', 'P008'}
Test Users: {'P123', 'P116', 'P128', 'P010', 'P132', 'P110', 'P105'}

Train User Counts:
User P121: 6400 rows
User P114: 6400 rows
User P127: 6400 rows
User P104: 6400 rows
User P131: 6400 rows
User P125: 6400 rows
User P109: 6400 rows
User P102: 6400 rows
User P126: 6400 rows
User P112: 6400 rows
User P108: 6400 rows
User P004: 6400 rows
User P107: 6400 rows
User P118: 6400 rows
User P106: 6400 rows
User P006: 6400 rows
User P124: 6400 rows
User P111: 6400 rows
User P103: 6400 rows
User P005: 6400 rows
User P115: 6400 rows
User P119: 6400 rows
User P011: 6400 rows
User P122: 6400 rows
User P008: 6400 rows

Test User Counts:
User P123: 6400 rows
User P116: 6400 rows
User P128: 6400 rows
User P010: 6400 rows
User P132: 6400 rows
User P110: 6400 rows
User P105: 6400 rows




Tr

In [28]:
kf_data_loaders = prepare_KFold_data_loaders(labeled_emg_embeddings_df, train_users_lst, batch_size)


In [29]:
len(kf_data_loaders)

5

In [30]:
dl_iter = iter(kf_data_loaders[0][0])

In [31]:
batch0 = next(dl_iter)

In [32]:
batch0[0].shape

torch.Size([32, 64, 9])

In [33]:
batch0[1].shape

torch.Size([32])

## Hyperparameter Tuning with KF-CV

In [34]:
from sklearn.model_selection import ParameterGrid


In [35]:
def hyperparameter_tuning(param_grid, data_loaders_lst):
    param_list = list(ParameterGrid(param_grid))
    best_params = None
    best_score = float('-inf')
    results = []
    num_KFolds = len(data_loaders_lst)

    for params in param_list:
        print(f'\n\nTrial Params: {params}')
        fold_results = []

        for idx, (train_loader, test_loader) in enumerate(data_loaders_lst):
            # I don't even have the Participant ID to print anymore...
            print(f"KFold: {idx}/{num_KFolds}")
            
            if params['use_hidden']:
                model = GestureLSTMClassifier_Hidden(
                    input_size=params['input_size'],
                    hidden_size=params['hidden_size'],
                    num_layers=params['num_layers'],
                    num_classes=params['num_classes'],
                    dropout_prob=params['dropout']
                )#.to(device)
            else:
                model = GestureLSTMClassifier(
                    input_size=params['input_size'],
                    hidden_size=params['hidden_size'],
                    num_layers=params['num_layers'],
                    num_classes=params['num_classes'],
                    dropout_prob=params['dropout']
                )#.to(device)

            trained_model = train_LSTM_gesture_classifier(
                model,
                train_loader,
                test_loader,
                lr=params['lr'],
                weight_decay=params['weight_decay'],
                num_epochs=params['num_epochs'],
                batch_size=params['batch_size'],
                use_hidden=params['use_hidden'],
                print_loss=False,
                print_accuracy=False
            )

            _, val_accuracy = evaluate_model(trained_model, test_loader, nn.CrossEntropyLoss(), params['use_hidden'], params['batch_size'])
            fold_results.append(val_accuracy)

        mean_score = np.mean(fold_results)
        results.append((params, mean_score))

        if mean_score > best_score:
            best_score = mean_score
            best_params = params

    print(f'Best Params: {best_params}, Best Score: {best_score}')
    return best_params, results


In [36]:
input_size = 9
num_classes = 10

In [39]:
# Perform hyperparameter tuning

# Define the hyperparameter grid
param_grid = {
    'input_size': [input_size],
    'hidden_size': [16, 32, 64, 96, 128],
    'num_layers': [1, 2, 3],
    'num_classes': [num_classes],
    'dropout': [0.0, 0.2, 0.5],
    'lr': [0.001, 0.0001],
    'weight_decay': [0.0, 0.001],
    'num_epochs': [8],
    'use_hidden': [True, False],
    'batch_size': [1, 32, 256]
}

best_params, tuning_results = hyperparameter_tuning(param_grid, kf_data_loaders)



Trial Params: {'batch_size': 1, 'dropout': 0.0, 'hidden_size': 16, 'input_size': 9, 'lr': 0.001, 'num_classes': 10, 'num_epochs': 8, 'num_layers': 1, 'use_hidden': True, 'weight_decay': 0.0}
KFold: 0/5
Starting training!
Epoch [1/8], Train Accuracy: 10.04%, Val Accuracy: 9.29%
Epoch [8/8], Train Accuracy: 17.96%, Val Accuracy: 7.86%
Training completed in 13.77 seconds
KFold: 1/5
Starting training!
Epoch [1/8], Train Accuracy: 10.72%, Val Accuracy: 6.86%
Epoch [8/8], Train Accuracy: 15.04%, Val Accuracy: 8.29%
Training completed in 12.00 seconds
KFold: 2/5
Starting training!
Epoch [1/8], Train Accuracy: 10.35%, Val Accuracy: 9.83%
Epoch [8/8], Train Accuracy: 15.77%, Val Accuracy: 11.33%
Training completed in 14.43 seconds
KFold: 3/5
Starting training!
Epoch [1/8], Train Accuracy: 10.08%, Val Accuracy: 9.00%
Epoch [8/8], Train Accuracy: 17.54%, Val Accuracy: 8.33%
Training completed in 14.60 seconds
KFold: 4/5
Starting training!
Epoch [1/8], Train Accuracy: 11.65%, Val Accuracy: 13.00

KeyboardInterrupt: 

Final Model Evaluation

In [None]:
# Create DataLoaders for training and validation sets combined, and test set
train_val_users_lst = [train_users_lst].extend(val_users_lst)
train_val_loader = create_data_loader(labeled_emg_embeddings_df, train_val_users_lst, batch_size=best_params['batch_size'])
test_loader = create_data_loader(labeled_emg_embeddings_df, test_users_lst, batch_size=best_params['batch_size'])

# Train final model
if best_params['use_hidden']:
    final_model = GestureLSTMClassifier_Hidden(
        input_size=best_params['input_size'],
        hidden_size=best_params['hidden_size'],
        num_layers=best_params['num_layers'],
        num_classes=best_params['num_classes'],
        dropout_prob=best_params['dropout']
    )#.to(device)
else:
    final_model = GestureLSTMClassifier(
        input_size=best_params['input_size'],
        hidden_size=best_params['hidden_size'],
        num_layers=best_params['num_layers'],
        num_classes=best_params['num_classes'],
        dropout_prob=best_params['dropout']
    )#.to(device)
                
final_model, final_report, _ = train_LSTM_gesture_classifier(
    final_model,
    train_val_loader,
    test_loader,
    lr=best_params['lr'],
    weight_decay=best_params['weight_decay'],
    num_epochs=best_params['num_epochs'],
    batch_size=best_params['batch_size'],
    use_hidden=best_params['use_hidden']
)
