Train MLP for all fingers!\
Using hyper param optimalization
Data source: \
sliding windowed powers for mu and beta band\
Subject 1

In [1]:
import os
import numpy as np
import h5py
from scipy import stats
import scipy.io
import mne

mne.set_log_level('error')

from random import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold


import torch
import torch.nn as nn
import torch.optim as optim

import optuna


from utils.load import Load
from config.default import cfg

%load_ext autoreload
%autoreload 2


In [2]:
subject_data = {}
# Load the data  from the HDF5 file
target_dir = 'features'
tag = 'reproduced_with_bad'

for subject in cfg['subjects']:
    file_path = os.path.join(target_dir, tag+'_'+subject + '.h5')

    data = {}
    with h5py.File(file_path, 'r') as h5file:
        for key in h5file.keys():
            data[key] = np.array(h5file[key])

    subject_data[subject] = data


for subject_id in subject_data:
    print(subject_id)
    print(subject_data[subject_id].keys())

S1
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S2
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S3
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S4
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S5
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])


In [3]:
X = []
y = []
# Loop through each subject in the dataset
for subject_id, (subject, data) in enumerate(subject_data.items()):

    features = np.concatenate(list(data.values()), axis=0)
    features = features.reshape(features.shape[0], -1)

    #features = np.insert(features, 0, subject_id, axis=1) # add subject id as first column
    labels = np.concatenate([np.ones(data[finger].shape[0]) * i for i, finger in enumerate(data)], axis=0)
   
    X += [features]
    y += [labels]

X = np.concatenate(X, axis=0)
y = np.concatenate(y, axis=0)

print(X.shape)
print(y.shape)

(1250, 8216)
(1250,)


In [4]:
class SingleLayerMLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, activation):
        super(SingleLayerMLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.activation = activation
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.activation(x)
        x = self.fc2(x)
        x = nn.Softmax(dim=1)(x)
        return x

In [5]:
def train(X_train, y_train, X_test, y_test, model, criterion, optimizer, num_epochs=100):
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)


    model.to(device)

    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
    X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
    y_test = torch.tensor(y_test, dtype=torch.long).to(device)



    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        # # Shuffle X and y together
        p = np.random.permutation(len(X_train))
        X_train, y_train = X_train[p], y_train[p]

    with torch.no_grad():
        y_pred = model(X_test)
        y_pred = torch.argmax(y_pred, dim=1)

    acc = accuracy_score(y_test.cpu(), y_pred.cpu())
    return acc

def objective(trial, X, y):
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    num_epochs = trial.suggest_int("num_epochs", 100, 2000)
    hidden_size = trial.suggest_int("hidden_size", 16, 128)
    activation_name = trial.suggest_categorical("activation", ["relu", "elu", "leaky_relu"])
    optimizer = trial.suggest_categorical("optimizer", ["SGD", "Adam"])

    if activation_name == "relu":
        activation = nn.ReLU()
    elif activation_name == "elu":
        activation = nn.ELU()
    elif activation_name == "leaky_relu":
        activation = nn.LeakyReLU()

    if optimizer == "SGD":
        optimizer = optim.SGD
    elif optimizer == "Adam":
        optimizer = optim.Adam

    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)
    model = SingleLayerMLP(train_X.shape[1], hidden_size, 5, activation)
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer(model.parameters(), lr=learning_rate)
    return train(train_X, train_y, test_X, test_y, model, criterion, optimizer, num_epochs=num_epochs)


def train_MLP(X, y, n_trials = 10):


    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial, X, y), n_trials=n_trials)

    best_trial = study.best_trial

    print(f'Best trial params: {best_trial.params}')
    print(f'Best trial accuracy: {best_trial.value * 100:.2f}%')



In [6]:
# connect data

In [7]:
train_MLP(X, y, n_trials=10)

[32m[I 2023-04-18 22:53:32,029][0m A new study created in memory with name: no-name-3f94163e-ed1a-4215-a4cb-13430554c1c8[0m
[32m[I 2023-04-18 22:53:54,523][0m Trial 0 finished with value: 0.204 and parameters: {'learning_rate': 6.912777705126664e-05, 'num_epochs': 1498, 'hidden_size': 112, 'activation': 'elu', 'optimizer': 'Adam'}. Best is trial 0 with value: 0.204.[0m
[32m[I 2023-04-18 22:54:19,878][0m Trial 1 finished with value: 0.232 and parameters: {'learning_rate': 0.002695619888243019, 'num_epochs': 1817, 'hidden_size': 79, 'activation': 'elu', 'optimizer': 'Adam'}. Best is trial 1 with value: 0.232.[0m
[32m[I 2023-04-18 22:54:21,634][0m Trial 2 finished with value: 0.22 and parameters: {'learning_rate': 0.0009079983269920445, 'num_epochs': 253, 'hidden_size': 77, 'activation': 'relu', 'optimizer': 'SGD'}. Best is trial 1 with value: 0.232.[0m
[32m[I 2023-04-18 22:54:29,852][0m Trial 3 finished with value: 0.188 and parameters: {'learning_rate': 0.00462167703129144

Best trial params: {'learning_rate': 0.07234012920975832, 'num_epochs': 1260, 'hidden_size': 103, 'activation': 'relu', 'optimizer': 'SGD'}
Best trial accuracy: 24.00%


In [8]:
# subject as features (first column)
#23.2% accuracy

# no subject data
#24 % accuracy

NameError: name 'accuracy' is not defined