In [1]:
import os
import numpy as np
import h5py
from scipy import stats
import scipy.io
import mne

mne.set_log_level('error')

from random import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold


import torch
import torch.nn as nn
import torch.optim as optim

import optuna


from utils.load import Load
from config.default import cfg

%load_ext autoreload
%autoreload 2


In [2]:
# Location and naming scheme of the per-subject HDF5 feature files:
# <target_dir>/<tag>_<subject>.h5
target_dir = 'features'
tag = '0_25powers'

# Maps subject name -> {dataset name -> NumPy array}.
subject_data = {}

for subject in cfg['subjects']:
    file_name = tag + '_' + subject + '.h5'
    file_path = os.path.join(target_dir, file_name)

    with h5py.File(file_path, 'r') as h5file:
        # Read every top-level entry of the file fully into memory.
        data = {key: np.array(h5file[key]) for key in h5file.keys()}

    subject_data[subject] = data


# Quick sanity check: list the dataset names found for each subject.
for subject_id, loaded in subject_data.items():
    print(subject_id)
    print(loaded.keys())

S1
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S2
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S3
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S4
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])
S5
dict_keys(['index', 'little', 'middle', 'ring', 'thumb'])


In [3]:
# When True, the subject's enumeration index is prepended as feature column 0.
add_subject_id = False

feature_blocks = []
label_blocks = []

for subject_id, (subject, data) in enumerate(subject_data.items()):
    # Stack every entry of this subject along axis 0, then flatten each row.
    stacked = np.concatenate(list(data.values()), axis=0)
    features = stacked.reshape(len(stacked), -1)
    if add_subject_id:
        features = np.insert(features, 0, subject_id, axis=1)

    # Label = position of the key in the subject's dict, repeated once per row
    # of that key's array (same iteration order as the concatenation above).
    labels = np.concatenate(
        [np.full(rows.shape[0], float(class_idx))
         for class_idx, rows in enumerate(data.values())],
        axis=0,
    )

    feature_blocks.append(features)
    label_blocks.append(labels)

# Pool all subjects into a single design matrix / label vector.
X = np.concatenate(feature_blocks, axis=0)
y = np.concatenate(label_blocks, axis=0)

print(X.shape)
print(y.shape)

(1250, 9480)
(1250,)


In [34]:
class SingleLayerMLP(nn.Module):
    """Feed-forward classifier with one hidden layer.

    Computes ``fc2(activation(fc1(x)))``.

    Args:
        input_size: Number of features per input sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output scores per sample.
        activation: Callable (e.g. ``nn.ReLU()``) applied to the hidden layer.
    """

    def __init__(self, input_size, hidden_size, output_size, activation):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.activation = activation
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, output_size)`` scores."""
        # The hidden projection must come first: fc2 expects hidden_size inputs,
        # so skipping fc1 breaks whenever input_size != hidden_size.
        x = self.fc1(x)
        x = self.activation(x)
        x = self.fc2(x)
        return x

In [37]:
def train(train_X, test_X, train_y, test_y, model, criterion, optimizer, num_epochs=100):
    """Fit ``model`` with full-batch gradient steps and return its test accuracy.

    Args:
        train_X: Training features; anything convertible to a float32 tensor.
        test_X: Held-out features, same layout as ``train_X``.
        train_y: Training class indices (cast to ``torch.long``).
        test_y: Held-out class indices (cast to ``torch.long``).
        model: ``nn.Module`` mapping a batch of features to per-class scores.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer already constructed over ``model.parameters()``.
        num_epochs: Number of optimisation steps; each step uses the whole
            training set.

    Returns:
        float: Fraction of test samples whose arg-max score matches the label.

    Raises:
        ValueError: If the test split contains no samples.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # as_tensor accepts NumPy arrays and tensors alike without the
    # "copy construct from a tensor" warning that torch.tensor emits.
    train_X = torch.as_tensor(train_X, dtype=torch.float32).to(device)
    test_X = torch.as_tensor(test_X, dtype=torch.float32).to(device)
    train_y = torch.as_tensor(train_y, dtype=torch.long).to(device)
    test_y = torch.as_tensor(test_y, dtype=torch.long).to(device)

    if test_y.numel() == 0:
        raise ValueError("test split is empty; cannot compute accuracy")

    # Make sure train-only layers (dropout, batch norm, ...) are active.
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(train_X)
        loss = criterion(outputs, train_y)
        loss.backward()
        optimizer.step()
        # No per-epoch shuffling: every step sees the full training set, so
        # the sample order cannot influence the loss or its gradient.

    # Switch to inference behaviour before scoring the held-out split.
    model.eval()
    with torch.no_grad():
        y_pred = torch.argmax(model(test_X), dim=1)

    # Accuracy computed on-device; returned as a plain Python float.
    n_correct = (y_pred == test_y).sum().item()
    return n_correct / test_y.numel()

def objective(trial, train_X, test_X, train_y, test_y, num_classes=5):
    """Optuna objective: train one ``SingleLayerMLP`` and return its accuracy.

    Samples ``learning_rate`` (log-uniform in [1e-4, 1e-3]), ``num_epochs``
    (10..500) and ``hidden_size`` (16..128) from ``trial``, builds the model
    and optimizer, and delegates to ``train``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        train_X: Training features; ``train_X.shape[1]`` sets the input width.
        test_X: Features of the split the returned accuracy is measured on.
        train_y: Training class indices.
        test_y: Class indices of the evaluation split.
        num_classes: Number of output classes of the network (default 5).

    Returns:
        Accuracy reported by ``train`` on ``test_X`` / ``test_y``.

    Raises:
        ValueError: If the configured activation or optimizer name is unknown.
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-3, log=True)
    num_epochs = trial.suggest_int("num_epochs", 10, 500)
    hidden_size = trial.suggest_int("hidden_size", 16, 128)
    # Currently fixed; to search over them use e.g.
    #   trial.suggest_categorical("activation", ["relu", "elu", "leaky_relu"])
    #   trial.suggest_categorical("optimizer", ["SGD", "Adam"])
    activation_name = "relu"
    optimizer_name = "Adam"

    # Dispatch tables instead of if/elif chains, so an unknown name fails
    # loudly here rather than as an unbound local further down.
    activations = {
        "relu": nn.ReLU,
        "elu": nn.ELU,
        "leaky_relu": nn.LeakyReLU,
    }
    optimizers = {
        "SGD": optim.SGD,
        "Adam": optim.Adam,
    }
    if activation_name not in activations:
        raise ValueError(f"unknown activation: {activation_name!r}")
    if optimizer_name not in optimizers:
        raise ValueError(f"unknown optimizer: {optimizer_name!r}")

    activation = activations[activation_name]()
    model = SingleLayerMLP(train_X.shape[1], hidden_size, num_classes, activation)
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizers[optimizer_name](model.parameters(), lr=learning_rate)
    return train(train_X, test_X, train_y, test_y, model, criterion, optimizer, num_epochs=num_epochs)


def train_MLP(X, y, n_trials=10):
    """Run an Optuna search for the MLP and print the best trial.

    Splits ``X`` / ``y`` 80/20 with a fixed seed, maximises the value returned
    by ``objective`` over ``n_trials`` trials, then prints the best trial's
    parameters and accuracy. Returns nothing.

    Args:
        X: Feature matrix, one row per sample.
        y: Class labels aligned with the rows of ``X``.
        n_trials: Number of Optuna trials to run.
    """
    # Feature standardisation is currently disabled:
    # scaler = StandardScaler()
    # X = scaler.fit_transform(X)
    split = train_test_split(X, y, test_size=0.2, random_state=42)
    train_X, test_X, train_y, test_y = split

    def run_trial(trial):
        # Bind the fixed data split so Optuna only has to supply the trial.
        return objective(trial, train_X, test_X, train_y, test_y)

    study = optuna.create_study(direction="maximize")
    study.optimize(run_trial, n_trials=n_trials)

    # NOTE(review): trials are ranked by accuracy on the same held-out split
    # that is reported below, so the printed figure is likely optimistic.
    winner = study.best_trial

    print(f'Best trial params: {winner.params}')
    print(f'Best trial accuracy: {winner.value * 100:.2f}%')



In [38]:
# Run a short 5-trial Optuna search on the pooled (all-subject) X / y.
train_MLP(X, y, n_trials=5)

[32m[I 2023-04-27 17:22:43,334][0m A new study created in memory with name: no-name-8f8e165f-be5c-4038-9b56-93a47ffd0383[0m
[32m[I 2023-04-27 17:22:44,826][0m Trial 0 finished with value: 0.2 and parameters: {'learning_rate': 0.00037163929147631346, 'num_epochs': 393, 'hidden_size': 62}. Best is trial 0 with value: 0.2.[0m
[32m[I 2023-04-27 17:22:46,007][0m Trial 1 finished with value: 0.184 and parameters: {'learning_rate': 0.000770401134410677, 'num_epochs': 391, 'hidden_size': 58}. Best is trial 0 with value: 0.2.[0m
[32m[I 2023-04-27 17:22:46,483][0m Trial 2 finished with value: 0.204 and parameters: {'learning_rate': 0.00044672167422980027, 'num_epochs': 151, 'hidden_size': 68}. Best is trial 2 with value: 0.204.[0m
[32m[I 2023-04-27 17:22:47,694][0m Trial 3 finished with value: 0.204 and parameters: {'learning_rate': 0.0007205665018594352, 'num_epochs': 400, 'hidden_size': 24}. Best is trial 2 with value: 0.204.[0m
[32m[I 2023-04-27 17:22:49,052][0m Trial 4 finis

Best trial params: {'learning_rate': 0.00044672167422980027, 'num_epochs': 151, 'hidden_size': 68}
Best trial accuracy: 20.40%


In [None]:
# Subject id included as a feature (first column):
# 23.2% accuracy

# No subject id feature:
# 24% accuracy

NameError: name 'accuracy' is not defined