# About this notebook

This notebook showcase how we can load an EEG dataset, and perform preprocessing. We train the EEGNet model with leave one subject paradigm and k-fold cross-validation.

In [1]:
%cd ../

D:\workspace\github\bcikit


In [2]:
import numpy as np
import torch
from torch import nn, optim

In [3]:
from bcikit.dataclass import BcikitConfig


load_subject_id_range = (1, 3)
subject_ids = list(np.arange(load_subject_id_range[0], load_subject_id_range[1]+1, dtype=int))
print("Load subject IDs", subject_ids)

config = {
    "dataset": {
        "root": "../data/hsssvep",
        "num_channels": 9, # although this dataset has 64 channels, we planned to perform preprocessing, which left us with 9 channels
        "num_classes": 40, # this is not used in the dataloading process, but it belongs to the dataset, so lets define it here
        "subject_ids": subject_ids, # this is a list of int
        "sessions": None, # `None` since this dataset has one session
        "do_recommended_preprocessing": True # perform preprocessing defined in the dataset
    },
    "optimizer": { # work in progress, we need this when we build Trainer
        "weight_decay": 0.05,
    },
    "lr_scheduler": { # work in progress, we need this when we build Trainer
        "learning_rate": 0.001,
    },
    "criterion": { # work in progress, we need this when we build Trainer
        
    },
    "model": { # work in progress, we need this when we build Trainer
        
    },
    "training": { # work in progress, we need this when we build Trainer
        "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
        "batchsize": 64,
        "kfold_split": 3,
        "num_epochs": 10,
    },
    "log_level": 1 # 0: no logs | 1: minimum, only the very important ones | 2: moderate, tells you at which steps is the code running | 3: everything, for serious debugging
}

config = BcikitConfig(config) # we do this, so we can use dot notation. this is work in progress, to design a global config, see https://github.com/ntubci/bcikit/issues/5

Load subject IDs [1, 2, 3]


# Data

## Create preprocessing function

We have a custom preprocessing function. Here we slice the 4-second signal into 4 parts, and we select only the first segment.

In [4]:
from bcikit.transforms.segment_time import segment_data_time_domain


def preprocessing(data, targets, channel_names, sample_rate, segment_config, verbose, **kwargs):
    print()
    print("preprocessing data shape", data.shape) # (subject,session,trial,channel,time)

    # segment signal and select the first segment
    data = segment_data_time_domain(
        data=data,
        window_len=segment_config.window_len,
        shift_len=segment_config.shift_len,
        sample_rate=segment_config.sample_rate,
        add_segment_axis=True,
    )
    data = data[:, :, :, :, 0, :] # select the first segment, and remove the rest

    # since we are doing leave one subject out, we don't care about `session`, we only want data in this format (subject, trial, channel, time).
    data = data.reshape((data.shape[0], data.shape[1]*data.shape[2], data.shape[3], data.shape[4]))
    targets = targets.reshape((targets.shape[0], targets.shape[1]*targets.shape[2]))
    
    print("final data.shape", data.shape)
    print("final targets.shape", targets.shape)

    return data, targets

## Load data

We load and preprocess the dataset that want to use in our experiment. We only do this once.

In [5]:
from bcikit.datasets.ssvep import HSSSVEP
from bcikit.datasets import EEGDataloader
from bcikit.datasets.data_selection_methods import leave_one_subject_out


segment_config = {
    "window_len": 1,
    "shift_len": 250,
    "sample_rate": 250,
}
segment_config = BcikitConfig(segment_config) # we do this, so we can use dot notation, `segment_config.window_len` is nicer then doing `segment_config["window_len"]`

data = EEGDataloader(
    dataset=HSSSVEP, 
    config=config,
    preprocessing_fn=preprocessing, # customize your own preprocessing, pass `None` if do not have customized preprocessing steps
    data_selection_fn=leave_one_subject_out, # customize your data selection function or use common ones from `data_selection_methods`
    # start kwargs: basically you can pass any additional params you have
    segment_config=segment_config, # in this case, `segment_config` is needed in the custom `preprocessing` function
)

Load subject: 1
Load subject: 2
Load subject: 3

preprocessing data shape (3, 1, 240, 9, 1000)
final data.shape (3, 240, 9, 250)
final targets.shape (3, 240)


# Model

We import CompactEEGNet and make sure it is working.

In [6]:
from bcikit.models import CompactEEGNet
from bcikit.models.utils import count_params


model = CompactEEGNet(
    num_channel=config.dataset.num_channels,
    num_classes=config.dataset.num_classes,
    signal_length=segment_config.window_len * segment_config.sample_rate,
).to(config.training.device)

x = torch.ones((16, config.dataset.num_channels, segment_config.window_len * segment_config.sample_rate)).to(config.training.device)
y = model(x)
print("Input shape:", x.shape)
print("Output shape:", y.shape)
print('Model size:', count_params(model))

Input shape: torch.Size([16, 9, 250])
Output shape: torch.Size([16, 40])
Model size: 63304


## Training

Here shows how we can use the dataloader, and train a model.

We call the `data.get_dataloaders` here because we want to fetch different k-fold and different "leave one subject out".

In [7]:
test_subject_id = 1
kfold_k = 0

train_loader, val_loader, test_loader = data.get_dataloaders(test_subject_id=test_subject_id, batchsize=config.training.batchsize, kfold_k=kfold_k, kfold_split=config.training.kfold_split)

print()
print("train_loader:", train_loader.dataset.data.shape, train_loader.dataset.targets.shape)
print("val_loader:", val_loader.dataset.data.shape, val_loader.dataset.targets.shape)
print("test_loader:", test_loader.dataset.data.shape, test_loader.dataset.targets.shape)


train_loader: (320, 9, 250) (320,)
val_loader: (160, 9, 250) (160,)
test_loader: (240, 9, 250) (240,)


In [8]:
params_to_update = []
for name, param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)

optimizer = optim.Adam(params_to_update, lr=config.lr_scheduler.learning_rate, weight_decay=config.optimizer.weight_decay)

criterion = nn.CrossEntropyLoss()

In [9]:
def train(train_loader, device):
    for epoch in range(config.training.num_epochs):
        epoch_loss = 0.0
        model.train()

        for X, Y in train_loader:
            inputs = X.to(device)
            labels = Y.long().to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            epoch_loss += loss.item()

            loss.backward()
            optimizer.step()

        print('Epoch {} - loss: {:.5f}'.format(epoch+1, epoch_loss))
        
train(train_loader, config.training.device)

Epoch 1 - loss: 18.70075
Epoch 2 - loss: 18.20019
Epoch 3 - loss: 17.73731
Epoch 4 - loss: 17.29030
Epoch 5 - loss: 16.97092
Epoch 6 - loss: 16.61557
Epoch 7 - loss: 15.97013
Epoch 8 - loss: 15.27393
Epoch 9 - loss: 14.45492
Epoch 10 - loss: 13.29764
