In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import os, yaml
from easydict import EasyDict
import pickle
from datetime import datetime

import torch
from torch.utils.data import DataLoader

from sklearn.model_selection import KFold

from dataloader.bci_compet import get_dataset
from dataloader.bci_compet import BCICompet2aIV

from model.litmodel import LitModel
from model.attn_conditioned_subj_ftr import ATTNConditionedSubjFtr
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_lightning import Trainer, seed_everything


from utils.setup_utils import (
    get_device,
    get_log_name,
)
from utils.training_utils import get_callbacks

torch.set_float32_matmul_precision('medium')

%load_ext autoreload
%autoreload 2

In [2]:
CACHE_ROOT = 'cache'

config_name = 'bcicompet2a_config'

with open(f'configs/{config_name}.yaml') as file:
    config = yaml.load(file, Loader=yaml.FullLoader)
    args = EasyDict(config)


In [3]:
def load_dataset(args, return_subject_id=False):
    datasets = {}
    for subject_id in range(0,9):
        args['target_subject'] = subject_id
        datasets[subject_id] = BCICompet2aIV(args)
    return datasets

path = os.path.join(CACHE_ROOT, f'{config_name}_base.pkl')

if not os.path.isfile(path):
    print('Cache miss, generating cache')
    datasets = load_dataset(args)
    with open(path, 'wb') as file:
        pickle.dump(datasets, file)
else:
    print('Loading cache')
    with open(path, 'rb') as file:
        datasets = pickle.load(file)

Loading cache


In [4]:
for subject_id in datasets.keys(): 
    print(f"Subject {subject_id} has {len(datasets[subject_id])} trials")

Subject 0 has 288 trials
Subject 1 has 288 trials
Subject 2 has 288 trials
Subject 3 has 288 trials
Subject 4 has 288 trials
Subject 5 has 288 trials
Subject 6 has 288 trials
Subject 7 has 288 trials
Subject 8 has 288 trials


In [5]:
for subject_id in datasets.keys(): 
    datasets[subject_id].return_subject_info = 'ftr'

In [6]:
train_size = 240
val_size = 48

In [7]:
for LOS in datasets.keys():
    name = 'ftr_L1SO_'+str(LOS)
    args.VERSION = f'{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}-{name}'


    #### Set Log ####
    args['current_time'] = datetime.now().strftime('%Y%m%d')
    args['LOG_NAME'] = get_log_name(args)

    #### Update configs ####
    if args.downsampling != 0: args['sampling_rate'] = args.downsampling
    seed_everything(args.SEED)


    train_datasets = {}
    val_datasets = {}
    for subject_id in datasets.keys():
        if subject_id != LOS:
            train_datasets[subject_id] = torch.utils.data.Subset(datasets[subject_id], range(train_size))
            val_datasets[subject_id] = torch.utils.data.Subset(datasets[subject_id], range(train_size, train_size+val_size))


    train_dataset_all = torch.utils.data.ConcatDataset(list(train_datasets.values()))
    val_dataset_all = torch.utils.data.ConcatDataset(list(val_datasets.values()))
    print(len(train_dataset_all), len(val_dataset_all))

    train_dataloader_all = DataLoader(train_dataset_all, batch_size=args['batch_size'], shuffle=True, num_workers=0, persistent_workers=False)
    val_dataloader_all = DataLoader(val_dataset_all, batch_size=args['batch_size'], shuffle=False, num_workers=0, persistent_workers=False)


    model = ATTNConditionedSubjFtr(args, eeg_normalization = 'LayerNorm', subject_normalization='LayerNorm',  embedding_dimension=23, combined_features_dimension=43, num_classes=args['num_classes'] )
    lit_model = LitModel(args, model)

    logger = TensorBoardLogger(args.LOG_PATH, 
                                    name=args.VERSION)

    callbacks = get_callbacks(monitor='val_loss', args=args)


    trainer = Trainer(
            max_epochs=args['EPOCHS'],
            callbacks=callbacks,
            default_root_dir=args.CKPT_PATH,
            logger=logger,
            enable_progress_bar=False
        )

    trainer.fit(lit_model,
            train_dataloaders=train_dataloader_all,
            val_dataloaders=val_dataloader_all)
        
    torch.cuda.empty_cache()

Seed set to 42


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  return F.conv2d(input, weight, bias, self.stride,
/home/devuser/.local/lib/python3.8/site-packages/pytorch_lightning/utilities/parsing.py:198: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_11-41-56-ftr_L1SO_0
2023-12-14 11:41:56.618476: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-14 11:41:56.638458: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in perfo

  0%|          | 0/500 [00:00<?, ?it/s]

/home/devuser/.local/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.
/home/devuser/.local/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.
/home/devuser/.local/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
/home/devuser/.local/lib/python3.8/

LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_11-47-07-ftr_L1SO_2
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | ATTNConditionedSubjFtr | 34.0 K
1 | criterion | CrossEntropyLoss       | 0     
-----------------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_11-49-41-ftr_L1SO_3
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | ATTNConditionedSubjFtr | 34.0 K
1 | criterion | CrossEntropyLoss       | 0     
-----------------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_11-52-16-ftr_L1SO_4
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | ATTNConditionedSubjFtr | 34.0 K
1 | criterion | CrossEntropyLoss       | 0     
-----------------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_11-54-50-ftr_L1SO_5
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | ATTNConditionedSubjFtr | 34.0 K
1 | criterion | CrossEntropyLoss       | 0     
-----------------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_11-57-22-ftr_L1SO_6
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | ATTNConditionedSubjFtr | 34.0 K
1 | criterion | CrossEntropyLoss       | 0     
-----------------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_11-59-53-ftr_L1SO_7
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | ATTNConditionedSubjFtr | 34.0 K
1 | criterion | CrossEntropyLoss       | 0     
-----------------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./logs/BCICompet2a/2023-12-14_12-02-24-ftr_L1SO_8
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | ATTNConditionedSubjFtr | 34.0 K
1 | criterion | CrossEntropyLoss       | 0     
-----------------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


LOG >>> Log name: 
	20231214_task_BCICompet2a_batch_512_lr_0.002_Baseline
1920 384


  0%|          | 0/500 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.
