In [None]:
# Set notebook to use only one GPU: the %env magic sets CUDA_VISIBLE_DEVICES
# to "0" in this kernel's environment.
# NOTE(review): presumably this has to run before any CUDA-using library is
# initialised for it to take effect -- confirm.
%env CUDA_VISIBLE_DEVICES=0

In [None]:
from braindecode.datasets.moabb import MOABBDataset
import numpy as np
import pandas as pd
from braindecode.preprocessing import create_windows_from_events
from braindecode.preprocessing import (
    exponential_moving_standardize, preprocess, Preprocessor)
from numpy import multiply
from sklearn.preprocessing import OneHotEncoder
from models import *

# Preprocessing functions


In [None]:
def load_dataset():
    """Build the BNCI2014001 dataset through braindecode's MOABB wrapper.

    ``subject_ids=None`` is forwarded unchanged to ``MOABBDataset``
    (presumably this selects every subject -- confirm against the
    braindecode documentation).

    Returns:
        The ``MOABBDataset`` instance.
    """
    return MOABBDataset(dataset_name="BNCI2014001", subject_ids=None)

def preprocess_data(dataset):
    """Run the fixed preprocessing chain on ``dataset``.

    The steps are applied in this order:

    1. keep EEG channels only (``pick_types``),
    2. multiply the signal by 1e6 (V -> uV),
    3. forward-phase IIR Butterworth filter between 4 Hz and 38 Hz,
    4. exponential moving standardization.

    Args:
        dataset: Object accepted by braindecode's ``preprocess``.

    Returns:
        Whatever ``preprocess(dataset, preprocessors)`` returns.
    """
    # Factor to convert from V to uV.
    volts_to_microvolts = 1e6
    # Pass-band edges of the filter, in Hz.
    band_low_hz, band_high_hz = 4., 38.
    # Parameters for exponential moving standardization.
    '''
    CHECK IF THE FACTOR IS SAME AS 0.999 MENTIONED IN
    THE ARTICLES
    '''
    # NOTE(review): factor_new=1e-3 presumably corresponds to a decay of
    # 1 - 1e-3 = 0.999 -- confirm against the
    # exponential_moving_standardize documentation.
    standardize_kwargs = dict(factor_new=1e-3, init_block_size=1000)
    butterworth_params = dict(order=3, ftype='butter', output='sos')

    # Keep EEG sensors.
    keep_eeg = Preprocessor('pick_types', eeg=True, meg=False, stim=False)
    # Convert from V to uV.
    to_microvolts = Preprocessor(
        lambda data: multiply(data, volts_to_microvolts))
    # Third order butterworth filter.
    # The logs say it's a causal filter but the order is 6?
    # NOTE(review): possibly because a band-pass Butterworth design doubles
    # the requested order (3 -> 6) -- confirm against scipy.signal.butter.
    band_pass = Preprocessor(
        'filter',
        l_freq=band_low_hz,
        h_freq=band_high_hz,
        iir_params=butterworth_params,
        method='iir',
        phase='forward',
    )
    # Exponential moving standardization.
    standardize = Preprocessor(
        exponential_moving_standardize, **standardize_kwargs)

    pipeline = [keep_eeg, to_microvolts, band_pass, standardize]
    return preprocess(dataset, pipeline)

def epoch_data(dataset):
    """Cut the continuous recordings in ``dataset`` into event windows.

    The sampling frequency is read from the first recording and every
    other recording is asserted to share it. The trial start offset of
    -0.5 s is converted to samples with that frequency; the stop offset
    is 0 samples.

    Args:
        dataset: Object exposing ``.datasets``, each element of which has
            ``raw.info['sfreq']``.

    Returns:
        The result of ``create_windows_from_events`` (called with
        ``preload=True``).

    Raises:
        AssertionError: If the recordings do not all share one sampling
            frequency.
    """
    recordings = dataset.datasets
    # Extract sampling frequency, check that it is the same everywhere.
    sfreq = recordings[0].raw.info['sfreq']
    assert all(rec.raw.info['sfreq'] == sfreq for rec in recordings)

    # Trial start offset, first in seconds and then in samples.
    start_offset_seconds = -0.5
    start_offset_samples = int(start_offset_seconds * sfreq)

    # braindecode builds the windows; the offsets define how each trial
    # is cut around its event.
    return create_windows_from_events(
        dataset,
        trial_start_offset_samples=start_offset_samples,
        trial_stop_offset_samples=0,
        preload=True,
    )


In [None]:
def create_dataframe_helper(dataset):
    """Collect every trial of every subject into two stacked arrays.

    Args:
        dataset: Sequence supporting ``len()`` and integer indexing, one
            entry per subject. Each entry exposes ``.datasets``, an
            iterable of runs; each run is an iterable of trials where
            ``trial[0]`` is the input and ``trial[1]`` is the target.

    Returns:
        Tuple ``(inputs, targets)`` of ``np.asarray`` results: the first
        axis indexes subjects, the second indexes that subject's trials
        in run order. (For the data used here this is presumably
        9 subjects x 576 trials -- confirm.)
    """
    per_subject_inputs = []
    per_subject_targets = []
    for position in range(len(dataset)):
        runs = dataset[position].datasets
        # Flatten the subject's runs into one list of trials, run by run.
        trials = [trial for run in runs for trial in run]
        per_subject_inputs.append([trial[0] for trial in trials])
        per_subject_targets.append([trial[1] for trial in trials])

    return np.asarray(per_subject_inputs), np.asarray(per_subject_targets)


def create_dataframe(processed_data):
    """Split the windowed data by subject and stack it into arrays.

    ``processed_data.split('subject')`` is indexed with the string keys
    ``'1'`` .. ``'9'`` so the subjects are handed to
    ``create_dataframe_helper`` in ascending order.

    Args:
        processed_data: Object with a ``split('subject')`` method whose
            result can be indexed by those keys.

    Returns:
        Tuple ``(inputs, targets)`` as returned by
        ``create_dataframe_helper``. (The saved data is expected to have
        shape (9, 576, 22, 1125): subjects, trials, channels,
        timestamps -- confirm.)
    """
    by_subject = processed_data.split('subject')
    ordered_subjects = []
    for subject_number in range(1, 10):
        ordered_subjects.append(by_subject[str(subject_number)])
    return create_dataframe_helper(ordered_subjects)

def onehot(targets):
    """One-hot encode an array of class labels.

    The encoding is computed with NumPy only. The previous implementation
    used ``OneHotEncoder(sparse=False)``, a keyword that newer
    scikit-learn releases no longer accept, and forced the result into a
    hard-coded (9, 576, 4) shape. For a (9, 576) input with four distinct
    labels the result is identical to the previous one.

    Args:
        targets: Array-like of labels with any shape, e.g.
            (n_subjects, n_trials).

    Returns:
        ``float64`` array of shape ``targets.shape + (n_classes,)``.
        ``n_classes`` is the number of distinct labels found, and the
        last axis follows the sorted order of those labels.
    """
    targets = np.asarray(targets)
    # Flatten first so the inverse index is 1-D regardless of input rank.
    classes, class_index = np.unique(targets.reshape(-1), return_inverse=True)
    n_classes = classes.size
    # Row k of the identity matrix is the one-hot vector of class k.
    encoded = np.eye(n_classes, dtype=np.float64)[class_index]
    return encoded.reshape(targets.shape + (n_classes,))
    

def get_x_y(inputs, targets):
    """Merge the subject and trial axes of ``inputs`` and ``targets``.

    Args:
        inputs: Array indexed as (subject, trial, channel, timestamp).
        targets: Array indexed as (subject, trial, class).

    Returns:
        Tuple ``(X, Y)``: ``X`` has shape
        (subjects * trials, channels, timestamps) and ``Y`` has shape
        (subjects * trials, classes), with the subjects concatenated in
        order along the first axis.
    """
    total_trials = inputs.shape[1] * inputs.shape[0]
    x_shape = (total_trials, inputs.shape[2], inputs.shape[3])
    y_shape = (total_trials, targets.shape[2])
    # Concatenating the per-subject slices along axis 0 stacks them in order.
    X = np.concatenate(tuple(inputs), axis=0).reshape(x_shape)
    Y = np.concatenate(tuple(targets), axis=0).reshape(y_shape)
    return X, Y

In [None]:
# Full pipeline: load the dataset, preprocess it, then cut it into windows.
processed_data = epoch_data(preprocess_data(load_dataset()))

In [None]:
# Stack the windowed data into per-subject arrays, one-hot encode the
# labels, print both shapes as a sanity check and persist the result.
inputs, targets = create_dataframe(processed_data)
targets = onehot(targets)
print(inputs.shape, targets.shape)
# NOTE(review): `save` is not defined in this notebook; presumably it comes
# from a star import. It is called here with two positional arrays, while the
# lockbox cell calls it with a single dict -- confirm the signature supports both.
save('all_subject_runs', inputs, targets)

# Lockbox Creation

In [None]:
from models_bachelors import *
from file_functions import *
from numpy import floor
from numpy import random
from sklearn.model_selection import KFold

# Reload the per-subject arrays stored under 'all_subject_runs'.
dataset = load('all_subject_runs')
loaded_inputs = dataset['inputs']
loaded_targets = dataset['targets']

# Create Lockbox for each subject
n_s = 9  # Number of subjects
LOCKBOX_FRACTION = 0.1  # Share of each training subject's trials to lock away
LOCKBOX_SEED = 0  # Fixed seed so the saved lockbox is reproducible

# The test subject of each fold is ignored and the remaining subjects are
# lockboxed. With n_splits equal to the number of subjects and no shuffling,
# KFold holds out exactly one subject per fold, in subject order.
kfold_lock = KFold(n_splits=n_s, shuffle=False)
rng = np.random.default_rng(LOCKBOX_SEED)

lockbox_input = []
lockbox_target = []

# Aside from the test subject, create a lockbox test set containing 10%
# of each subject's data.
# NOTE(review): the drawn trial indices are not stored, and each fold draws
# independently -- confirm how training excludes the lockbox trials.
for train_idx, test_idx in kfold_lock.split(loaded_inputs, loaded_targets):
    subject_in = []
    subject_tar = []

    for subject_idx in train_idx:
        subject_inputs = loaded_inputs[subject_idx]
        subject_targets = loaded_targets[subject_idx]
        num_trials = subject_inputs.shape[0]
        # Draw WITHOUT replacement so the lockbox holds distinct trials;
        # randint could pick the same trial more than once.
        lockbox_size = int(LOCKBOX_FRACTION * num_trials)
        trial_idx = rng.choice(num_trials, size=lockbox_size, replace=False)
        subject_in.append(subject_inputs[trial_idx])
        subject_tar.append(subject_targets[trial_idx])

    # Merge the per-subject selections of this fold along the trial axis.
    subject_in = np.vstack(subject_in)
    subject_tar = np.vstack(subject_tar)
    lockbox_input.append(subject_in)
    lockbox_target.append(subject_tar)


lockbox_input = np.array(lockbox_input)
lockbox_target = np.array(lockbox_target)

# How to use lockbox: dim-0 is the Nth subject to test on (while model is trained on
# N-1 subjects).
save('lockbox_mcdropout', dict({'inputs': lockbox_input, 'targets': lockbox_target}))
    