In [229]:
import warnings

warnings.filterwarnings("ignore")

# Miller Dataset Loaders

- Miller, Kai J., Gerwin Schalk, Eberhard E. Fetz, Marcel Den Nijs, Jeffrey G. Ojemann, and Rajesh PN Rao. "Cortical activity during motor execution, motor imagery, and imagery-based online feedback." Proceedings of the National Academy of Sciences (2010): 200913697. doi: [10.1073/pnas.0913697107](https://doi.org/10.1073/pnas.0913697107)

In [230]:
fname = '../Dataset/motor_imagery.npz'

## Numpy

In [231]:
import numpy as np

Dataset_numpy = np.load(fname, allow_pickle=True)['dat']
print(Dataset_numpy)

[[{'t_off': array([ 13160,  19160,  25160,  31240,  37240,  43320,  49360,  55440,
          61520,  67600,  73680,  79680,  85680,  91720,  97760, 103760,
         109800, 115840, 121880, 127960, 134040, 140080, 146160, 152160,
         158160, 164160, 170240, 176320, 182320, 188320, 194320, 200400,
         206400, 212480, 218520, 224520, 230560, 236600, 242640, 248640,
         254640, 260720, 266760, 272800, 278840, 284840, 290880, 296880,
         302880, 308920, 314920, 321000, 327000, 333080, 339120, 345200,
         351240, 357320, 363360, 369440], dtype=int64), 'stim_id': array([12, 12, 12, 11, 12, 12, 12, 11, 11, 12, 12, 11, 11, 11, 12, 11, 11,
         11, 12, 12, 11, 11, 12, 11, 11, 12, 12, 12, 11, 11, 12, 11, 12, 12,
         12, 12, 11, 12, 11, 12, 12, 11, 11, 12, 11, 11, 11, 11, 11, 12, 12,
         12, 12, 11, 12, 11, 11, 12, 11, 11]), 't_on': array([ 10160,  16160,  22160,  28240,  34240,  40320,  46360,  52440,
          58520,  64600,  70680,  76680,  82680,  88720, 

### Some Info about the Dataset

For the movement task, from the original dataset instructions:

*Patients performed simple, repetitive, motor tasks of hand (synchronous flexion and extension of all fingers, i.e., clenching and releasing a fist at a self-paced rate of ~1-2 Hz) or tongue (opening of mouth with protrusion and retraction of the tongue, i.e., sticking the tongue in and out, also at ~1-2 Hz). These movements were performed in an interval-based manner, alternating between movement and rest, and the side of move- ment was always contralateral to the side of cortical grid placement.*

<br>

For the imagery task, from the original dataset instructions:

*Following the overt movement experiment, each subject performed an imagery task, imagining making identical movement rather than executing the movement. The imagery was kinesthetic rather than visual (“imagine yourself performing the actions like you just did”; i.e., “don’t imagine what it looked like, but imagine making the motions”).*

<br>

Sample rate is always 1000Hz, and the ECoG data has been notch-filtered at 60, 120, 180, 240 and 250Hz, followed by z-scoring across time and conversion to float16 to minimize size. Please convert back to float32 after loading the data in the notebook, to avoid unexpected behavior.

In [232]:
Dataset_size = Dataset_numpy.shape
n_trials = Dataset_numpy[0][0]['t_on'].shape[0]
l_trial = (Dataset_numpy[0][0]['t_off']  - Dataset_numpy[0][0]['t_on'])[0]
print(f'Dataset includes {Dataset_size[0]} Subjects and {Dataset_size[1]} Experiments ( real / imagery )')
print(f'With {n_trials} number of trials each with the length of {l_trial} time points')


# Check if all have the same number of trials and trial length
for i in range(7):
    for j in range(2):
        if Dataset_numpy[i][j]['t_on'].shape[0] != 60:
            print(f"({i},{j}) sample has {Dataset_numpy[i][j]['t_on'].shape[0]} number of trials")
        for k in range(Dataset_numpy[i][j]['t_on'].shape[0]):
            if (Dataset_numpy[i][j]['t_off']  - Dataset_numpy[i][j]['t_on'])[k] != 3000:
                print(f"({i},{j}) sample {k}th trial has {(Dataset_numpy[i][j]['t_off']  - Dataset_numpy[i][j]['t_on'])[k]} length")


Dataset includes 7 Subjects and 2 Experiments ( real / imagery )
With 60 number of trials each with the length of 3000 time points


Both experiments:
* `['V']`: continuous voltage data (time by channels)
* `['srate']`: acquisition rate (1000 Hz). All stimulus times are in units of this.  
* `['t_on']`: time of stimulus onset in data samples
* `['t_off']`: time of stimulus offset, always 400 samples after `t_on`
* `['stim_id']`: identity of stimulus (11 = tongue, 12 = hand), real or imaginary stimulus
* `['scale_uv']`: scale factor to multiply the data values to get to microvolts (uV).
* `['locs']`: 3D electrode positions on the brain surface

In [233]:
print(f'Here is the list of all possible keys for this Dataset')
keys = []
for key in Dataset_numpy[0][0]:
    keys.append(key)
print(keys)

Here is the list of all possible keys for this Dataset
['t_off', 'stim_id', 't_on', 'srate', 'V', 'scale_uv', 'locs', 'hemisphere', 'lobe', 'gyrus', 'Brodmann_Area']


### Get Features and Targets

In [234]:
import sys
sys.path.append('..')

from Utils.millerUtils import getFeatureTarget, get_all

real, imagery = get_all(alldat=Dataset_numpy,
                         stim_id_1 = 11,
                           stim_id_2 = 12,
                             timepoints_length = 3000,
                               channels = np.arange(46))

X,Y = getFeatureTarget(real, imagery, channels = np.arange(46), flatten = False, shuffle = True, split = '2')
print(Y.shape)
print(X.shape)


(840,)
(840, 46, 3000)


In [235]:
print(Y)
print(X)

[0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1.
 0. 1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0.
 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1.
 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 0.
 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 1.
 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.
 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1.
 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0.
 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1.
 1. 1. 1. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1.
 0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1.
 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 1.
 1. 0. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1. 1. 1.

### Train / Test Split

In [236]:
from sklearn.model_selection import train_test_split  

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2024)


print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)


(672, 46, 3000)
(168, 46, 3000)
(672,)
(168,)


## Pytorch

Must be numpy first, will be converted at a later stage

In [237]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device is set to {device}')

Device is set to cpu


In [238]:
import numpy as np

Dataset_numpy = np.load(fname, allow_pickle=True)['dat']
print(Dataset_numpy)

[[{'t_off': array([ 13160,  19160,  25160,  31240,  37240,  43320,  49360,  55440,
          61520,  67600,  73680,  79680,  85680,  91720,  97760, 103760,
         109800, 115840, 121880, 127960, 134040, 140080, 146160, 152160,
         158160, 164160, 170240, 176320, 182320, 188320, 194320, 200400,
         206400, 212480, 218520, 224520, 230560, 236600, 242640, 248640,
         254640, 260720, 266760, 272800, 278840, 284840, 290880, 296880,
         302880, 308920, 314920, 321000, 327000, 333080, 339120, 345200,
         351240, 357320, 363360, 369440], dtype=int64), 'stim_id': array([12, 12, 12, 11, 12, 12, 12, 11, 11, 12, 12, 11, 11, 11, 12, 11, 11,
         11, 12, 12, 11, 11, 12, 11, 11, 12, 12, 12, 11, 11, 12, 11, 12, 12,
         12, 12, 11, 12, 11, 12, 12, 11, 11, 12, 11, 11, 11, 11, 11, 12, 12,
         12, 12, 11, 12, 11, 11, 12, 11, 11]), 't_on': array([ 10160,  16160,  22160,  28240,  34240,  40320,  46360,  52440,
          58520,  64600,  70680,  76680,  82680,  88720, 

### Some Info about the Dataset

For the movement task, from the original dataset instructions:

*Patients performed simple, repetitive, motor tasks of hand (synchronous flexion and extension of all fingers, i.e., clenching and releasing a fist at a self-paced rate of ~1-2 Hz) or tongue (opening of mouth with protrusion and retraction of the tongue, i.e., sticking the tongue in and out, also at ~1-2 Hz). These movements were performed in an interval-based manner, alternating between movement and rest, and the side of move- ment was always contralateral to the side of cortical grid placement.*

<br>

For the imagery task, from the original dataset instructions:

*Following the overt movement experiment, each subject performed an imagery task, imagining making identical movement rather than executing the movement. The imagery was kinesthetic rather than visual (“imagine yourself performing the actions like you just did”; i.e., “don’t imagine what it looked like, but imagine making the motions”).*

<br>

Sample rate is always 1000Hz, and the ECoG data has been notch-filtered at 60, 120, 180, 240 and 250Hz, followed by z-scoring across time and conversion to float16 to minimize size. Please convert back to float32 after loading the data in the notebook, to avoid unexpected behavior.

In [239]:
Dataset_size = Dataset_numpy.shape
n_trials = Dataset_numpy[0][0]['t_on'].shape[0]
l_trial = (Dataset_numpy[0][0]['t_off']  - Dataset_numpy[0][0]['t_on'])[0]
print(f'Dataset includes {Dataset_size[0]} Subjects and {Dataset_size[1]} Experiments ( real / imagery )')
print(f'With {n_trials} number of trials each with the length of {l_trial} time points')


# Check if all have the same number of trials and trial length
for i in range(7):
    for j in range(2):
        if Dataset_numpy[i][j]['t_on'].shape[0] != 60:
            print(f"({i},{j}) sample has {Dataset_numpy[i][j]['t_on'].shape[0]} number of trials")
        for k in range(Dataset_numpy[i][j]['t_on'].shape[0]):
            if (Dataset_numpy[i][j]['t_off']  - Dataset_numpy[i][j]['t_on'])[k] != 3000:
                print(f"({i},{j}) sample {k}th trial has {(Dataset_numpy[i][j]['t_off']  - Dataset_numpy[i][j]['t_on'])[k]} length")


Dataset includes 7 Subjects and 2 Experiments ( real / imagery )
With 60 number of trials each with the length of 3000 time points


Both experiments:
* `['V']`: continuous voltage data (time by channels)
* `['srate']`: acquisition rate (1000 Hz). All stimulus times are in units of this.  
* `['t_on']`: time of stimulus onset in data samples
* `['t_off']`: time of stimulus offset, always 400 samples after `t_on`
* `['stim_id']`: identity of stimulus (11 = tongue, 12 = hand), real or imaginary stimulus
* `['scale_uv']`: scale factor to multiply the data values to get to microvolts (uV).
* `['locs']`: 3D electrode positions on the brain surface

In [240]:
print(f'Here is the list of all possible keys for this Dataset')
keys = []
for key in Dataset_numpy[0][0]:
    keys.append(key)
print(keys)

Here is the list of all possible keys for this Dataset
['t_off', 'stim_id', 't_on', 'srate', 'V', 'scale_uv', 'locs', 'hemisphere', 'lobe', 'gyrus', 'Brodmann_Area']


### Get Features and Targets

In [241]:
import sys
sys.path.append('..')

from Utils.millerUtils import getFeatureTarget, get_all

real, imagery = get_all(alldat=Dataset_numpy,
                         stim_id_1 = 11,
                           stim_id_2 = 12,
                             timepoints_length = 3000,
                               channels = np.arange(46))

X,Y = getFeatureTarget(real, imagery, channels = np.arange(46), flatten = False, shuffle = True, split = '2')

# X = torch.tensor(X, device=device, dtype=torch.float64)
# Y = torch.tensor(Y, device=device, dtype=torch.long)


print(Y.shape)
print(X.shape)


(840,)
(840, 46, 3000)


In [242]:
print(Y)
print(X)

[1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 0.
 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 0.
 1. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 1.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1.
 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 0. 0.
 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 0. 1. 1. 1.
 0. 1. 0. 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0.
 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 1. 1. 0.
 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0.
 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1.
 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 0. 1. 1. 1.

### Train / Test Loaders

In [243]:
from sklearn.model_selection import train_test_split  
import torch
from torch.utils.data import Dataset, DataLoader  
import torchvision.transforms as transforms  

In [248]:
class CustomDataset(Dataset):  
    def __init__(self, X, Y, transform = None):  
        self.X = X  
        self.Y = Y
        self.transform = transform

    def __len__(self):  
        return len(self.Y)  

    def __getitem__(self, idx):  
        x_sample = self.X[idx]    
        y_sample = self.Y[idx]

        if self.transform:  
            x_sample = self.transform(x_sample) 

        return x_sample, y_sample 

In [249]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2024)

batch_size = 32

In [250]:
X_train = torch.tensor(X_train, dtype=torch.float64, device = device)  
X_test = torch.tensor(X_test, dtype=torch.float64, device = device)  
Y_train = torch.tensor(Y_train, dtype=torch.long, device = device)  
Y_test = torch.tensor(Y_test, dtype=torch.long, device = device)


train_dataset = CustomDataset(X_train, Y_train, transform = None)  
test_dataset = CustomDataset(X_test, Y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle = True)  
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle = False)

In [251]:
for _, (x,y) in enumerate(train_loader):
    print(f'batch_index : {_}, x size : {x.shape}, y size : {y.shape}')

batch_index : 0, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 1, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 2, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 3, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 4, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 5, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 6, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 7, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 8, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 9, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 10, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 11, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 12, x size : torch.Size(

In [252]:
for _, (x,y) in enumerate(test_loader):
    print(f'batch_index : {_}, x size : {x.shape}, y size : {y.shape}')

batch_index : 0, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 1, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 2, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 3, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 4, x size : torch.Size([32, 46, 3000]), y size : torch.Size([32])
batch_index : 5, x size : torch.Size([8, 46, 3000]), y size : torch.Size([8])
