In [1]:
import os
import numpy as np
import torch
import scipy.io

In [2]:
# Recording layout and dataset location.
channels = 6      # EEG channels stored per trial
samples = 4096    # samples per channel per trial
rate = 1024       # sampling rate (presumably Hz - confirm against the dataset docs)
path = './Base de Datos Habla Imaginada'

# Per-file pieces; concatenated once after the directory walk.
subject_chunks = []
data_chunks = []
label_chunks = []

for root, dirs, files in os.walk(path):
    # os.walk yields entries in arbitrary filesystem order. Sorting the
    # directories in place (which steers the rest of the walk) and the file
    # names keeps the trial order identical across machines and re-runs.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith('_EEG.mat'):
            continue

        mat = scipy.io.loadmat(os.path.join(root, file))
        eeg = mat['EEG']
        n_signal = channels * samples

        # Each row is one trial: channels*samples signal values followed by
        # the label columns. The subject id is parsed from characters 1-2 of
        # the file name (assumes names shaped like 'S01_EEG.mat').
        subject_chunks.append(np.full(len(eeg), int(file[1:3]), dtype=np.int64))
        # Cast to float32 per file so the full-precision copy of the whole
        # dataset never has to be held in memory at once.
        data_chunks.append(
            eeg[:, :n_signal].reshape(-1, channels, samples).astype(np.float32)
        )
        label_chunks.append(eeg[:, n_signal:])

if not data_chunks:
    # Fail with an actionable message instead of numpy's
    # "need at least one array to concatenate".
    raise FileNotFoundError(f"No '*_EEG.mat' files found under {path!r}")

subjects = torch.from_numpy(np.concatenate(subject_chunks, axis=0)).to(torch.int64)
data = torch.from_numpy(np.concatenate(data_chunks, axis=0)).to(torch.float32)
labels = torch.from_numpy(np.concatenate(label_chunks, axis=0)).to(torch.int64)

In [3]:
# Sanity check: all three tensors must agree on the number of trials (dim 0).
tuple(t.shape for t in (subjects, data, labels))

(torch.Size([9315]), torch.Size([9315, 6, 4096]), torch.Size([9315, 3]))

In [4]:
# Peek at the first trial: subject id, signal, and label row.
tuple(t[0] for t in (subjects, data, labels))

(tensor(1),
 tensor([[-45.0273, -44.9023, -44.4957,  ...,  21.5850,  20.3604,  19.1752],
         [-42.8867, -43.1319, -43.2460,  ...,  23.5990,  22.3516,  21.1756],
         [-49.0815, -48.7082, -47.9301,  ...,  19.6565,  18.9825,  18.3849],
         [-25.9682, -26.7065, -27.4165,  ...,  16.7802,  16.3765,  16.1537],
         [-25.5519, -26.1960, -26.8182,  ...,  18.9174,  18.3165,  17.8039],
         [-16.6065, -17.4099, -18.3053,  ...,  22.1402,  21.3476,  20.7603]]),
 tensor([1, 9, 2]))

In [5]:
# Descriptive information stored alongside the tensors so that consumers of
# "dataset.pt" do not need this notebook to interpret them.
metadata = dict(
    channels=channels,
    channels_residual=channels + 1,  # one extra slot for the "residual" channel
    samples=samples,
    rate=rate,
    # Electrode names F3, F4, C3, C4, P3, P4 followed by the residual entry.
    channel_names=[f"{region}{side}" for region in "FCP" for side in "34"] + ["residual"],
    stimuli=["A", "E", "I", "O", "U", "Arriba", "Abajo", "Adelante", "Atras", "Derecha", "Izquierda"],
    modalities=["Imagined", "Pronounced"],
    artifacts=["None", "Blink"],
    # Names of the label columns (presumably in column order - confirm).
    label_headers=["modality", "stimulus", "artifact"],
)

# The raw labels and subject ids are 1-based; shift both to 0-based before
# saving.
dataset = dict(
    data=data,
    labels=labels - 1,
    metadata=metadata,
    subjects=subjects - 1,
)

torch.save(dataset, "dataset.pt")

In [6]:
# First channel of the first trial.
data[0, 0]

tensor([-45.0273, -44.9023, -44.4957,  ...,  21.5850,  20.3604,  19.1752])