In [1]:
# Load MATLAB File with Scipy
import scipy.io as sio
import numpy as np
from scipy.io import loadmat
from torch.utils.data import Dataset

# Load MATLAB file
mat_data = sio.loadmat('data/001_data.mat')
head_data = sio.loadmat('data/header_001.mat')

In [None]:
mat_data["data"].shape
mat_data["data_labels"][0]

# Only keep data for the following labels (yet to add Oz, which is average of O1 and O2)
standard_channels = [
        'Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 
        'F9', 'T7', 'C3', 'Cz', 'C4', 'T8', 'F10',
        'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'O2'
    ]

# Convert labels to list to use index method
labels_list = mat_data["data_labels"][0].tolist()

# Keep only the data for the standard channels and reorder according to standard_channels
data = mat_data["data"][[labels_list.index(ch) for ch in standard_channels],:]

# Between O1 and O2, create a new channel OZ, which is the average of O1 and O2
o1_idx = mat_data["data_labels"][0].tolist().index('O1')
o2_idx = mat_data["data_labels"][0].tolist().index('O2')

oz = (mat_data["data"][o1_idx,:] + mat_data["data"][o2_idx,:]) / 2

# Insert oz between o1 and o2
data = np.insert(data, o2_idx, oz, axis=0)


In [2]:
# Extract data and labels
data = mat_data['data']
labels = mat_data['data_labels'][0]

# Create new dictionary
data_001 = {label[0]: data[:, i] for i, label in enumerate(labels)}

# Print new dictionary
print(data_001["Fp1"])

[-3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200.
 -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200.
 -3200. -3200. -3200. -3200. -3200. -3200. -3200.     0.     0. -3200.]


## NeuroGPT Loading

In [3]:
class EEGDatasetFromMAT(Dataset):
    def __init__(self, mat_file_path, chunk_len=500, num_chunks=10, ovlp=50, normalization=True):
        # Load the .mat file
        mat_data = loadmat(mat_file_path)
        self.data = mat_data['data']  # Assuming data is stored in a variable named 'data'
        self.labels = mat_data['data_labels'][0]  # Assuming labels are stored in 'data_labels'
        
        self.chunk_len = chunk_len
        self.num_chunks = num_chunks
        self.ovlp = ovlp
        self.do_normalization = normalization

    def __len__(self):
        return self.data.shape[1]  # Number of samples or trials

    def __getitem__(self, idx):
        data_sample = self.data[idx]
        if self.do_normalization:
            data_sample = self.normalize(data_sample)
        chunks = self.split_chunks(data_sample)
        return chunks

    def split_chunks(self, data, length=None, ovlp=None, num_chunks=None):
        if length is None:
            length = self.chunk_len
        if ovlp is None:
            ovlp = self.ovlp
        if num_chunks is None:
            num_chunks = self.num_chunks

        all_chunks = []
        total_len = data.shape[1]
        actual_num_chunks = num_chunks
        
        if num_chunks * length > total_len - 1:
            start_point = 0
            actual_num_chunks = total_len // length
        else:
            start_point = np.random.randint(0, total_len - num_chunks * length)
        
        for _ in range(actual_num_chunks):
            chunk = data[:, start_point: start_point + length]
            all_chunks.append(np.array(chunk))
            start_point += length - ovlp
        
        return np.array(all_chunks)

    def normalize(self, data):
        mean = np.mean(data, axis=-1, keepdims=True)
        std = np.std(data, axis=-1, keepdims=True)
        return (data - mean) / (std + 1e-25)

In [4]:
# Example usage
mat_file_path = 'data/001_data.mat'
dataset = EEGDatasetFromMAT(mat_file_path)

In [5]:
chunks = dataset.split_chunks(dataset.data)

# Store chunks as a dictionary
chunks_dict = {label[0]: chunks[:, i] for i, label in enumerate(dataset.labels)}
print(chunks_dict.keys())
# Add a new key with the following label: 'Oz', it is the average of O1 and O2
chunks_dict['Oz'] = (chunks_dict['O1'] + chunks_dict['O2']) / 2
# Store keys with the following labels:Fp1,Fp2,F7,F3,Fz,F4,F8,F9,T7,C3,Cz,C4,T8,F8,P7,P3,Pz,P4,P8,O1,Oz,O2
chunk_filtered = {key: chunks_dict[key] for key in ['Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'F9', 'T7', 'C3', 'Cz', 'C4', 'T8', 'F8', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'Oz','O2']}


dict_keys(['Fp1', 'T9', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T7', 'C3', 'Cz', 'C4', 'T8', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'T10', 'O2', 'F9', 'F10', 'EKG+', 'P9', 'P10', 'PULS+', 'BEAT+', 'SpO2+', 'MKR+'])


In [6]:
chunk_filtered

{'Fp1': array([[ -8.7890625 ,  -4.296875  ,  -6.0546875 , ...,   0.9765625 ,
          -1.7578125 ,   2.05078125],
        [ 76.46484375,  76.3671875 , 103.41796875, ..., -40.52734375,
         -31.25      , -35.9375    ],
        [-31.34765625, -28.90625   ,   0.78125   , ..., -64.94140625,
         -55.37109375, -57.32421875],
        ...,
        [-63.18359375, -61.9140625 , -59.1796875 , ...,   9.27734375,
          23.6328125 ,  28.515625  ],
        [-29.00390625, -29.00390625, -29.296875  , ..., -49.4140625 ,
         -48.73046875, -70.5078125 ],
        [-58.0078125 , -69.82421875, -70.8984375 , ..., -14.84375   ,
           6.4453125 ,  29.8828125 ]]),
 'Fp2': array([[  8.69140625,  -1.26953125,  -9.765625  , ...,  -6.34765625,
          -6.640625  ,  -3.80859375],
        [ 59.5703125 ,  52.63671875,  54.98046875, ...,  -8.7890625 ,
         -30.76171875, -26.3671875 ],
        [-12.890625  , -16.2109375 , -14.74609375, ..., -33.88671875,
         -41.40625   , -40.4296875 ],