In [2]:
# Load MATLAB File with Scipy
import scipy.io as sio
import numpy as np
from scipy.io import loadmat
from torch.utils.data import Dataset

# Load MATLAB file
mat_data = sio.loadmat('data/001_data.mat')
head_data = sio.loadmat('data/header_001.mat')

In [3]:
# Extract data and labels
data = mat_data['data']
labels = mat_data['data_labels'][0]

# Create new dictionary
data_001 = {label[0]: data[:, i] for i, label in enumerate(labels)}

# Print new dictionary
print(data_001["Fp1"])

[-3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200.
 -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200. -3200.
 -3200. -3200. -3200. -3200. -3200. -3200. -3200.     0.     0. -3200.]


## NeuroGPT Loading

In [4]:
class EEGDatasetFromMAT(Dataset):
    def __init__(self, mat_file_path, chunk_len=500, num_chunks=10, ovlp=50, normalization=True):
        # Load the .mat file
        mat_data = loadmat(mat_file_path)
        self.data = mat_data['data']  # Assuming data is stored in a variable named 'data'
        self.labels = mat_data['data_labels'][0]  # Assuming labels are stored in 'data_labels'
        
        self.chunk_len = chunk_len
        self.num_chunks = num_chunks
        self.ovlp = ovlp
        self.do_normalization = normalization

    def __len__(self):
        return self.data.shape[1]  # Number of samples or trials

    def __getitem__(self, idx):
        data_sample = self.data[idx]
        if self.do_normalization:
            data_sample = self.normalize(data_sample)
        chunks = self.split_chunks(data_sample)
        return chunks

    def split_chunks(self, data, length=None, ovlp=None, num_chunks=None):
        if length is None:
            length = self.chunk_len
        if ovlp is None:
            ovlp = self.ovlp
        if num_chunks is None:
            num_chunks = self.num_chunks

        all_chunks = []
        total_len = data.shape[1]
        actual_num_chunks = num_chunks
        
        if num_chunks * length > total_len - 1:
            start_point = 0
            actual_num_chunks = total_len // length
        else:
            start_point = np.random.randint(0, total_len - num_chunks * length)
        
        for _ in range(actual_num_chunks):
            chunk = data[:, start_point: start_point + length]
            all_chunks.append(np.array(chunk))
            start_point += length - ovlp
        
        return np.array(all_chunks)

    def normalize(self, data):
        mean = np.mean(data, axis=-1, keepdims=True)
        std = np.std(data, axis=-1, keepdims=True)
        return (data - mean) / (std + 1e-25)

In [6]:
# Example usage
mat_file_path = 'data/001_data.mat'
dataset = EEGDatasetFromMAT(mat_file_path)

In [27]:
chunks = dataset.split_chunks(dataset.data)

# Store chunks as a dictionary
chunks_dict = {label[0]: chunks[:, i] for i, label in enumerate(dataset.labels)}
print(chunks_dict.keys())
# Store keys with the following labels:Fp1,Fp2,F7,F3,Fz,F4,F8,F9,T7,C3,Cz,C4,T8,F8,P7,P3,Pz,P4,P8,O1,O2
chunk_filtered = {key: chunks_dict[key] for key in ['Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'F9', 'T7', 'C3', 'Cz', 'C4', 'T8', 'F8', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'O2']}


dict_keys(['Fp1', 'T9', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T7', 'C3', 'Cz', 'C4', 'T8', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'T10', 'O2', 'F9', 'F10', 'EKG+', 'P9', 'P10', 'PULS+', 'BEAT+', 'SpO2+', 'MKR+'])


In [28]:
chunk_filtered

{'Fp1': array([[ -4.296875  , -24.12109375, -30.17578125, ...,  11.1328125 ,
           8.30078125,  11.81640625],
        [ 32.32421875,  28.3203125 ,  25.        , ...,   0.68359375,
          -3.22265625,  17.7734375 ],
        [ 18.26171875,   9.1796875 , -11.42578125, ...,   5.078125  ,
          12.40234375,   3.3203125 ],
        ...,
        [  7.71484375,   1.3671875 ,  -5.6640625 , ...,   3.41796875,
           6.93359375,   6.73828125],
        [ -4.8828125 ,  -5.17578125, -12.5       , ...,  79.296875  ,
          78.80859375,  75.9765625 ],
        [ -6.54296875,  -2.734375  ,  -3.90625   , ..., -21.97265625,
         -19.82421875, -20.01953125]]),
 'Fp2': array([[ -2.1484375 , -11.03515625,   0.29296875, ...,   1.26953125,
          -0.390625  ,  -2.24609375],
        [  7.03125   ,   8.59375   ,   5.078125  , ...,  -4.78515625,
          -3.125     ,   1.3671875 ],
        [ 15.0390625 ,  10.9375    ,   6.93359375, ...,   1.85546875,
           0.9765625 ,  -4.6875    ],

In [9]:
chunks_dict

{'Fp1': array([[ 47.36328125,  47.65625   ,  54.39453125, ..., 126.7578125 ,
         124.0234375 , 135.15625   ],
        [ 46.2890625 ,  43.06640625,  45.703125  , ..., -10.44921875,
         -10.05859375, -20.21484375],
        [ 46.2890625 ,  29.6875    ,  22.36328125, ...,  89.55078125,
          85.64453125,  85.3515625 ],
        ...,
        [ 19.04296875,  17.7734375 ,  15.8203125 , ...,   0.5859375 ,
          -2.5390625 ,   6.8359375 ],
        [ 11.03515625,  11.42578125,  10.25390625, ...,  -3.02734375,
         -12.109375  , -15.33203125],
        [-10.3515625 ,  -9.08203125,  -1.3671875 , ...,  12.20703125,
          12.40234375,  12.6953125 ]]),
 'T9': array([[-22.8515625 , -16.30859375,  17.1875    , ..., -32.32421875,
         -43.1640625 , -32.51953125],
        [ 33.69140625,   3.02734375,  14.94140625, ...,  14.16015625,
          19.3359375 ,  21.09375   ],
        [ 11.23046875,   9.1796875 ,   8.10546875, ...,  -4.78515625,
          -4.78515625,  -2.34375   ],


In [11]:
dataset.data

# Display each sample
for i in range(len(dataset.data)):
    print(dataset.data[i])

# Store data as a dictionary
data_dict = {label[0]: dataset.data[:, i] for i, label in enumerate(dataset.labels)}

[-3200.             6.34765625     6.0546875  ...   -30.859375
   -40.625        -51.46484375]
[-3200.            21.875         13.8671875  ...    -7.8125
   -17.48046875   -31.34765625]
[-3.20000000e+03 -3.02734375e+00  4.29687500e+00 ... -1.26953125e+01
 -2.14843750e+00  2.01171875e+01]
[-3200.           -22.75390625    -8.203125   ...   -25.48828125
   -37.6953125    -59.1796875 ]
[-3.20000000e+03  5.46875000e+00 -2.05078125e+00 ... -3.90625000e-01
  1.26953125e+00 -4.19921875e+00]
[-3.2000000e+03 -8.7890625e-01 -3.7109375e+00 ...  4.4921875e+00
  2.1484375e+00 -9.7656250e-01]
[-3.20000000e+03  2.63671875e+00  1.85546875e+01 ...  1.19140625e+01
 -2.34375000e+00  1.46484375e+00]
[-3200.            -3.22265625    10.3515625  ...    22.16796875
    10.7421875     10.64453125]
[-3.20000000e+03 -9.76562500e-02  1.04492188e+01 ...  6.05468750e+00
 -4.78515625e+00 -9.76562500e+00]
[-3.20000000e+03  5.37109375e+00 -4.00390625e+00 ...  2.53906250e+00
  3.41796875e+00  1.66015625e+00]
[-3.20

In [None]:
data_dict

AttributeError: 'dict' object has no attribute 'shape'

In [None]:
data_dict.keys()

# Sp02,

dict_keys(['Fp1', 'T9', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T7', 'C3', 'Cz', 'C4', 'T8', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'T10', 'O2', 'F9', 'F10', 'EKG+', 'P9', 'P10', 'PULS+', 'BEAT+', 'SpO2+', 'MKR+'])

In [22]:
chunks_dict["Pz"]

array([[-12.01171875,  -9.765625  ,  -4.98046875, ..., -13.37890625,
        -13.8671875 ,  -6.8359375 ],
       [ 10.44921875,   6.25      ,   8.203125  , ...,  20.41015625,
         18.75      ,  17.28515625],
       [  7.421875  ,  -2.734375  ,  -4.8828125 , ...,  -4.98046875,
         -7.8125    , -11.23046875],
       ...,
       [  5.078125  ,   5.37109375,   6.640625  , ...,  14.2578125 ,
         11.71875   ,   9.375     ],
       [ -0.390625  ,   5.6640625 ,   6.73828125, ...,   6.25      ,
          2.34375   ,   0.1953125 ],
       [  4.6875    ,   2.734375  ,  -0.1953125 , ...,  11.03515625,
         10.64453125,   8.3984375 ]])