In [1]:

import os
import sys
from pathlib import Path

module_path = os.path.abspath(os.path.join('../src/'))
if module_path not in sys.path:
    sys.path.append(module_path)

from data.smr_datamodule import SMR_Data
import bbcpy

In [2]:
# Load data
data_dir = Path("D:\\SMR\\")
task_name = "2D"
subject_sessions_dict = {"S4": "all"}
loading_data_mode = "within_subject"
ival = "2s:10s:1ms"
bands = [8, 13]
chans = "*"
fallback_neighbors = 4
transform = None
normalize_dict = {"norm_type": "std", "norm_axis": 0}

process_noisy_channels = False
ignore_noisy_sessions = False

trial_type = "valid"

smr_datamodule_nn = SMR_Data(data_dir=data_dir,
                             task_name=task_name,
                             trial_type = trial_type,
                             subject_sessions_dict=subject_sessions_dict,
                             loading_data_mode=loading_data_mode,
                             ival=ival,
                             bands=bands,
                             chans=chans,
                             fallback_neighbors=fallback_neighbors,
                             transform=transform,
                             normalize=normalize_dict,
                             process_noisy_channels=process_noisy_channels,
                             ignore_noisy_sessions=ignore_noisy_sessions,)
subjects_sessions_path_dict = smr_datamodule_nn.collect_subject_sessions(subject_sessions_dict)

INFO:root:Found sessions: ['Session_3'] for subject S4


In [3]:
epo_train_data, epo_test_data, session_info_dict = smr_datamodule_nn.load_session_trials(subjects_sessions_path_dict)

TypeError: stat: path should be string, bytes, os.PathLike or integer, not dict

In [3]:
subjects_sessions_path_dict

{'S4': {'Session_3': WindowsPath('D:/SMR/S4_Session_3.mat')}}

In [4]:
epo_data, session_info_dict, trial_info = smr_datamodule_nn.load_raw_trials(
    subjects_sessions_path_dict["S4"]["Session_3"])

noisechan [3.0, 5.0, 8.0, 23.0, 41.0, 62.0]


In [5]:
epo_data.shape

(450, 62, 11041)

In [6]:
trial_info.keys()

dict_keys(['tasknumber', 'runnumber', 'trialnumber', 'targetnumber', 'triallength', 'targethitnumber', 'resultind', 'result', 'forcedresult', 'artifact'])

In [9]:
task_ids_LR = [id for id, i in enumerate(trial_info["tasknumber"]) if i == 1.0]
task_ids_UD = [id for id, i in enumerate(trial_info["tasknumber"]) if i == 2.0]
task_ids_2D = [id for id, i in enumerate(trial_info["tasknumber"]) if i == 3.0]

In [10]:
import numpy as np

valid_LR_ids = np.array(trial_info["result"])[task_ids_LR]
forced_LR_ids = np.array(trial_info["forcedresult"])[task_ids_LR]

In [None]:
# split to train and test ; the trials for a task is composed of 6 runs each has 25 trials ; so the train set will contains runs : 1,2 , 4,and 5 ; the test set will contains runs 3 and 6
train_ids = [id for id, i in enumerate(trial_info["run"]) if i in [1, 2, 4, 5]]

In [11]:
valid_LR_ids.shape

(150,)

In [12]:
forced_LR_ids.shape

(150,)

In [15]:
for run_idx in range(0, 150, 25):
    print(run_idx // 25 + 1)

1
2
3
4
5
6


In [62]:
srm_data, timepoints, srm_fs, clab, mnt, trials_info, subject_info = \
    bbcpy.load.srm_eeg.load_single_mat_session(file_path=subjects_sessions_path_dict["S4"]["Session_3"])

noisechan [3.0, 5.0, 8.0, 23.0, 41.0, 62.0]


In [63]:
timepoints

array([array([[-2000., -1999., -1998., ...,  9038.,  9039.,  9040.]]),
       array([[-2000., -1999., -1998., ...,  9038.,  9039.,  9040.]]),
       array([[-2000., -1999., -1998., ...,  5438.,  5439.,  5440.]]),
       array([[-2000., -1999., -1998., ...,  5518.,  5519.,  5520.]]),
       array([[-2000., -1999., -1998., ...,  9038.,  9039.,  9040.]]),
       array([[-2000., -1999., -1998., ...,  6598.,  6599.,  6600.]]),
       array([[-2000., -1999., -1998., ...,  7118.,  7119.,  7120.]]),
       array([[-2000., -1999., -1998., ...,  5478.,  5479.,  5480.]]),
       array([[-2000., -1999., -1998., ...,  4398.,  4399.,  4400.]]),
       array([[-2000., -1999., -1998., ...,  4838.,  4839.,  4840.]]),
       array([[-2000., -1999., -1998., ...,  5878.,  5879.,  5880.]]),
       array([[-2000., -1999., -1998., ...,  4678.,  4679.,  4680.]]),
       array([[-2000., -1999., -1998., ...,  9038.,  9039.,  9040.]]),
       array([[-2000., -1999., -1998., ...,  4398.,  4399.,  4400.]]),
      

In [82]:
task_name_dict = {"LR": 1.0, "UD": 2.0, "2D": 3.0}
target_map_dict = {1: "R", 2: "L", 3: "U", 4: "D"}

def split_data(session_data, trials_info, task_name ,trial_type="valid"):
    # Define the runs for training and testing
    train_runs = [1, 2, 4, 5]
    test_runs = [3, 6]

    # Initialize empty lists for train and test data
    train_data = []
    test_data = []
    train_labels = []
    test_labels = []
    
    train_idx = []
    test_idx = []

    # Get the trials ids for the task
    task_trials_ids = [id for id, i in enumerate(trials_info["tasknumber"]) if i == task_name_dict[task_name]]

    # Get the trials results for the task
    if trial_type == "valid":
        task_results_ids = np.array(trials_info["result"])[task_trials_ids]
    elif trial_type == "forced":
        task_results_ids = np.setdiff1d(np.where(trials_info["forcedresult"] == np.bool_(True))[0],
                                        np.array(trials_info["result"])[task_trials_ids])
    else:
        raise ValueError("trial_type should be either valid or forced")

    # Get the trials targets for the task 
    task_targets = np.array(trial_info["targetnumber"])[task_trials_ids]
    task_targets = task_targets.astype(int) - 1  # to start from 0

    for idx, valid, target in zip(task_trials_ids, task_results_ids, task_targets):
        print(idx, valid, target)
        if valid == 1.0:
            trial_data = session_data[idx, :, :]
            run_idx = (idx % 150) // 25 + 1  # Calculate the run index

            if run_idx in train_runs:
                train_data.append(trial_data)
                train_labels.append(target)
                train_idx.append(idx)
            elif run_idx in test_runs:
                test_data.append(trial_data)
                test_labels.append(target)
                test_idx.append(idx)

    # Combine the runs into a single numpy array
    train_data = np.stack(train_data, axis=0).squeeze()
    test_data = np.stack(test_data, axis=0).squeeze()
    
    class_names = np.array(["R", "L", "U", "D"])
    
    # create SRM_Data object for the train
    mrk_train = bbcpy.datatypes.eeg.Marker(mrk_pos=train_idx,
                                       mrk_class=train_labels,
                                       mrk_class_name=class_names,
                                       mrk_fs=1,
                                       parent_fs=srm_fs)


    epo_train_data = bbcpy.datatypes.srm_eeg.SRM_Data(srm_data=srm_data.squeeze(),
                                                      timepoints=timepoints[train_idx].reshape(-1, 1),
                                                      fs=srm_fs,
                                                      mrk=mrk_train,
                                                      chans=chans)
    # create SRM_Data object for the test
    
    
    
    return train_data, test_data, train_labels, test_labels, train_idx, test_idx

In [83]:
train_data, test_data, train_labels, test_labels, train_idx, test_idx = split_data(epo_data, trial_info, "LR", trial_type="valid")

0 nan 1
1 nan 0
2 1.0 1
3 1.0 0
4 nan 0
5 1.0 1
6 1.0 0
7 1.0 1
8 1.0 0
9 1.0 1
10 1.0 1
11 1.0 0
12 1.0 1
13 1.0 0
14 nan 0
15 1.0 1
16 1.0 1
17 1.0 0
18 1.0 1
19 1.0 0
20 1.0 1
21 1.0 0
22 1.0 1
23 1.0 0
24 1.0 0
25 1.0 1
26 1.0 0
27 1.0 1
28 nan 0
29 nan 1
30 1.0 0
31 nan 1
32 1.0 1
33 0.0 0
34 1.0 1
35 1.0 0
36 nan 0
37 nan 1
38 1.0 1
39 nan 0
40 nan 0
41 nan 1
42 1.0 0
43 nan 1
44 1.0 0
45 1.0 1
46 1.0 0
47 1.0 1
48 0.0 1
49 1.0 0
50 1.0 1
51 0.0 0
52 1.0 1
53 1.0 0
54 1.0 1
55 1.0 0
56 1.0 1
57 1.0 0
58 1.0 1
59 1.0 0
60 1.0 0
61 1.0 1
62 0.0 0
63 1.0 1
64 1.0 1
65 1.0 0
66 nan 0
67 1.0 1
68 0.0 0
69 1.0 1
70 0.0 0
71 1.0 1
72 1.0 1
73 0.0 0
74 1.0 1
225 0.0 0
226 1.0 1
227 1.0 1
228 1.0 0
229 nan 1
230 nan 0
231 nan 0
232 1.0 1
233 0.0 0
234 1.0 1
235 nan 0
236 1.0 1
237 1.0 1
238 0.0 0
239 1.0 1
240 0.0 0
241 0.0 0
242 1.0 1
243 1.0 1
244 0.0 0
245 0.0 1
246 1.0 0
247 1.0 0
248 0.0 1
249 0.0 1
250 1.0 0
251 0.0 1
252 1.0 0
253 1.0 0
254 0.0 1
255 1.0 0
256 0.0 1
257 1.0 0
258 0

In [90]:
timepoints[train_idx].reshape(-1, 1).shape

(60, 1)

In [86]:
test_idx.__len__()

31

In [85]:
train_idx.__len__()

60

In [87]:
train_idx

[2,
 3,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 30,
 32,
 34,
 35,
 38,
 42,
 44,
 45,
 46,
 47,
 49,
 226,
 227,
 228,
 232,
 234,
 236,
 237,
 239,
 242,
 243,
 246,
 247,
 250,
 252,
 253,
 255,
 257,
 260,
 262,
 264,
 265,
 268,
 269,
 272,
 274]

In [89]:
train_data.squeeze().shape

(60, 62, 11041)

In [59]:
train_labels.__len__()

100

In [51]:
train_labels

[1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [24]:
import numpy as np

task_name_dict = {"LR": 1.0, "UD": 2.0, "2D": 3.0}


def split_data(session_data, trials_info, task_name):
    # Define the runs for training and testing
    train_runs = [1, 2, 4, 5]
    test_runs = [3, 6]

    # Initialize empty lists for train and test data
    train_data = []
    test_data = []

    # Get the trials ids for the task
    task_trials_ids = [id for id, i in enumerate(trials_info["tasknumber"]) if i == task_name_dict[task_name]]
    session_data = session_data[task_trials_ids, :, :]
    num_trials = len(task_trials_ids)

    # Get the trials results for the task
    task_results_ids = np.array(trials_info["result"])[task_trials_ids]
    # Filter the session data to get only the true trials
    valid_results_ids = np.where(task_results_ids == np.bool_(True))[0]

    for run_idx in range(0, num_trials, 25):  # Jump by 25 trials

        run_data = session_data[run_idx:run_idx + 25, :, :]

        if run_idx // 25 + 1 in train_runs:
            train_data.append(run_data)
        elif run_idx // 25 + 1 in test_runs:
            test_data.append(run_data)

    # Combine the runs into a single numpy array
    train_data = np.concatenate(train_data, axis=0)
    test_data = np.concatenate(test_data, axis=0)

    return train_data, test_data


In [36]:
train_data, test_data = split_data(epo_data, trial_info, "LR")

In [39]:
train_data.y

AttributeError: 'numpy.ndarray' object has no attribute 'y'

In [38]:
test_data.shape

(34, 1, 62, 11041)