In [1]:
import mne
from pathlib import Path

# Open the BrainVision header file of one recording (preload=False is passed through as-is)
vhdr_path = Path("/mnt/d/thesis/sat1/eeg4/MD3-0021.vhdr")
raw = mne.io.read_raw_brainvision(vhdr_path, preload=False)

# Declare the eye and reference channels explicitly so they are not treated as EEG channels
non_eeg_channel_types = {"EOGh": "eog", "EOGv": "eog", "A1": "misc", "A2": "misc"}
raw.set_channel_types(non_eeg_channel_types)

# The montage is applied with the "Fp1"/"Fp2" spelling, after which the
# original upper-case names are restored
raw.rename_channels({"FP1": "Fp1", "FP2": "Fp2"})
raw.set_montage("standard_1020")
# Last expression of the cell: its return value is what the notebook displays
raw.rename_channels({"Fp1": "FP1", "Fp2": "FP2"})

Extracting parameters from /mnt/d/thesis/sat1/eeg4/MD3-0021.vhdr...
Setting channel info structure...


  raw.set_channel_types(
['A1', 'A2']
Consider setting the channel types to be of EEG/sEEG/ECoG/DBS/fNIRS using inst.set_channel_types before calling inst.set_montage, or omit these channels when creating your montage.
  raw.set_montage("standard_1020")  # Standard 10-20 electrode montage


0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,33 points
Good channels,"30 EEG, 2 misc, 2 EOG"
Bad channels,
EOG channels,"EOGh, EOGv"
ECG channels,Not available
Sampling frequency,500.00 Hz
Highpass,0.00 Hz
Lowpass,250.00 Hz


In [2]:
# Sampling frequency stored in the recording's info structure
raw.info["sfreq"]

500.0

In [4]:
# Read one preprocessed epochs file; the returned object is the cell's displayed value.
# NOTE(review): the recorded output of this cell is a plain tuple, which does not look
# like the result of this call - the cell was presumably edited after it last ran;
# confirm by re-running.
mne.read_epochs(Path("data/sat1/preprocessed/processed_0001_epo.fif"))

(34, 1381968)

In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import xarray as xr
from pathlib import Path
import numpy as np

# Read the two datasets used in the rest of the notebook from their .nc files
stage_data = xr.load_dataset(Path("data/sat1/stage_data.nc"))
split_stage_data = xr.load_dataset(Path("data/sat1/split_stage_data.nc"))

In [7]:
# Channel names arranged on an 8 x 5 grid (rows run top to bottom as listed).
# "M" is a placeholder for grid cells that hold no channel.
channels_2d = np.array(
    (
        ("M", "Fp1", "M", "Fp2", "M"),
        ("M", "M", "AFz", "M", "M"),
        ("F7", "F3", "Fz", "F4", "F8"),
        ("FC5", "FC1", "FCz", "FC2", "FC6"),
        ("T7", "C3", "Cz", "C4", "T8"),
        ("CP5", "CP1", "CPz", "CP2", "CP6"),
        ("P7", "P3", "Pz", "P4", "P8"),
        ("M", "O1", "M", "O2", "M"),
    ),
    dtype=str,
)

print(channels_2d)

[['M' 'Fp1' 'M' 'Fp2' 'M']
 ['M' 'M' 'AFz' 'M' 'M']
 ['F7' 'F3' 'Fz' 'F4' 'F8']
 ['FC5' 'FC1' 'FCz' 'FC2' 'FC6']
 ['T7' 'C3' 'Cz' 'C4' 'T8']
 ['CP5' 'CP1' 'CPz' 'CP2' 'CP6']
 ['P7' 'P3' 'Pz' 'P4' 'P8']
 ['M' 'O1' 'M' 'O2' 'M']]


In [11]:
# Preprocess the split data in one chain, leaving split_stage_data itself untouched:
#   1. stack participant / epochs / labels into a single MultiIndex dimension "index"
#   2. move "index" to the front of the dimension order
#   3. drop every "index" entry whose values are all NaN
dataset = (
    split_stage_data.stack({"index": ["participant", "epochs", "labels"]})
    .transpose("index", ...)
    .dropna("index", how="all")
)

In [116]:
# Number of entries along the "samples" coordinate of the stacked dataset
len(dataset.samples)

154

In [112]:
# Element dtype of the dataset's "data" variable
dataset.data.dtype

dtype('float64')

In [40]:
# Grid dimensions come from the channel layout (rows = height, columns = width)
height, width = channels_2d.shape

# Create array full of 'empty' values (999). The shape is derived from the data
# instead of being hard-coded, so the cell keeps working when the number of
# index entries, samples or grid cells changes:
#   (index entries, grid rows, grid columns, samples)
reshaped_data = np.full(
    (dataset.sizes["index"], height, width, dataset.sizes["samples"]),
    999,
    dtype=np.float64,
)

In [41]:
# Walk the layout column by column (x), top to bottom (y), and copy each
# channel's values into its grid cell; placeholder cells ("M") keep their fill value.
for x, column in enumerate(channels_2d.T):
    for y, channel_name in enumerate(column):
        print(channel_name)
        if channel_name != "M":
            reshaped_data[:, y, x, :] = dataset.sel(channels=channel_name).data

M
M
F7
FC5
T7
CP5
P7
M
Fp1
M
F3
FC1
C3
CP1
P3
O1
M
AFz
Fz
FCz
Cz
CPz
Pz
M
Fp2
M
F4
FC2
C4
CP2
P4
O2
M
M
F8
FC6
T8
CP6
P8
M


In [42]:
# Entry 1 along the first axis, the full 8 x 5 grid, sample 2.
# Cells still holding 999 are grid positions that were never overwritten.
reshaped_data[1, :, :, 2]

array([[ 9.99000000e+02,  7.35478306e+00,  9.99000000e+02,
         1.32752419e+01,  9.99000000e+02],
       [ 9.99000000e+02,  9.99000000e+02,  1.06792793e+01,
         9.99000000e+02,  9.99000000e+02],
       [ 2.34640813e+00, -1.90565944e+00,  8.45465565e+00,
         9.86831188e+00,  1.46217279e+01],
       [-3.91972351e+00, -3.00415444e+00,  2.70070457e+00,
         4.81988144e+00,  9.97462177e+00],
       [-5.27048492e+00, -1.66501637e+01, -5.56061649e+00,
         2.93036580e+00,  1.37563362e+01],
       [-1.89969082e+01, -1.87456570e+01, -1.06818810e+01,
        -9.08121777e+00,  6.67964745e+00],
       [-1.19519901e+01, -2.25692081e+01, -1.92385502e+01,
         7.93491006e-02,  1.12508097e+01],
       [ 9.99000000e+02, -6.07885122e+00,  9.99000000e+02,
         8.60692787e+00,  9.99000000e+02]])

In [94]:
# Start from the stacked dataset's coordinates, drop the per-channel coordinate
# and add integer coordinates "x" (8 entries) and "y" (5 entries)
extra_coords = (
    dataset.coords.to_dataset()
    .drop_vars("channels")
    .assign_coords(x=np.arange(8), y=np.arange(5))
)

In [100]:
# Attach the filled grid as variable "data" with dimensions (index, x, y, samples)
extra_coords = extra_coords.assign(data=(("index", "x", "y", "samples"), reshaped_data))

In [92]:
# Name the axes explicitly instead of leaving them to be inferred from the
# coordinate container, whose dimension order need not match reshaped_data's
# axes. Only the coordinates of extra_coords are passed, so any data variable
# it may hold is ignored.
new_array = xr.DataArray(
    reshaped_data,
    dims=("index", "x", "y", "samples"),
    coords=extra_coords.coords,
)

In [34]:
import xarray as xr
from shared.training import get_folds
from pathlib import Path
import numpy as np
from copy import deepcopy

split_stage_data = xr.load_dataset(Path("data/sat1/split_stage_data.nc"))
folds = get_folds(split_stage_data, 5)

# Use every fold once as the test fold; the remaining folds are joined into
# a single training array.
for i in range(len(folds)):
    # Work on a copy so that popping the test fold leaves `folds` intact
    train_folds = deepcopy(folds)
    test_fold = train_folds.pop(i)
    # Concatenate the remaining folds (`train_folds`, plural); `train_fold`
    # does not exist yet at this point on a fresh kernel.
    train_fold = np.concatenate(train_folds, axis=0)
    print(test_fold)
    print(train_fold)

['0007' '0017' '0023' '0003' '0008']
['0025' '0001' '0014' '0020' '0021' '0015' '0010' '0016' '0024' '0018'
 '0019' '0005' '0011' '0022' '0002' '0013' '0009' '0012' '0006' '0004']
['0025' '0001' '0014' '0020' '0021']
['0007' '0017' '0023' '0003' '0008' '0015' '0010' '0016' '0024' '0018'
 '0019' '0005' '0011' '0022' '0002' '0013' '0009' '0012' '0006' '0004']
['0015' '0010' '0016' '0024' '0018']
['0007' '0017' '0023' '0003' '0008' '0025' '0001' '0014' '0020' '0021'
 '0019' '0005' '0011' '0022' '0002' '0013' '0009' '0012' '0006' '0004']
['0019' '0005' '0011' '0022' '0002']
['0007' '0017' '0023' '0003' '0008' '0025' '0001' '0014' '0020' '0021'
 '0015' '0010' '0016' '0024' '0018' '0013' '0009' '0012' '0006' '0004']
['0013' '0009' '0012' '0006' '0004']
['0007' '0017' '0023' '0003' '0008' '0025' '0001' '0014' '0020' '0021'
 '0015' '0010' '0016' '0024' '0018' '0019' '0005' '0011' '0022' '0002']


In [17]:
# Inspect the "channels" coordinate of the split dataset
split_stage_data.channels

In [28]:
import numpy as np

# "data" variable for one participant, one label and one channel
test_split = split_stage_data.sel(
    participant="0021", labels="confirmation", channels="Fp1"
).data

# Count the entries along the first axis that consist solely of NaN values
nan_ct = sum(1 for epoch_values in test_split if np.all(np.isnan(epoch_values)))

print(nan_ct)

121


In [35]:
# Values of test_split for the epoch labelled 11
test_split.sel(epochs=11)

In [36]:
# "labels" of stage_data for participant "0021", epoch 11
stage_data.sel(participant="0021", epochs=11).labels

In [7]:
# "data" of stage_data for participant "0021", epoch 0, channel "Fp1"
stage_data.sel(participant="0021", epochs=0, channels="Fp1").data

In [None]:
#### LOOCV
# NOTE(review): this cell has no execution count and uses names that are not
# defined in the visible part of the notebook (hmp, plt, model_all,
# model_accuracy, eeg_data, hmp_data, positions) - presumably they are created
# elsewhere; confirm before running.

# Fit with step=1; the result is stored but not used further in this cell
estimates = model_all.fit(step=1)

# Fit a single model with n_events=8 and plot it
max_fit = model_all.fit_single(n_events=8)
hmp.visu.plot_topo_timecourse(eeg_data, max_fit, positions, model_all)

# Backward estimation; the result exposes n_events and likelihoods (used below)
bests = model_all.backward_estimation()

# NOTE(review): this call passes model_accuracy while every other call in the
# cell uses model_all - confirm that this is intended.
hmp.visu.plot_topo_timecourse(
    eeg_data, bests, positions, model_accuracy, ydim="n_events"
)
# Likelihood of each fitted model against its number of events
plt.plot(bests.n_events, bests.likelihoods, "o-")

plt.ylabel("Log-likelihood")
plt.xlabel("N-event model")
plt.show()
# Cross-validation over the models in `bests`, run with cpus=4
# (presumably leave-one-out, going by the cell heading - verify against hmp)
loocv = hmp.utils.loocv_mp(model_all, hmp_data, bests, cpus=4)

In [2]:
import tensorflow as tf

# Report how many physical devices of type "GPU" TensorFlow lists
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  1


2023-07-11 12:59:50.433177: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-11 12:59:50.533576: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-07-11 12:59:50.533645: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.


In [None]:
# Old code from process_model:
def process_model(model, eeg_data, labels):
    """Mark, for every trial, which event each sample belongs to.

    Per trial, the location of each event is taken from ``model.eventprobs``
    via ``idxmax`` along ``samples``. The samples from one event location up
    to (not including) the next one are filled with that event's index. The
    last event is filled up to ``RT_sample - 1`` (exclusive), where
    ``RT_sample`` is the position of the first null sample of the trial.

    Parameters
    ----------
    model
        Object exposing ``event``, ``eventprobs`` and ``trial_x_participant``.
    eeg_data
        Object exposing ``data``, ``participant`` and ``sel``. The shape of
        ``eeg_data.data`` is the shape of the result, which is indexed as
        ``[participant, epoch, :, samples]``.
    labels
        One label per event; only its length is checked.

    Returns
    -------
    numpy.ndarray
        Array of shape ``eeg_data.data.shape``, NaN everywhere except in the
        sample ranges described above, which hold the event index.

    Raises
    ------
    ValueError
        If ``len(labels)`` differs from the number of events in the model.
    """
    n_events = len(model.event)
    if n_events != len(labels):
        raise ValueError(
            "Amount of labels is not equal to amount of events, adjust labels parameter"
        )

    # Output container and lookups shared by all trials
    event_locations = model.eventprobs.idxmax(dim="samples").astype(int)
    labels_array = np.full(eeg_data.data.shape, fill_value=np.nan)
    participants = list(eeg_data.participant.values)
    final_event = n_events - 1

    # Each set of event locations is paired with its (participant, epoch) key
    for trial_locations, trial_key in zip(event_locations, model.trial_x_participant):
        key = trial_key.item()
        participant_id = key[0]
        epoch = key[1]
        onsets = trial_locations.values
        print(f"Processing participant {participant_id} and epoch {epoch}")

        # The first null sample of the trial is taken as the reaction time
        # sample, i.e. where the last stage ends.
        # TODO Maybe not reliable enough, what if electrode 0 (Fp1) is working
        # but others are not - only the first entry of the result is used.
        trial_nulls = eeg_data.sel(participant=participant_id, epochs=epoch).isnull()
        RT_sample = int(trial_nulls.argmax("samples").data[0])
        participant = participants.index(participant_id)

        # TODO: instead of all this, add a dimension with labels to the data array
        for j, onset in enumerate(onsets):
            # Every event runs until the next event location, except the
            # final one, which runs until the reaction time
            if j == final_event:
                stop = RT_sample - 1
            else:
                stop = onsets[j + 1]
            labels_array[participant, epoch, :, onset:stop] = j

    return labels_array