In [1]:
%load_ext autoreload
%autoreload 2
import netCDF4
import xarray as xr
from pathlib import Path
from hmpai.pytorch.models import *
from hmpai.training import split_participants
from hmpai.pytorch.utilities import set_global_seed
from hmpai.pytorch.generators import MultiXArrayProbaDataset
from hmpai.pytorch.normalization import *
from hmpai.pytorch.transforms import *
from hmpai.pytorch.mamba import *
from hmpai.pytorch.training import train_and_test

from mne.io import read_info
import os
# Root directory of the data files, read from the DATA_PATH environment variable.
# Fail early with an explicit message: Path(None) would otherwise raise an opaque TypeError.
_data_path_env = os.getenv("DATA_PATH")
if _data_path_env is None:
    raise EnvironmentError(
        "The DATA_PATH environment variable is not set; point it at the data directory."
    )
DATA_PATH = Path(_data_path_env)

In [3]:
# Locate the three netCDF inputs (t1, t2 and cmb) underneath DATA_PATH.
t1_path, t2_path, cmb_path = (
    DATA_PATH / relative
    for relative in (
        "prp/stage_data_250hz_t1.nc",
        "prp/stage_data_250hz_t2.nc",
        "prp/VP_all_trial_250_off50.nc",
    )
)

# Open each file as an xarray Dataset, in the same order as the paths above.
t1, t2, cmb = map(xr.open_dataset, (t1_path, t2_path, cmb_path))

In [64]:
# Goal: create a new file that contains the EEG from cmb, and the HMP data from t2
# appended to t1, combining their labels.
SHORT_DELAY = 300  # ms
LONG_DELAY = 1200  # ms
SAMPLING_FREQUENCY = 250  # Hz

# Manual testing showed that the actual delay in the EEG data is the nominal value
# minus 6 samples (69 and 294 respectively), see delay_analysis.ipynb.
DELAY_CORRECTION = 6  # samples

# Integer arithmetic (ms * Hz // 1000) instead of int(<float>), so the sample count
# cannot end up one short because of floating-point rounding.
SHORT_PADDING = SHORT_DELAY * SAMPLING_FREQUENCY // 1000 - DELAY_CORRECTION
LONG_PADDING = LONG_DELAY * SAMPLING_FREQUENCY // 1000 - DELAY_CORRECTION

# Probabilities of t1 are used in full.
t1_proba = t1["probabilities"]
# For t2, index 0 along the second-to-last axis is dropped.
# NOTE(review): presumably that entry is already covered by t1's labels — confirm.
t2_proba = t2["probabilities"][..., 1:, :]

# Show both shapes for a quick sanity check.
for proba in (t1_proba, t2_proba):
    print(proba.shape)

(21, 1315, 5, 500)
(21, 1308, 3, 437)


In [65]:
# Boolean masks marking where t1 and t2 agree on the condition.
# This is difficult to achieve with plain indexing when the epochs do not line up exactly.
t1_condition = t1["condition"]
t2_condition = t2["condition"]

both_short = (t1_condition == "short") & (t2_condition == "short")
both_long = (t1_condition == "long") & (t2_condition == "long")

# Keep only the raw NumPy arrays for use in np.where later on.
short_cond = both_short.values
long_cond = both_long.values

In [66]:
# Zero-pad the last axis of t2_proba so that both variants end up equally long:
#   short: SHORT_PADDING zeros in front, LONG_PADDING - SHORT_PADDING zeros behind
#   long:  LONG_PADDING zeros in front, nothing behind
no_pad = (0, 0)
short_widths = (no_pad, no_pad, no_pad, (SHORT_PADDING, LONG_PADDING - SHORT_PADDING))
long_widths = (no_pad, no_pad, no_pad, (LONG_PADDING, 0))

short_padded = np.pad(t2_proba, short_widths, mode="constant", constant_values=0)
long_padded = np.pad(t2_proba, long_widths, mode="constant", constant_values=0)

for padded in (short_padded, long_padded):
    print(padded.shape)

(21, 1308, 3, 737)
(21, 1308, 3, 737)


In [72]:
# Broadcast the condition masks over the two trailing axes of the padded arrays.
short_mask = short_cond[..., None, None]
long_mask = long_cond[..., None, None]

# Start from zeros, fill in the short-padded values where the short mask holds, then
# the long-padded values where the long mask holds; everything else stays 0.
combined_padded = np.where(
    long_mask,
    long_padded,
    np.where(short_mask, short_padded, np.full(short_padded.shape, 0)),
)

# Take only 500 samples since the EEG is only 2 s long.
combined_padded = combined_padded[..., :500]
print(combined_padded.shape)

(21, 1308, 3, 500)


In [73]:
# Will work once the epochs are fixed. For now everything is truncated to the number
# of epochs in combined_padded, which of course does NOT align the epochs correctly.
# TODO: replace this truncation with a proper epoch alignment.
n_epochs = combined_padded.shape[1]
t1_proba = t1_proba[:, :n_epochs]
cmb = cmb.isel(epochs=range(n_epochs))

# Append the padded t2 probabilities to the t1 probabilities along axis 2.
# np.concatenate is used because the np.concat alias only exists in NumPy >= 2.0.
t1_t2_combined = np.concatenate([t1_proba, combined_padded], axis=2)

In [74]:
# Dataset.assign returns a new Dataset and leaves the original untouched, so the result
# must be bound back to cmb; otherwise the dataset written to disk afterwards would not
# contain the combined probabilities.
cmb = cmb.assign(probabilities=(('participant', 'epochs', 'labels', 'samples'), t1_t2_combined))
# Display the updated dataset as the cell output.
cmb

In [None]:
# Write the dataset currently bound to cmb to a netCDF file directly under DATA_PATH.
combined_path = DATA_PATH / "stage_data_250hz_combined.nc"
cmb.to_netcdf(combined_path)