### Import Libraries

In [1]:
# Most code adapted from Gabriel Weindel at https://github.com/GWeindel/hmp
# https://github.com/GWeindel/hmp/blob/main/tutorials/1-Data_loading.ipynb
import numpy as np 
import pandas as pd
import xarray as xr
import hsmm_mvpy as hmp
from pathlib import Path

### Set up paths and participants

In [4]:
# Directory holding the pre-processed EEG exports ('<participant>.csv' files)
eeg_data_path = Path('preprocessing/sat1/')
# Behavioural files are read from the 'behavior' sub-directory of the EEG directory
behavioral_data_path = eeg_data_path / 'behavior'

# Path where output will be saved
output_filename = 'data.nc'
output_path = Path('data/sat1/') / output_filename

# A participant id is the part of each CSV file name before the first '.'.
# Path.glob yields entries in arbitrary (file-system dependent) order, so the ids are
# sorted to keep the participant order reproducible from run to run.
# Slice this list (e.g. subj_ids[:5]) to build a dataset from a subset of the participants.
subj_ids = sorted(csv_file.name.split('.')[0] for csv_file in eeg_data_path.glob('*.csv'))
print(subj_ids)

['0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008', '0009', '0010', '0011', '0012', '0013', '0014', '0015', '0016', '0017', '0018', '0019', '0020', '0021', '0022', '0023', '0024', '0025']


### Load data into epoched xarray

In [3]:
# Reaction times outside [RT_MIN, RT_MAX] are zeroed out below
# (bounds are expressed in the same unit as the 'RT' column of the behavioural file).
RT_MIN, RT_MAX = 300, 3000

# NOTE(review): the recorded run of this cell stopped with
# "ValueError: different number of dimensions on data and dims: 1 vs 2" while processing the
# first participant (before the second 'Loading participant' message), i.e. during the
# hmp.utils.parsing_epoched_eeg call. The cause is not visible in this notebook.
data = []

for participant in subj_ids:
    print(f'Loading participant {participant}')

    behavior = pd.read_csv(behavioral_data_path / f'{participant}-cnv-sat3_ET.csv', sep=';')

    # Rename categorical variables for clarity and transparency.
    # Series.map leaves every code that is not listed in the mapping as NaN, which is
    # what the previous row-wise apply produced for unexpected codes.
    behavior['movement'] = behavior['movement'].map({-1: 'stim_left', 1: 'stim_right'})
    behavior['resp'] = behavior['resp'].map({1: 'resp_left', 2: 'resp_right'})

    # Merging together the experimental conditions info to have the format condition/stimulus/response
    behavior['trigger'] = behavior['cue'] + '/' + behavior['movement'] + '/' + behavior['resp']

    # Zero out reaction times below RT_MIN or above RT_MAX.
    # Comparisons with NaN are False, so missing RTs are left untouched.
    rt = behavior['RT']
    behavior['RT'] = rt.mask((rt < RT_MIN) | (rt > RT_MAX), 0)

    # Read EEG data
    eeg_data = pd.read_csv(eeg_data_path / f'{participant}.csv', index_col=0).reset_index(drop=True)
    # Drop any column that is not time, epoch, electrode name
    eeg_data = eeg_data.drop(columns='condition')
    # Rename FP1 and FP2 to align to montage convention
    eeg_data = eeg_data.rename(columns={'FP1': 'Fp1', 'FP2': 'Fp2'})

    # Use HMP function to cut off epochs to RT duration
    data.append(hmp.utils.parsing_epoched_eeg(data=eeg_data, rts=behavior['RT'],
                                              conditions=behavior['trigger'], sfreq=100))

# Store participant data into xarray, one entry per participant along a 'participant' dimension
epoched_data = xr.concat(data, pd.Series(subj_ids, name='participant'), fill_value={'event':'', 'data': np.nan})

# Save xarray for use in HMP bump estimation
# epoched_data.to_netcdf(output_path)

Loading participant 0001
NaN present in condition array, removing associated epoch and RT ([0])
Totaling 185 valid trials


ValueError: different number of dimensions on data and dims: 1 vs 2

In [11]:
# Collect the epoched MNE files as plain strings. Path.glob yields entries in arbitrary
# order, so the list is sorted to make the first file deterministic; otherwise the
# hard-coded subj_idx below could end up labelling a different participant's file.
subj_files = sorted(str(fif_path) for fif_path in eeg_data_path.glob('*-06_epo.fif'))

# NOTE(review): subj_idx holds a single id; if the glob matches more than one file the ids
# presumably no longer line up with the files — confirm against hmp's read_mne_data.
# NOTE(review): the CSV-based loading cell filters 'RT' on 300-3000, whereas the limits
# passed here are 0.3 and 2 — confirm which unit read_mne_data expects for rt_col.
# NOTE(review): the recorded run of this cell ended with
# "IndexError: index -1 is out of bounds for axis 0 with size 0"; cause not visible here.
data = hmp.utils.read_mne_data(subj_files, epoched=True, sfreq=100,
                               lower_limit_RT=0.3, upper_limit_RT=2,
                               verbose=False, subj_idx=['0001'], rt_col='RT')

Processing participant preprocessing\sat1\processed_0001_2023-06_epo.fif's epoched eeg


IndexError: index -1 is out of bounds for axis 0 with size 0

In [None]:
data = []

for participant in subj_ids:
    print(f'Loading participant {participant}')

    behavior = pd.read_csv(behavioral_data_path / f'{participant}-cnv-sat3_ET.csv', sep=';')

    # Rename categorical variables for clarity and transparency.
    # Series.map leaves every code that is not listed in the mapping as NaN, which is
    # what the previous row-wise apply produced for unexpected codes.
    behavior['movement'] = behavior['movement'].map({-1: 'stim_left', 1: 'stim_right'})
    behavior['resp'] = behavior['resp'].map({1: 'resp_left', 2: 'resp_right'})

    # Merging together the experimental conditions info to have the format condition/stimulus/response
    behavior['trigger'] = behavior['cue'] + '/' + behavior['movement'] + '/' + behavior['resp']

    # RT filtering (zeroing reaction times below 300 and above 3000) is currently disabled in this cell
    # behavior['RT'] = behavior.apply(lambda row: 0 if row['RT'] < 300 else (
    #             0 if row['RT'] > 3000 else row['RT']), axis = 1)

    # A stray `data.append(hmp.utils.read_mne_data(eeg_data_path / f'{participant.csv}'))` was
    # removed here: `participant.csv` is an attribute lookup on a str and raises AttributeError,
    # and the extra append would have put two entries per participant into `data`, which no
    # longer matches the one-id-per-participant coordinate passed to xr.concat below.

    # Read EEG data
    eeg_data = pd.read_csv(eeg_data_path / f'{participant}.csv', index_col=0).reset_index(drop=True)
    # Drop any column that is not time, epoch, electrode name
    eeg_data = eeg_data.drop(columns='condition')
    # Rename FP1 and FP2 to align to montage convention
    eeg_data = eeg_data.rename(columns={'FP1': 'Fp1', 'FP2': 'Fp2'})

    # Use HMP function to cut off epochs to RT duration
    data.append(hmp.utils.parsing_epoched_eeg(data=eeg_data, rts=behavior['RT'],
                                              conditions=behavior['trigger'], sfreq=100))

# Store participant data into xarray, one entry per participant along a 'participant' dimension
epoched_data = xr.concat(data, pd.Series(subj_ids, name='participant'), fill_value={'event':'', 'data': np.nan})

# Save xarray for use in HMP bump estimation
# epoched_data.to_netcdf(output_path)

In [6]:
# Display the concatenated dataset built by xr.concat in the loading cell as a quick visual check
epoched_data