In [1]:
# Import some libraries
import os
import numpy as np
import mne
import pandas as pd

%matplotlib qt 

In [2]:
# Prepare helper functions
def create_header_row(original_row):
    """
    Build a trial-start header row from the first row of a trial.

    The input row is copied (the original is left untouched) and the
    stimulus-specific fields of the copy are overwritten: 'eeg_trigger'
    becomes 99, 't_stim' becomes 0, and 'sequence' / 'position' are set
    to None. All other fields are carried over unchanged.
    """
    header = original_row.copy()

    # Fields that distinguish a header row from a stimulus row
    overrides = (
        ('eeg_trigger', 99),
        ('t_stim', 0),
        ('sequence', None),
        ('position', None),
    )
    for field, value in overrides:
        header[field] = value

    return header

def create_extended_df(data_frame, rows_per_trial=4):
    """
    Return a new DataFrame with a header row inserted before every trial.

    The input is read in consecutive chunks of `rows_per_trial` rows (by
    position). For each chunk, a header row is built from the chunk's first
    row with `create_header_row` and placed directly in front of it.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        One row per stimulus, trials stored contiguously.
    rows_per_trial : int, default 4
        Number of consecutive rows that make up one trial.

    Returns
    -------
    pandas.DataFrame
        The extended frame with a fresh 0..n-1 index. The input is not
        modified.

    Raises
    ------
    ValueError
        If `rows_per_trial` is smaller than 1.
    """
    if rows_per_trial < 1:
        raise ValueError(f"rows_per_trial must be >= 1, got {rows_per_trial}")

    # pd.concat raises on an empty list, so handle "no trials" explicitly.
    if len(data_frame) == 0:
        return data_frame.reset_index(drop=True)

    pieces = []
    for start in range(0, len(data_frame), rows_per_trial):
        # Create header row from first row of trial
        header_row = create_header_row(data_frame.iloc[start])
        pieces.append(pd.DataFrame([header_row]))

        # Add original trial rows
        pieces.append(data_frame.iloc[start:start + rows_per_trial])

    # The pieces are already in their final order, so concatenate them
    # positionally. This avoids relying on the input's index labels
    # (sorting on "i - 0.5" is only correct for a default RangeIndex).
    return pd.concat(pieces, ignore_index=True)

def align_df_with_array(df, modified_array, pattern_column='eeg_trigger'):
    """
    Align a DataFrame with an array from which some entries were dropped.

    `df[pattern_column]` is treated as the complete sequence and
    `modified_array` as the same sequence with some elements removed. Both
    are walked in order; a row of `df` is kept when its value equals the
    next not-yet-matched element of `modified_array`, otherwise it is
    considered dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the complete sequence in `pattern_column`.
    modified_array : sequence
        The reduced sequence to align against.
    pattern_column : str, default 'eeg_trigger'
        Column of `df` that is compared with `modified_array`.

    Returns
    -------
    pandas.DataFrame
        The kept rows of `df`, in order, with a fresh 0..n-1 index.

    Warns
    -----
    RuntimeWarning
        If `df` is exhausted before every element of `modified_array` has
        been matched, i.e. `modified_array` is not an ordered subset of the
        column and the result is shorter than `modified_array`.

    Notes
    -----
    Matching is greedy: when a dropped value is immediately followed by an
    identical one, the first occurrence is kept.
    """
    import warnings

    # Convert DataFrame column to numpy array for comparison
    complete_array = df[pattern_column].values
    n_modified = len(modified_array)

    rows_to_keep = []  # positions in df that survived in modified_array
    j = 0              # next unmatched position in modified_array
    for i, value in enumerate(complete_array):
        if j >= n_modified:
            break
        if value == modified_array[j]:
            rows_to_keep.append(i)
            j += 1
        # otherwise this value was dropped from the complete sequence

    if j < n_modified:
        # Previously this failed silently; keep the return value but tell
        # the caller the alignment is incomplete.
        warnings.warn(
            f"Only {j} of {n_modified} values in modified_array could be "
            f"matched to '{pattern_column}'; alignment is incomplete.",
            RuntimeWarning,
            stacklevel=2,
        )

    return df.iloc[rows_to_keep].reset_index(drop=True)

def make_eegtrigger(row):
    """
    Compute the stimulus position (1-4) for one row of the behavioural data.

    The four positions are visited in ascending order when
    'trial_direction' is 0 and in descending order when it is 1, starting
    from an offset given by 'start_position'. The 1-based 'sequence' value
    selects which step of that order this row corresponds to.

    Parameters
    ----------
    row : mapping (e.g. pandas.Series or dict)
        Must provide 'trial_direction' (0 or 1), 'start_position' (integer
        shift) and 'sequence' (1-4).

    Returns
    -------
    The position for this row. Note that this is the position only, not a
    full trigger code.

    Raises
    ------
    ValueError
        If 'trial_direction' is neither 0 nor 1.

    Notes
    -----
    As a side effect the result is also written to ``row['position']``.
    """
    direction = row['trial_direction']  # avoid shadowing the builtin `dir`
    start = row['start_position']
    seq_idx = row['sequence'] - 1       # 'sequence' is 1-based

    # This is to select the presentation direction
    positions = np.arange(1, 5)
    if direction == 0:
        # ascending order, rotated to the starting point of the stimulus
        stimpos_ls = np.roll(positions, -start)
    elif direction == 1:
        # reversed order, rotated to the starting point of the stimulus
        stimpos_ls = np.roll(positions[::-1], 1 + start)
    else:
        raise ValueError(
            f"trial_direction must be 0 or 1, got {direction!r}"
        )
    row['position'] = stimpos_ls[seq_idx]

    return row['position']


In [3]:
excluded_pp = [3,14,20]

# Paths and settings that do not depend on the participant
data_directory = '/Users/mvmigem/Documents/data/project_1/'
cleaned_data_dir = os.path.join(data_directory, 'preprocessed/')
desired_sfreq = 256  # Hz

# Event dictionaries for the two sets of trigger codes found in the data.
# Each stimulus code is position*10 + sequence; 99 marks the start of a trial.
event_id_start_0_2 = {
    'start_trial':99, 'pos1/seq1':11, 'pos1/seq3':13,
    'pos2/seq2':22, 'pos2/seq4':24,
    'pos3/seq1':31, 'pos3/seq3':33,
    'pos4/seq2':42, 'pos4/seq4':44,
}
event_id_start_1_3 = {
    'start_trial':99, 'pos1/seq2':12, 'pos1/seq4':14,
    'pos2/seq1':21, 'pos2/seq3':23,
    'pos3/seq2':32, 'pos3/seq4':34,
    'pos4/seq1':41, 'pos4/seq3':43,
}

for sub in range(1,2):
    if sub in excluded_pp: # has no data
        continue

    # Load continuous data
    clean_raw_path = os.path.join(cleaned_data_dir,f'mastoid_raw/main_clean_mastoidref_{sub:02}-raw.fif')
    raw = mne.io.read_raw_fif(clean_raw_path)

    # Downsampling variables (logic -> https://mne.tools/stable/auto_tutorials/preprocessing/30_filtering_resampling.html#best-practices)
    current_sfreq = raw.info['sfreq']
    decim = np.round(current_sfreq / desired_sfreq).astype(int)
    events = mne.find_events(raw)

    # Load the behavioural data of this participant
    current_behav_path = os.path.join(
        data_directory, 'raw_data', f'sub_{sub}', 'behav', f'predatt_participant_{sub}.csv')
    behav_data = pd.read_csv(current_behav_path)

    # Select event dict for condition
    start_positions = behav_data['start_position']
    if start_positions.isin([0, 2]).any():
        event_id = dict(event_id_start_0_2)
    elif start_positions.isin([1, 3]).any():
        event_id = dict(event_id_start_1_3)
    else:
        # Without this branch a later participant would silently reuse the
        # event_id of the previous loop iteration.
        raise ValueError(
            f"Participant {sub}: 'start_position' contains none of 0, 1, 2, 3; "
            "cannot select an event dictionary."
        )

    # Epoch data around stim onset (decimated towards desired_sfreq)
    epochs = mne.Epochs(raw, events, event_id = event_id,
        tmin = -0.1, tmax = 1.0, proj = False, baseline = (None,0), decim=decim,
        detrend = None, verbose = True, reject_by_annotation= False, preload = True)

    ep_events = epochs.events

    # Expand the behavioural data to one row per stimulus (4 per trial) and
    # rebuild the trigger code each stimulus should have sent.
    meta_data = pd.DataFrame(np.repeat(behav_data.values, 4, axis=0), columns=behav_data.columns)
    meta_data['sequence'] = np.tile(np.arange(1, 5), len(behav_data))
    meta_data['position'] = meta_data.apply(make_eegtrigger,axis=1)
    meta_data['eeg_trigger'] = meta_data['position']*10 + meta_data['sequence']

    # Add header row (trial-start trigger 99) in front of every trial
    extended_df = create_extended_df(meta_data)
    # Align data: drop the rows whose events are not present in the epochs
    aligned_df = align_df_with_array(extended_df, ep_events[:,2])

    # Disabled: attach the metadata and save the last-stimulus epochs.
    # if all(ep_events[:,2] == aligned_df['eeg_trigger'].to_numpy()):
    #     epochs.metadata = aligned_df
    #     ep_p4 = epochs['seq4']
    #     ep_p4.save(f"C:/Users/mvmigem/Documents/data/project_1/preprocessed/mastoid_last_stim/last_stim_{sub:02}-epo.fif", overwrite=True)

Opening raw data file /Users/mvmigem/Documents/data/project_1/preprocessed/mastoid_raw/main_clean_mastoidref_01-raw.fif...
    Range : 0 ... 1465855 =      0.000 ...  2862.998 secs
Ready.
Finding events on: Status
Trigger channel Status has a non-zero initial value of 65536 (consider using initial_event=True to detect this event)
4271 events found on stim channel Status
Event IDs: [   12    14    21    23    32    34    41    43    99   255 65536 65789
 65791]
Not setting metadata
4189 matching events found


  epochs = mne.Epochs(raw, events, event_id = event_id,


Setting baseline interval to [-0.09765625, 0.0] s
Applying baseline correction (mode: mean)
Loading data for 4189 events and 564 original time points (prior to decimation) ...
1 bad epochs dropped
