In [13]:
import pandas as pd
import numpy as np

In [51]:
# Input files for this run: the EEG sample table and its marker (trigger) table.
eeg_csv_path = 'Data/CSV/Subj1031/sub-CLASS_SUBJ_1031_ses-S001OFFLINE_FES_task-Default_run-001_eeg.csv'
marker_csv_path = 'Data/CSV/Subj1031/sub-CLASS_SUBJ_1031_ses-S001OFFLINE_FES_task-Default_run-001_eeg_markers.csv'

eeg_df = pd.read_csv(eeg_csv_path)
marker_df = pd.read_csv(marker_csv_path)

In [32]:
# ---------------------------------------------------------------------------
# 1. Label every EEG sample with the most recent marker
# ---------------------------------------------------------------------------
# Sort both tables by time so that "most recent marker" is well defined.
eeg_df = eeg_df.sort_values('Time')
marker_df = marker_df.sort_values('Time').reset_index(drop=True)

# For each EEG sample, find the last marker whose time is <= the sample time,
# i.e. the marker interval [t_i, t_{i+1}) that contains the sample. A single
# binary search over the sorted marker times replaces one full-frame boolean
# mask per marker. An index of -1 means the sample precedes the first marker.
marker_idx = np.searchsorted(
    marker_df['Time'].to_numpy(), eeg_df['Time'].to_numpy(), side='right'
) - 1
has_marker = marker_idx >= 0

sample_labels = np.full(len(eeg_df), np.nan)
sample_labels[has_marker] = marker_df['Label'].to_numpy(dtype=float)[marker_idx[has_marker]]
eeg_df['Label'] = sample_labels

# Normalise labelled samples to integer-valued strings (e.g. "200");
# samples before the first marker stay NaN.
eeg_df['Label'] = eeg_df['Label'].dropna().astype(float).astype(int).astype(str)

# ---------------------------------------------------------------------------
# 2. Map trigger codes to descriptive class names
# ---------------------------------------------------------------------------
TRIGGERS = {
    "MI_BEGIN": "200",
    "MI_END": "220",
    "MI_EARLYSTOP": "240",
    "ROBOT_BEGIN": "300",
    "ROBOT_END": "320",
    "ROBOT_EARLYSTOP": "340",
    "ROBOT_CONFIRM_STOP": "345",
    "REST_BEGIN": "100",
    "REST_END": "120",
    "REST_EARLYSTOP": "140"
}
label_to_class = {v: k for k, v in TRIGGERS.items()}

# Labels without an entry in TRIGGERS map to NaN.
eeg_df['OutcomeClass'] = eeg_df['Label'].map(label_to_class)

# ---------------------------------------------------------------------------
# 3. Reduce the per-sample classes to one event per class change
# ---------------------------------------------------------------------------
trial_start_triggers = ['MI_BEGIN', 'REST_BEGIN']
trial_end_triggers = ['MI_END', 'REST_END']

# An event is the first sample of each run of identical, non-missing classes.
# Comparing against a shifted Series avoids adding a helper column to eeg_df.
outcome = eeg_df['OutcomeClass']
is_event = outcome.notna() & (outcome != outcome.shift())
events_clean = eeg_df.loc[is_event, ['Time', 'OutcomeClass']].reset_index(drop=True)

starts = events_clean[events_clean['OutcomeClass'].isin(trial_start_triggers)].reset_index(drop=True)
ends = events_clean[events_clean['OutcomeClass'].isin(trial_end_triggers)].reset_index(drop=True)

# ---------------------------------------------------------------------------
# 4. Pair each BEGIN with its own END and number the trials
# ---------------------------------------------------------------------------
eeg_df['Trial'] = np.nan
trial_num = 1

for i in range(len(starts)):
    start_time = starts.loc[i, 'Time']
    start_class = starts.loc[i, 'OutcomeClass']
    expected_end_class = start_class.replace("BEGIN", "END")

    # The END must arrive before the next trial starts. Without this bound a
    # trial that never received its END (e.g. it was stopped early) would be
    # paired with a later trial's END, swallowing that trial entirely.
    next_start_time = starts.loc[i + 1, 'Time'] if i + 1 < len(starts) else np.inf

    matching_ends = ends[
        (ends['Time'] > start_time)
        & (ends['Time'] <= next_start_time)
        & (ends['OutcomeClass'] == expected_end_class)
    ]

    if matching_ends.empty:
        # Report the gap instead of skipping silently.
        print(f"Skipping {start_class} at {start_time:.3f}s: no {expected_end_class} before the next trial start")
        continue

    end_time = matching_ends.iloc[0]['Time']

    # Debug print (can comment out)
    print(f"Trial {trial_num}: {start_class} from {start_time:.3f}s to {end_time:.3f}s")

    # Samples in [start_time, end_time) belong to this trial.
    mask = (eeg_df['Time'] >= start_time) & (eeg_df['Time'] < end_time)
    eeg_df.loc[mask, 'Trial'] = trial_num

    trial_num += 1

# Cleanup: remove the helper column if an earlier run of the notebook left one behind.
eeg_df = eeg_df.drop(columns='PrevClass', errors='ignore')

Trial 1: MI_BEGIN from 50256.910s to 50259.909s
Trial 2: MI_BEGIN from 50279.218s to 50284.190s
Trial 3: REST_BEGIN from 50292.415s to 50295.186s
Trial 4: REST_BEGIN from 50303.483s to 50305.754s
Trial 5: MI_BEGIN from 50314.029s to 50319.045s
Trial 6: MI_BEGIN from 50327.345s to 50329.929s
Trial 7: MI_BEGIN from 50349.222s to 50351.482s
Trial 8: REST_BEGIN from 50370.837s to 50375.833s
Trial 9: REST_BEGIN from 50384.133s to 50386.481s
Trial 10: REST_BEGIN from 50394.768s to 50397.797s
Trial 11: MI_BEGIN from 50406.137s to 50411.084s
Trial 12: REST_BEGIN from 50419.371s to 50421.701s
Trial 13: REST_BEGIN from 50429.984s to 50434.988s
Trial 14: REST_BEGIN from 50443.198s to 50445.464s
Trial 15: MI_BEGIN from 50453.767s to 50456.032s
Trial 16: MI_BEGIN from 50475.430s to 50480.278s
Trial 17: MI_BEGIN from 50499.654s to 50504.654s
Trial 18: REST_BEGIN from 50512.900s to 50517.886s
Trial 19: REST_BEGIN from 50526.220s to 50531.185s
Trial 20: MI_BEGIN from 50539.392s to 50542.728s
Trial 21:

In [46]:
import pandas as pd

# Trigger definitions
TRIGGERS = {
    "MI_BEGIN": 200,
    "MI_END": 220,
    "REST_BEGIN": 100,
    "REST_END": 120,
}

# Time durations (used only if no END marker is found)
TIME_MI = 5  # seconds


def _to_trigger_code(value):
    """Return `value` as an integer trigger code, or None if it is not an integer-valued number.

    Accepts ints, floats and numeric strings (200, 200.0, "200", "200.0"), so
    the comparison does not depend on how the label column was parsed or on
    whether TRIGGERS currently holds ints or strings.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return int(number) if number.is_integer() else None


def parse_trial_data(eeg_df, marker_df):
    """
    Parses a marker DataFrame to extract trial start/end times and labels.

    Each MI_BEGIN / REST_BEGIN marker opens one trial. The trial ends at the
    first matching MI_END / REST_END marker that occurs strictly after the
    BEGIN and before the next BEGIN marker of either kind. If there is no such
    END, the end time falls back to ``start + TIME_MI``.

    Parameters:
    - eeg_df (pd.DataFrame): EEG data (not used in this function, just passed for consistency).
    - marker_df (pd.DataFrame): Must have columns ['Label', 'Time']. Rows may be
      in any order; the input frame is not modified.

    Returns:
    - pd.DataFrame: with columns ['Trial_Label', 'Trial_Start_Time', 'Trial_End_Time'],
      one row per trial in chronological order (empty, with the same columns,
      when no BEGIN marker is present).
    """
    columns = ["Trial_Label", "Trial_Start_Time", "Trial_End_Time"]

    # BEGIN code -> (trial name, matching END code). Built at call time so the
    # function follows whatever TRIGGERS is currently bound to.
    trial_types = {
        _to_trigger_code(TRIGGERS["MI_BEGIN"]): ("MI", _to_trigger_code(TRIGGERS["MI_END"])),
        _to_trigger_code(TRIGGERS["REST_BEGIN"]): ("Rest", _to_trigger_code(TRIGGERS["REST_END"])),
    }

    # Work on a time-ordered copy so "the next END" really is the next one in time.
    markers = marker_df.sort_values("Time", kind="mergesort")
    codes = [_to_trigger_code(value) for value in markers["Label"]]
    times = markers["Time"].tolist()

    records = []
    for i, (code, start_time) in enumerate(zip(codes, times)):
        if code not in trial_types:
            continue
        trial_name, end_code = trial_types[code]

        end_time = start_time + TIME_MI  # fallback for offline mode (no END marker)
        for j in range(i + 1, len(codes)):
            if codes[j] in trial_types:
                # The next trial began first: this trial has no END of its own.
                break
            if codes[j] == end_code and times[j] > start_time:
                end_time = times[j]
                break

        records.append((trial_name, start_time, end_time))

    return pd.DataFrame(records, columns=columns)


In [52]:
# Build the per-trial table (label, start time, end time) from the markers and show it.
trial_info_df = parse_trial_data(eeg_df=eeg_df, marker_df=marker_df)
print(trial_info_df)

   Trial_Label  Trial_Start_Time  Trial_End_Time
0         Rest      21814.053225    21819.053385
1         Rest      21827.105593    21832.113431
2         Rest      21840.161782    21845.161953
3           MI      21853.213901    21858.214230
4           MI      21877.286311    21882.286621
..         ...               ...             ...
85        Rest      23487.551952    23492.551692
86          MI      23500.603724    23505.612040
87        Rest      23524.680159    23529.680261
88          MI      23537.736509    23542.748508
89          MI      23561.804619    23566.816712

[90 rows x 3 columns]


In [69]:
# ---------------------------------------------------------------------------
# 1. Label every EEG sample with the most recent marker
# ---------------------------------------------------------------------------
# Sort both tables by time so that "most recent marker" is well defined.
eeg_df = eeg_df.sort_values('Time')
marker_df = marker_df.sort_values('Time').reset_index(drop=True)

# For each EEG sample, find the last marker whose time is <= the sample time,
# i.e. the marker interval [t_i, t_{i+1}) that contains the sample. A single
# binary search over the sorted marker times replaces one full-frame boolean
# mask per marker. An index of -1 means the sample precedes the first marker.
marker_idx = np.searchsorted(
    marker_df['Time'].to_numpy(), eeg_df['Time'].to_numpy(), side='right'
) - 1
has_marker = marker_idx >= 0

sample_labels = np.full(len(eeg_df), np.nan)
sample_labels[has_marker] = marker_df['Label'].to_numpy(dtype=float)[marker_idx[has_marker]]
eeg_df['Label'] = sample_labels

# Normalise labelled samples to integer-valued strings (e.g. "200");
# samples before the first marker stay NaN.
eeg_df['Label'] = eeg_df['Label'].dropna().astype(float).astype(int).astype(str)

# ---------------------------------------------------------------------------
# 2. Map trigger codes to descriptive class names
# ---------------------------------------------------------------------------
# Mapping for label → descriptive class name
TRIGGERS = {
    "MI_BEGIN": "200",
    "MI_END": "220",
    "MI_EARLYSTOP": "240",
    "ROBOT_BEGIN": "300",
    "ROBOT_END": "320",
    "ROBOT_EARLYSTOP": "340",
    "ROBOT_CONFIRM_STOP": "345",
    "REST_BEGIN": "100",
    "REST_END": "120",
    "REST_EARLYSTOP": "140"
}
label_to_class = {v: k for k, v in TRIGGERS.items()}
eeg_df['OutcomeClass'] = eeg_df['Label'].map(label_to_class)

# ---------------------------------------------------------------------------
# 3. Find the first sample of every MI/REST trial
# ---------------------------------------------------------------------------
# An event is the first sample of each run of identical, non-missing classes.
eeg_df['PrevClass'] = eeg_df['OutcomeClass'].shift()
is_event = eeg_df['OutcomeClass'].notna() & (eeg_df['OutcomeClass'] != eeg_df['PrevClass'])
events_clean = eeg_df.loc[is_event, ['Time', 'OutcomeClass']].reset_index(drop=True)

# Get just the trial start events
trial_begins = events_clean[events_clean['OutcomeClass'].isin(['MI_BEGIN', 'REST_BEGIN'])].reset_index(drop=True)

# ---------------------------------------------------------------------------
# 4. Number the trials
# ---------------------------------------------------------------------------
# Trial k (0-based) covers [begin_k, begin_{k+1}); the last trial runs to the
# end of the EEG data. Samples before the first MI/REST_BEGIN get -1. Counting
# how many begin times are <= each sample time yields exactly this numbering,
# and it also works when no begin event exists (every sample becomes -1)
# instead of failing on a NaN-to-int cast.
trial_idx = np.searchsorted(
    trial_begins['Time'].to_numpy(), eeg_df['Time'].to_numpy(), side='right'
) - 1
eeg_df['Trial'] = trial_idx.astype(int)

trial_num = len(trial_begins)  # number of trials found

# Done!

# Done!


In [74]:
# Spot-check ten random rows; the fixed seed keeps the displayed rows
# identical across Restart & Run All.
eeg_df.sample(10, random_state=0)

Unnamed: 0,Time,Ch1,Ch2,Ch3,Ch4,Ch5,Ch6,Ch7,Ch8,Ch9,...,Ch34,Ch35,Ch36,Ch37,Ch38,Ch39,Label,OutcomeClass,Trial,PrevClass
37968,21864.89271,2152.8,6131.34,-110.4,877.09,1173.78,3896.06,672.56,-5010.83,8401.39,...,-17071.824,-15352.811,-5147.205,-34690.776,-3430.874,0.0,300,ROBOT_BEGIN,3,ROBOT_BEGIN
774467,23303.420631,-1498.5,408.22,-3383.68,-5188.32,-2757.49,2923.96,-2632.91,-7072.86,1686.31,...,-17049.474,-15316.902,-5253.144,-34670.065,-3433.258,0.0,100,REST_BEGIN,74,REST_BEGIN
390716,22553.879181,-1183.37,1406.71,-3296.42,-3124.39,-1470.64,3237.39,-920.36,-7106.48,4443.94,...,-17049.325,-15314.816,-5246.886,-34599.439,-3395.71,0.0,120,REST_END,36,REST_END
256260,22291.260074,366.34,2792.96,-2260.23,-1994.86,-844.64,3489.26,-535.31,-6410.21,5390.16,...,-17042.769,-15328.226,-5227.963,-34590.946,-3433.556,0.0,120,REST_END,26,REST_END
919003,23585.727968,-1443.25,295.68,-3011.61,-5018.28,-3321.59,2944.76,-3518.11,-6890.77,475.49,...,-17026.081,-15335.676,-5197.418,-34611.806,-3388.111,0.0,320,ROBOT_END,89,ROBOT_END
412316,22596.068245,-1339.47,1234.1,-3155.86,-3243.64,-1574.19,3103.0,-1056.81,-7167.77,4225.61,...,-17008.201,-15321.372,-5241.373,-34611.955,-3394.22,0.0,200,MI_BEGIN,39,MI_BEGIN
278046,22333.812432,-138.79,2342.48,-2516.22,-2305.42,-1010.99,3395.88,-608.52,-6556.24,5138.11,...,-17052.752,-15323.607,-5195.183,-34640.414,-3421.04,0.0,300,ROBOT_BEGIN,28,ROBOT_BEGIN
391483,22555.377284,-1210.4,1382.09,-3309.96,-3142.72,-1489.52,3234.62,-941.36,-7115.12,4436.04,...,-16987.341,-15342.381,-5189.521,-34579.324,-3397.796,0.0,120,REST_END,36,REST_END
756273,23267.884158,-1537.49,383.77,-3559.79,-4884.24,-2743.81,2948.21,-2647.82,-7127.91,1630.08,...,-17045.451,-15340.593,-5222.45,-34652.483,-3435.493,0.0,120,REST_END,72,REST_END
479961,22728.192282,-1661.27,950.33,-3481.59,-3844.22,-1772.34,2870.43,-1287.03,-7276.77,3773.75,...,-17044.706,-15320.925,-5190.564,-34642.351,-3409.716,0.0,200,MI_BEGIN,46,MI_BEGIN


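Strictly speaking, a trial runs from its REST/MI `BEGIN` marker to the matching `END` marker, but numbering only those samples would leave most of the recording without a trial number. So the `Trial` column above assigns every sample from one `BEGIN` up to the next `BEGIN` to the same trial. To look only at the trial data: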

In [75]:
# Keep only the samples whose most recent marker is a REST/MI BEGIN or END.
# NOTE(review): OutcomeClass holds the most recent marker at or before each
# sample, so rows tagged *_END lie after the END marker (up to the next
# marker) -- confirm they are meant to count as trial data.
trial_labels = {'REST_BEGIN', 'REST_END', 'MI_BEGIN', 'MI_END'}
is_trial_sample = eeg_df['OutcomeClass'].isin(trial_labels)
trial_eeg_df = eeg_df.loc[is_trial_sample]
trial_eeg_df.shape

(437570, 44)