In [1]:
%matplotlib widget
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mne

In [2]:
%load_ext autoreload
%autoreload 1
%aimport help_func

from help_func import *


In [3]:
# Load the data
base_path = base_path = Path.cwd().parent  / "data"/ "exp2_box_ISI300" /"session_20250710_1721_Ege"
df_eeg= pd.read_csv(base_path / 'eeg_data.csv')
df_gui = pd.read_csv(base_path / 'gui_data.csv')

# Time start point = 0
df_gui['timestamp'] = df_gui['timestamp'] - df_eeg['timestamp_ux'].iloc[0]
df_eeg['timestamp_ux'] = df_eeg['timestamp_ux'] - df_eeg['timestamp_ux'].iloc[0]

# Interpolate plateaus in ux timestamp
df_eeg = interpolate_plateaus_in_ux(df_eeg)

In [4]:
# Estimate sampling frequency (in Hz)
timestamps = df_eeg['timestamp_ux'].values
dt = np.median(np.diff(timestamps))  # assume constant sampling
fs = 1.0 / dt
print(f"Estimated sampling frequency: {fs:.2f} Hz")

Estimated sampling frequency: 127.89 Hz


## Things to See
- **A** – Average time for the Epochs  
- **B** – The electrode graph 

## Preprocessing List
- **A** – Bandpass filter 1–20 Hz 


## The List to Try

### A. Normalization
- **A1** – Demean per epoch, then average  
- **A2** – Average, then demean  
- **A3** – Just average  

### B. Data Usage
- **B1** – Time values between 290 ms to end  
- **B2** – Time values between 290 ms to end, then decimate by 4  
- **B3** – P300 amplitude: peak in 290–500 ms ±10 ms window  
- **B4** – P300 amplitude: max in 290–500 ms  
- **B5** – P300 amplitude at exactly 300 ms  

### C. Feature Selection
- **C1** – Fisher LDA  
- **C2** - Statistically significant electrodes

### M. ML Methods
 ? Should I have output as binary and prob of correct or selection from 1-2-3 ? 
- **M1** – LDA  
- **M2** – SVM  
- **M3** – DAWN + LDA  
- **M4** - Random Forest with redacted columns



#### Later
- Try and discard the epochs with eye blinking with abs (max -min) and threshold


In [5]:
df = df_gui.iloc[0:36].copy()
avg = extract_and_average_epochs_by_stimulus(df_eeg, df, fs=128, post_time=0.6, n_average=0, normalization="A1",blink_channel_idx=0, blink_threshold=120)

In [33]:
def process_all_trials(df_eeg, df_gui, fs=128, post_time=0.6, n_average=0, normalization="A1",feature_type="B3", blink_channel_idx=0, blink_threshold=120):
    """
    Iteratively processes all trials in df_gui using extract_and_average_epochs_by_stimulus.
    
    Parameters
    ----------
    df_eeg : pd.DataFrame
        EEG data.
    df_gui : pd.DataFrame
        GUI data with 'trial', 'timestamp', and 'stimulus'.
    
    Returns
    -------
    all_averaged_epochs : list of dicts
        List of averaged epoch dicts from each trial.
    """
    all_averaged_epochs = []
    
    # Group df_gui by each trial
    for trial_id, trial_gui in df_gui.groupby('trial'):

        averaged_epochs = extract_and_average_epochs_by_stimulus(df_eeg=df_eeg,df_gui=trial_gui,fs=fs,post_time=post_time,n_average=n_average,
            normalization=normalization,blink_channel_idx=blink_channel_idx,blink_threshold=blink_threshold)
        
        target = trial_gui['target'].iloc[0] # target for the trial
        features_avg_epoch = extract_features_from_averaged_epochs(averaged_epochs, fs=fs, feature_type=feature_type)
        all_averaged_epochs.append(averaged_epochs)
    
    return all_averaged_epochs


In [34]:
all_averaged_epochs = process_all_trials(df_eeg, df_gui, fs=fs, post_time=0.6, n_average=0, normalization="A1", feature_type="B3", blink_channel_idx=0, blink_threshold=120)

NameError: name 'eeg_t' is not defined

In [None]:
def build_binary_classification_from_trial(features_dict, target_class):
    """
    Converts multi-class trial into binary classification sample set
    Parameters
    ----------
    features_dict : dict
        Dictionary {class_index: feature_vector (shape C,)} for current trial.
    target_class : int
        Index of the true target class (e.g., 0, 1, or 2)
    Returns
    -------
    X : np.ndarray, shape (3, C)
        Feature matrix for all 3 stimuli.
    y : np.ndarray, shape (3,)
        Binary labels: 1 if class is the target, 0 otherwise.
    """
    keys = sorted(features_dict.keys())  # ensure consistent order
    X = np.vstack([features_dict[k] for k in keys])
    y = np.array([1 if k == target_class else 0 for k in keys])
    return X, y


I need feature selection
    - RF 

I can also try Common Spatial Patterns

In [None]:
# Feature Selection
from sklearn.feature_selection import SelectKBest, f_classif
# X: shape (n_samples, n_features)
# y: shape (n_samples,), values as 1, 0

selector = SelectKBest(score_func=f_classif, k=10)
X_selected = selector.fit_transform(X, y)
selected_indices = selector.get_support(indices=True)

In [None]:
# Feature Extraction      
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# X: shape (n_samples, n_features)
# y: shape (n_samples,), values as 1, 0

# Fischer LDA
lda = LinearDiscriminantAnalysis()
X_lda = lda.fit_transform(X, y)   # Reduced features
y_pred = lda.predict(X)           # Class predictions (if needed)



I am doing the feature X and y preparer for B3-B5

In [5]:
import time

start = time.time()
epochs = extract_epochs_by_stimulus(df_eeg, df_gui, fs=250, post_time=0.8)

eeg_epochs_1 = epochs[1]
eeg_epochs_2 = epochs[2]
eeg_epochs_3 = epochs[3]

end = time.time()
print(f"Execution time: {(end - start)*1000:.2f} ms")

Execution time: 11.97 ms


In [None]:
# Demean the epochs globally
eeg_epochs_1_demeaned = global_demean_epochs(eeg_epochs_1)
eeg_epochs_2_demeaned = global_demean_epochs(eeg_epochs_2)
eeg_epochs_3_demeaned = global_demean_epochs(eeg_epochs_3)
