In [None]:
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt, hilbert
import os

## `LEiDA_EEG_eigenvectors.m`

#### Helper Functions - Filtering

In [None]:
def butter_bandpass(lowcut, highcut, fs, order=6):
    """
    Design a digital Butterworth bandpass filter in transfer-function form.

    Both cutoff frequencies are divided by the Nyquist frequency (``fs / 2``)
    before being handed to ``scipy.signal.butter``.

    Parameters
    ----------
    lowcut : float
        Lower edge of the passband (Hz).
    highcut : float
        Upper edge of the passband (Hz).
    fs : float
        Sampling frequency in Hz.
    order : int, optional
        Order passed to ``butter``. Default is 6.

    Returns
    -------
    b, a : ndarray
        Numerator (b) and denominator (a) coefficient arrays of the filter.
    """
    half_rate = 0.5 * fs
    band_edges = [lowcut / half_rate, highcut / half_rate]
    # NOTE(review): the scipy documentation warns that the (b, a) form can be
    # numerically sensitive for higher-order designs and recommends
    # output='sos'; kept as (b, a) here because callers unpack two arrays.
    numerator, denominator = butter(order, band_edges, btype='band')
    return numerator, denominator


def bandpass_filter(data, fs, lowcut, highcut, order=6):
    """
    Bandpass-filter a time series with a forward-backward Butterworth filter.

    The coefficients come from ``butter_bandpass`` and are applied with
    ``scipy.signal.filtfilt`` using its default settings.

    Parameters
    ----------
    data : ndarray
        One-dimensional time series data (e.g., one ROI).
    fs : float
        Sampling frequency in Hz.
    lowcut : float
        Lower edge of the passband (Hz).
    highcut : float
        Upper edge of the passband (Hz).
    order : int, optional
        Order forwarded to ``butter_bandpass``. Default is 6.

    Returns
    -------
    filtered_data : ndarray
        Filtered time series, same shape as the input.
    """
    coefficients = butter_bandpass(lowcut, highcut, fs, order=order)
    # coefficients is the (b, a) pair; unpack it straight into filtfilt.
    return filtfilt(*coefficients, data)


#### Eigenvectors

In [None]:
def _phase_locking_matrix(window_phases):
    """Return the matrix of mean cos(phase difference) between every pair of rows."""
    n_rows = window_phases.shape[0]
    locking = np.zeros((n_rows, n_rows))
    for row in range(n_rows):
        # Phase of this row minus the phase of every row, averaged over the
        # samples of the window (one vectorised pass per row instead of a
        # Python loop over every pair).
        locking[row, :] = np.mean(np.cos(window_phases[row] - window_phases), axis=1)
    return locking


def compute_leading_eigenvectors(data, fs, window_size, freq_band='alpha', verbose=True, do_plots=False):
    """
    Compute the leading eigenvector of windowed phase-locking matrices.

    Pipeline (described in this notebook as a port of a MATLAB script):
      1) Remove each row's mean and bandpass filter it (per ROI).
      2) Take the Hilbert transform and extract instantaneous phases.
      3) Build a dynamic phase-locking (dPL) matrix, mean(cos(phase_n - phase_p)),
         in non-overlapping windows of ``window_size`` samples.
      4) Extract the eigenvector with the largest eigenvalue from each dPL.

    Parameters
    ----------
    data : array_like
        Shape [n_areas, n_timepoints]. Each row is the time series of one brain region.
    fs : float
        Sampling frequency in Hz.
    window_size : int
        Number of samples in each non-overlapping window (e.g., 250).
    freq_band : str, optional
        Which frequency band to use: 'alpha' (8-12 Hz), 'beta' (15-25 Hz),
        or 'gamma' (30-80 Hz). Default is 'alpha'.
    verbose : bool, optional
        If True, prints progress messages. Default is True.
    do_plots : bool, optional
        If True, shows intermediate plots (raw vs filtered signal, phase,
        an example dPL matrix and its leading eigenvector). The example is
        taken from loop index 10, so it is only shown when that many windows
        exist. Default is False.

    Returns
    -------
    lead_eigs : ndarray
        Array of leading eigenvectors, shape [n_used, n_areas], where
        ``n_used = max(len(range(0, n_timepoints, window_size)) - 2, 0)``.
        Window k covers samples [k * window_size, (k + 1) * window_size) for
        k = 0 .. n_used - 1; samples after the last used window are ignored.
        When no window is available the result is an empty array of shape
        [0, n_areas].

    Raises
    ------
    ValueError
        If ``freq_band`` is not recognised, ``data`` is not 2D, or
        ``window_size`` is smaller than 1.
    """

    # --------------------------
    # 1) Determine filter band
    # --------------------------
    if freq_band == 'alpha':
        lowcut, highcut = 8, 12
    elif freq_band == 'beta':
        lowcut, highcut = 15, 25 # same as in other projects of mine
    elif freq_band == 'gamma':
        lowcut, highcut = 30, 80
    else:
        raise ValueError("freq_band must be 'alpha', 'beta', or 'gamma'.")

    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(f"data must be 2D [n_areas, n_timepoints]; got shape {data.shape}.")
    if window_size < 1:
        raise ValueError("window_size must be a positive number of samples.")

    filter_order = 6
    n_areas, T = data.shape

    if verbose:
        print(f"Filtering data ({data.shape[0]} areas, {data.shape[1]} timepoints) "
              f"from {lowcut} to {highcut} Hz, order={filter_order}.")
        print(f"N areas: {n_areas}, T: {T}")

    # -------------------------------------------
    # 2) De-mean and filter each ROI separately
    # -------------------------------------------
    data_demean = data - np.mean(data, axis=1, keepdims=True)

    filtered_data = np.zeros_like(data_demean)
    for i in range(n_areas):
        filtered_data[i, :] = bandpass_filter(data_demean[i, :], fs,
                                             lowcut, highcut, order=filter_order)

    if do_plots:
        # Plot an example channel (ROI 0) before and after filtering
        t = np.arange(T) / fs
        plt.figure(figsize=(10, 4))
        plt.plot(t, data_demean[0, :], label='Raw (demeaned)', alpha=0.7)
        plt.plot(t, filtered_data[0, :], label='Filtered', alpha=0.7)
        plt.xlim([0, min(10.0, t[-1])])  # zoom in on (at most) the first 10 seconds
        plt.legend()
        plt.title("ROI 0: Before and After Filtering")
        plt.xlabel("Time (s)")
        plt.show()

    # --------------------------------------------------------
    # 3) Compute Hilbert transform to get instantaneous phase
    # --------------------------------------------------------
    analytic_signal = hilbert(filtered_data, axis=1)
    phases = np.angle(analytic_signal)

    if do_plots:
        # Plot example channel's phase (t was defined in the plotting branch above)
        plt.figure(figsize=(10, 4))
        plt.plot(t, phases[0, :], label='Phase (ROI 0)')
        plt.title("Instantaneous Phase of Filtered Signal (ROI 0)")
        plt.xlabel("Time (s)")
        plt.ylabel("Phase (radians)")
        plt.xlim([0, min(10.0, t[-1])])  # zoom in
        plt.legend()
        plt.show()

    # -----------------------------------
    # 4) Windowing & dynamic phase-locking
    # -----------------------------------
    # window_starts holds the first sample of every window, e.g. [0, 250, 500, ...].
    # The loop visits consecutive pairs (window_starts[k], window_starts[k + 1])
    # for k = 0 .. len(window_starts) - 3, so it INCLUDES the window that
    # starts at sample 0 and never uses samples from window_starts[-2] onward.
    # NOTE(review): the earlier description said the first and last windows
    # are skipped "as in the MATLAB code" (repArray = 1:window_size:size(data,2);
    # for t = 2:(repetitions-1)). The indexing is kept unchanged here; confirm
    # against the MATLAB reference which windows are meant to be used.
    window_starts = np.arange(0, T, window_size)
    repetitions = len(window_starts)
    if T % window_size != 0 and verbose:
        print("Warning: discarding last incomplete window since T not multiple of window_size.")

    example_loop_index = 10  # window whose dPL / eigenvector is shown when do_plots is True
    lead_eig_list = []
    example_iFC = None
    example_V1 = None

    for t_idx in range(1, repetitions - 1):
        start_idx = window_starts[t_idx - 1]
        end_idx = window_starts[t_idx]

        # Build iFC (dynamic phase-locking matrix) for this window
        iFC = _phase_locking_matrix(phases[:, start_idx:end_idx])

        # --------------------------------
        # 5) Leading eigenvector of iFC
        # --------------------------------
        # iFC is real and symmetric (cos is even), so eigh applies.
        vals, vecs = np.linalg.eigh(iFC)
        V1 = vecs[:, np.argmax(vals)]
        lead_eig_list.append(V1)

        if t_idx == example_loop_index:
            example_iFC = iFC.copy()
            example_V1 = V1.copy()

    if lead_eig_list:
        lead_eigs = np.array(lead_eig_list)
    else:
        # Keep the documented 2D layout even when the recording is too short
        # to yield any window.
        lead_eigs = np.empty((0, n_areas))

    if verbose:
        print(f"Computed {lead_eigs.shape[0]} leading eigenvectors "
              f"for {n_areas} areas with window_size={window_size}.")

    if do_plots and example_iFC is not None:
        # Plot the phase-locking matrix (dPL) as an image
        plt.figure(figsize=(6, 5))
        plt.imshow(example_iFC, cmap='bwr', aspect='auto', vmin=-1, vmax=1)
        plt.colorbar(label='Phase Coherence (mean cos(diff))')
        plt.title("Example Dynamic Phase-Locking Matrix (dPL)")
        plt.xlabel("Brain Region (ROI index)")
        plt.ylabel("Brain Region (ROI index)")
        plt.show()

        # Plot the corresponding leading eigenvector with sign preserved.
        plt.figure(figsize=(6, 4))
        markerline, stemlines, baseline = plt.stem(np.arange(n_areas), example_V1)
        plt.setp(markerline, marker='o', markersize=6, color='b')
        plt.setp(stemlines, color='b')
        plt.title("Leading Eigenvector (with sign) from Example dPL")
        plt.xlabel("Brain Region (ROI index)")
        plt.ylabel("Eigenvector Component")
        plt.show()

    return lead_eigs

In [None]:
def load_example_eeg_mat(file_path, n_participants=8):
    """
    Load per-participant EEG matrices stored as variables ``p1`` .. ``pN`` in a .mat file.

    Parameters
    ----------
    file_path : str
        Path to the .mat file (e.g. exampleSourceEEG_8.mat).
    n_participants : int, optional
        Number of participant variables to read (``p1`` .. ``p{n_participants}``).
        Default is 8.

    Returns
    -------
    data_list : list of ndarray
        List of length ``n_participants``; each element is the 2D array
        stored under the corresponding variable. The rest of this notebook
        treats each array as (N_areas, T) -- the loader itself only checks
        that it is 2D.

    Raises
    ------
    KeyError
        If any of the expected ``p<i>`` variables is absent from the file.
    ValueError
        If a participant array is not 2D.
    """
    mat_contents = scipy.io.loadmat(file_path)

    # Expected variable names: p1, p2, ..., p<n_participants>
    expected_keys = [f'p{i+1}' for i in range(n_participants)]
    missing = [key for key in expected_keys if key not in mat_contents]
    if missing:
        # loadmat adds bookkeeping entries such as __header__; hide them here.
        available = sorted(k for k in mat_contents if not k.startswith('__'))
        raise KeyError(
            f"Missing participant variable(s) {missing} in '{file_path}'. "
            f"Available variables: {available}."
        )

    participants = [mat_contents[key] for key in expected_keys]

    # Check consistency
    for key, data in zip(expected_keys, participants):
        if data.ndim != 2:
            raise ValueError(f"Participant {key} data is not 2D (found shape {data.shape}).")

    print(f"Loaded EEG data for {len(participants)} participants.")
    return participants

## Main pipeline

In [None]:
# Location of the example dataset (adjust to your own checkout if needed)
file_path = '../data/LEiDA_EEG/exampleSourceEEG_8.mat'

# Stop early with a readable message when the dataset is not where we expect it
if not os.path.exists(file_path):
    raise FileNotFoundError(f"File '{file_path}' not found. Please check the path.")

# One EEG matrix per participant
participants_data = load_example_eeg_mat(file_path)

# Analysis settings
fs = 250             # sampling frequency (Hz)
window_size = 50     # samples per window: 50 samples at 250 Hz = 200 ms
freq_band = 'alpha'  # one of 'alpha', 'beta', 'gamma'

# Run the eigenvector pipeline once per participant and collect the results
all_leading_eigenvectors = []
for i, data in enumerate(participants_data):
    print(f"\n--- Processing Participant {i+1} ---")
    lead_vecs = compute_leading_eigenvectors(
        data,
        fs=fs,
        window_size=window_size,
        freq_band=freq_band,
        verbose=True,
        do_plots=True,
    )
    all_leading_eigenvectors.append(lead_vecs)

print("\n✅ Processing complete!")