In [1]:
import os, mne, socket
from mne_bids import BIDSPath, read_raw_bids

%matplotlib qt

# Work out where the class data lives, based on the machine running the notebook.
# Result: `dataPath` (class data root) and `eegRoot` (the EEGBids folder inside it).
hostname = socket.gethostname()

if hostname == 'sebastian_mac':
    # Sebastian's machine: the data root is two directory levels above the
    # current working directory.
    mydir = os.getcwd()
    path_parts = mydir.split(os.sep)
    # Character positions of every path separator in the working directory
    idcs = [pos for pos in range(len(mydir)) if mydir[pos] == os.sep]
    dataPath = mydir[:idcs[-2]]
elif 'hpc' in hostname:
    # HPC cluster: the data sits in the shared course scratch space.
    curr_dir = os.getcwd()
    path_parts = curr_dir.split(os.sep)
    dataPath = os.path.join(os.sep, 'scratch', 'work', 'courses', 'PSYCH-GA-3405-2024fa')
else:
    # Any other device: data is read from a Google Drive mount located under
    # the first two components of the working directory
    # (presumably /Users/<name> on macOS -- confirm for other setups).
    my_user_id = 'mdd9787'  # change this to your netID
    curr_dir = os.getcwd()
    path_parts = curr_dir.split(os.sep)
    base_dir = os.path.join(os.sep, *path_parts[:3])
    drive_components = [
        'Library',
        'CloudStorage',
        f'GoogleDrive-{my_user_id}@nyu.edu',
        'My Drive',
        'Coursework',
        'EEG MEG methods',
        'ClassData',
    ]
    dataPath = os.path.join(base_dir, *drive_components)

eegRoot = os.path.join(dataPath, 'EEGBids')



In [2]:
## Load data
# Select the dataset, build the BIDS-style input/derivative paths and load the
# previously cleaned recording for that subject.
groupName = 'GroupD'  # Change group name to analyze a different dataset (valid IDs: GroupA, GroupC, GroupD)
userName = 'mrugank'  # Make sure you are writing to your own derivatives folder
taskName = 'oddball'

# Get subject code for your group
groupMap = {'GroupA': '001', 'GroupC': '003', 'GroupD': '004'}
subjCode = groupMap[groupName]

# NOTE: this rebinds `dataPath` from the class-data root to the subject's eeg folder.
dataPath = os.path.join(eegRoot, f'sub-{subjCode}', 'eeg')
derivPath = os.path.join(eegRoot, 'derivatives', userName, 'preprocessing', f'sub-{subjCode}')

# exist_ok=True avoids the check-then-create race of a separate os.path.exists() test
os.makedirs(derivPath, exist_ok=True)

saveRoot = f'sub-{subjCode}_task-{taskName}_'

# Load the cleaned recording; this expects '<saveRoot>clean-raw.fif' to have
# been written to derivPath already (presumably by the preprocessing notebook).
raw_clean = mne.io.read_raw_fif(os.path.join(derivPath, f'{saveRoot}clean-raw.fif'))

Opening raw data file /Users/mrugankdake/Library/CloudStorage/GoogleDrive-mdd9787@nyu.edu/My Drive/Coursework/EEG MEG methods/ClassData/EEGBids/derivatives/mrugank/preprocessing/sub-004/sub-004_task-oddball_clean-raw.fif...
    Range : 0 ... 2055679 =      0.000 ...  4014.998 secs
Ready.
Opening raw data file /Users/mrugankdake/Library/CloudStorage/GoogleDrive-mdd9787@nyu.edu/My Drive/Coursework/EEG MEG methods/ClassData/EEGBids/derivatives/mrugank/preprocessing/sub-004/sub-004_task-oddball_clean-raw-1.fif...
    Range : 2055680 ... 2347007 =   4015.000 ...  4583.998 secs
Ready.


In [3]:
# ERP analysis
# Epoch the 'even'/'odd' trials that fall inside the semantic visual and the
# semantic auditory blocks.
import numpy as np

# Apply a 15 Hz low-pass filter. The data must be in memory first, and the
# filter modifies raw_clean in place, so re-running this cell filters the
# already-filtered data again.
raw_clean.load_data()
raw_clean.filter(l_freq=None, h_freq=15, n_jobs=-1)

# Convert annotations to an events array with columns [onset sample, -, event code]
events, event_dict = mne.events_from_annotations(raw_clean)

# Trial codes are looked up by name so they stay consistent with the event_id
# used for epoching below (previously hard-coded as 5 and 6).
trial_codes = [int(event_dict['even']), int(event_dict['odd'])]
trial_event_id = {'even': trial_codes[0], 'odd': trial_codes[1]}

SEMANTIC_VIS_CODE = 2  # 2 is the event code for semanticVis
SEMANTIC_AUD_CODE = 3  # 3 is the event code for semanticAud

# Store each event's duration, in samples, in the middle column of `events`:
#   * block / non-trial events last until the next non-trial event
#   * trial events last until the next event of any kind
#   * with no such later event, the duration runs to the end of the recording
# Onsets are sample indices, so the end of the recording must be a sample index
# too. Using raw_clean.times[-1] (seconds) here produced a large negative
# duration, which silently discarded every trial of the final block.
last_sample = raw_clean.last_samp
is_trial = np.isin(events[:, 2], trial_codes)
for i in range(len(events)):
    later_onsets = events[i + 1:, 0]
    if not is_trial[i]:
        # Only non-trial events can terminate a block
        later_onsets = later_onsets[~is_trial[i + 1:]]
    end_sample = later_onsets[0] if len(later_onsets) > 0 else last_sample
    events[i, 1] = end_sample - events[i, 0]

semantic_vis_blocks = events[events[:, 2] == SEMANTIC_VIS_CODE]
semantic_aud_blocks = events[events[:, 2] == SEMANTIC_AUD_CODE]


def _trials_within_blocks(all_events, blocks, codes):
    """Return the rows of `all_events` whose code is in `codes` and whose onset
    lies within [block onset, block onset + block duration] of any row of `blocks`.

    The result is always an (n, 3) array, including when nothing matches.
    """
    selected = []
    for event in all_events:
        if event[2] not in codes:
            continue
        for block in blocks:
            if block[0] <= event[0] <= block[0] + int(block[1]):
                selected.append(event)
                break
    return np.array(selected, dtype=all_events.dtype).reshape(-1, 3)


# Trial events ('even' and 'odd') that occur within each type of block
semantic_vis_events = _trials_within_blocks(events, semantic_vis_blocks, trial_codes)
semantic_aud_events = _trials_within_blocks(events, semantic_aud_blocks, trial_codes)

# Create epochs from -0.3 to +1 s relative to each trial onset, baseline-corrected
# with the pre-stimulus interval
epoch_kwargs = dict(
    event_id=trial_event_id,
    tmin=-0.3,
    tmax=1,
    baseline=(None, 0),
    preload=True,
)
epochsVis = mne.Epochs(raw_clean, events=semantic_vis_events, **epoch_kwargs)
epochAud = mne.Epochs(raw_clean, events=semantic_aud_events, **epoch_kwargs)

Reading 0 ... 2347007  =      0.000 ...  4583.998 secs...
Filtering raw data in 1 contiguous segment
Setting up low-pass filter at 15 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal lowpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Upper passband edge: 15.00 Hz
- Upper transition bandwidth: 3.75 Hz (-6 dB cutoff frequency: 16.88 Hz)
- Filter length: 451 samples (0.881 s)



[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.




[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   11.3s
[Parallel(n_jobs=-1)]: Done 256 out of 256 | elapsed:   16.2s finished


Used Annotations descriptions: ['/1', '/2', '/4', '/8', 'even', 'odd', 'storySeg']
Not setting metadata
540 matching events found
Setting baseline interval to [-0.30078125, 0.0] s
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 540 events and 667 original time points ...
0 bad epochs dropped
Not setting metadata
450 matching events found
Setting baseline interval to [-0.30078125, 0.0] s
Applying baseline correction (mode: mean)
0 projection items activated
Using data from preloaded Raw for 450 events and 667 original time points ...
5 bad epochs dropped


## LDA

In [4]:
# Prepare data for LDA
# Equalise the number of trials per modality by keeping only the first
# `min_trials` epochs of each.
n_vis_total, n_aud_total = len(epochsVis), len(epochAud)
min_trials = min(n_vis_total, n_aud_total)

epochsVis_balanced = epochsVis[:min_trials]
epochAud_balanced = epochAud[:min_trials]

# Stack visual epochs first, auditory epochs second
epochCombined = mne.concatenate_epochs([epochsVis_balanced, epochAud_balanced])

# Class labels in the same order as the stacked epochs: 1 for visual, 2 for auditory
labels = np.repeat([1.0, 2.0], [len(epochsVis_balanced), len(epochAud_balanced)])

# Dimensions of the combined data: (trials, channels, time points)
ntrials, nchannels, ntimes = epochCombined.get_data().shape

# One accuracy value per time point, filled in by the cross-validation cell
accuracy_timecourse = np.zeros((ntimes, 1))

  epochCombined = mne.concatenate_epochs([epochsVis_balanced, epochAud_balanced])


Not setting metadata
890 matching events found
Applying baseline correction (mode: mean)


In [5]:
# Stratified 10-fold cross-validated two-class LDA, run separately at every time point.
# Reads `epochCombined`, `labels`, `ntimes`; fills `accuracy_timecourse` in place.
from sklearn.model_selection import KFold, StratifiedKFold  # KFold kept importable for other cells
from time import time
from scipy.linalg import pinv

start_time = time()
nkfolds = 10
# `labels` is ordered (all visual, then all auditory). Unshuffled KFold therefore
# produced test folds containing a single class and imbalanced training folds.
# Stratified, shuffled folds keep both classes in every split; the fixed seed
# keeps the result reproducible.
kf = StratifiedKFold(n_splits=nkfolds, shuffle=True, random_state=0)

# Fetch the data once: get_data() returns the full (trials, channels, times)
# array, so calling it inside the time loop copied it at every time point.
data_all = epochCombined.get_data()

# Fold membership depends only on the labels, so the same folds are reused
# for every time point.
folds = list(kf.split(np.zeros(len(labels)), labels))

progress_step = max(1, ntimes // 10)

# Loop through each time point
for t in range(ntimes):
    if t % progress_step == 0:
        print(f'Progress: {t / ntimes:.0%}')
    data_t = data_all[:, :, t]  # (trials, channels) at this time point

    fold_accuracies = np.zeros(nkfolds)

    for i, (train_index, test_index) in enumerate(folds):
        X_train, X_test = data_t[train_index], data_t[test_index]
        y_train, y_test = labels[train_index], labels[test_index]

        # Class means and within-class scatter from the training data only
        train_vis = X_train[y_train == 1]
        train_aud = X_train[y_train == 2]
        mean_vis = np.mean(train_vis, axis=0)
        mean_aud = np.mean(train_aud, axis=0)
        Sw = np.cov(train_vis, rowvar=False) + np.cov(train_aud, rowvar=False)

        # For two classes the between-class scatter has rank one, so the leading
        # eigenvector of pinv(Sw) @ Sb is proportional to pinv(Sw) @ (mean_vis - mean_aud).
        # Using the closed form avoids a full eigendecomposition per fold and
        # the complex-valued output np.linalg.eig can return for a
        # non-symmetric matrix.
        w = pinv(Sw).dot(mean_vis - mean_aud)

        # Project the held-out trials and the training class means onto w
        # (the mean of the projections equals the projection of the mean)
        test_proj = X_test.dot(w)
        mean_vis_proj = mean_vis.dot(w)
        mean_aud_proj = mean_aud.dot(w)

        # Assign each test trial to the class whose projected mean is closer
        closer_to_vis = np.abs(test_proj - mean_vis_proj) < np.abs(test_proj - mean_aud_proj)
        predictions = np.where(closer_to_vis, 1, 2)

        # Compute accuracy
        fold_accuracies[i] = np.mean(predictions == y_test)

    accuracy_timecourse[t] = np.mean(fold_accuracies)

print(f'Time taken: {time() - start_time}')

0.0
0.0014992503748125937
0.0029985007496251873
0.004497751124437781
0.005997001499250375
0.0074962518740629685
0.008995502248875561
0.010494752623688156
0.01199400299850075
0.013493253373313344
0.014992503748125937
0.01649175412293853
0.017991004497751123
0.019490254872563718
0.020989505247376312
0.022488755622188907
0.0239880059970015
0.025487256371814093
0.026986506746626688
0.02848575712143928
0.029985007496251874
0.031484257871064465
0.03298350824587706
0.034482758620689655
0.035982008995502246
0.037481259370314844
0.038980509745127435
0.04047976011994003
0.041979010494752625
0.043478260869565216
0.044977511244377814
0.046476761619190406
0.047976011994003
0.049475262368815595
0.050974512743628186
0.05247376311844078
0.053973013493253376
0.05547226386806597
0.05697151424287856
0.05847076461769116
0.05997001499250375
0.06146926536731634
0.06296851574212893
0.06446776611694154
0.06596701649175413
0.06746626686656672
0.06896551724137931
0.0704647676161919
0.07196401799100449
0.0734632

In [7]:
# Plot the time-course of classification accuracy
import matplotlib.pyplot as plt

# Time axis (in seconds) shared by all epochs
epoch_times = epochsVis_balanced.times

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epoch_times, accuracy_timecourse, linewidth=2)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Classification Accuracy')
ax.set_title('Time-Course of Classification Accuracy')
ax.grid(True)
ax.set_xlim([epoch_times[0], epoch_times[-1]])
ax.set_ylim([0, 1])  # Accuracy is a proportion between 0 and 1
plt.show()  # Display the plot