## Set up

In [81]:
import numpy as np
import os.path as op
from pprint import pformat
from typing import Iterator

# EEG utilities
import mne
from mne.preprocessing import ICA, create_eog_epochs
from pyprep.prep_pipeline import PrepPipeline
from autoreject import get_rejection_threshold, validation_curve

# BIDS utilities
from mne_bids import BIDSPath, read_raw_bids
from util.io.bids import DataSink
from bids import BIDSLayout

In [79]:
# Constants
BIDS_ROOT = '../data/bids'
DERIV_ROOT = op.join(BIDS_ROOT, 'derivatives')
LOWPASS = 300
FS = 2000
REJECT_THRES = 5e-7 # 50 microvolts

# Parse BIDS directory
layout = BIDSLayout(BIDS_ROOT)
subjects = layout.get_subjects()
tasks = layout.get_tasks()
runs = layout.get_runs()
print(subjects, tasks, runs)

['4', '6', '3', '5', '2'] ['pitch'] [1, 2]


## Functions
#### Import data 

In [82]:
# Create Iterator object to loop over all files
KeType = Tuple[]
def fpaths() -> Iterator[KeyType]:
    for sub in subjects:
        for task in tasks:
            for run in runs:
#                 bids_path = get_bids_path(bids_root, sub, task, run)
#                 save_path = get_save_path(deriv_root, sub, task, run)
#                 PREP_seed = int(str(sub) + str(run))
#                 if op.isfile(bids_path):
#                     yield (bids_path, save_path, PREP_seed)
                key = (sub, task, run)
                yield key

def get_bids_path(bids_root, sub, task, run):
    bids_path = BIDSPath(root = bids_root,
                        subject = sub,
                        task = task,
                        run = run,
                        datatype = 'eeg',
                        )
    return bids_path

def import_bids_data(bids_path):
    raw = read_raw_bids(bids_path, verbose = False)
    raw = raw.pick_types(eeg = True)
    return raw

def set_electrode_positions(raw, montage_name, stim_channel):
    dig = mne.channels.make_standard_montage(montage_name)
    raw = raw.set_channel_types({stim_channel: 'stim'}) 
    raw = raw.set_montage(dig)
    return raw

def read_events(raw):
    events, events_ids = mne.events_from_annotations(raw)
    return events, events_ids

NameError: name 'KeyType' is not defined

#### Resampling and PREP

In [66]:
def resample(fs, events): # Resample to a more manageable speed
    raw, events = raw.resample(fs, events = events)
    return raw, events

def run_PREP(raw, sub, run, LOWPASS): # Run PREP pipeline (notch, exclude bad channels, and re-reference)
    raw.load_data()
    seed = int(str(sub) + str(run))
    np.random.seed(seed)

    lf = raw.info['line_freq']
    prep_params = {
        'ref_chs': 'eeg',
        'reref_chs': 'eeg',
        'line_freqs': np.arange(lf, LOWPASS, lf) if np.arange(lf, LOWPASS, lf).size > 0 else [lf]
    }
    prep = PrepPipeline(raw, prep_params, raw.get_montage(), ransac = False, random_state = sub_idx)
    prep = prep.fit()

    raw = prep.raw_eeg # replace raw with cleaned version
    bads = prep.noisy_channels_original
    return raw, bads

#### Apply the following preprocessing steps to two copies of the data
Split the data into two copies, one filtered more liberally for ICA so that high frequency noise can be detected, one band-pass filtered at the behaviorally relevant frequencies. All of the following preprocessing steps will be applied to each of the copies.

In [67]:
def bandpass(raw, l_freq, h_freq):
    raw = raw.filter(l_freq = l_freq, h_freq = h_freq)
    return raw

def create_eogs(raw):
    raw = mne.set_bipolar_reference(raw, anode = 'Fp1', cathode = 'FT10', ch_name = 'eog1', drop_refs = False)
    raw = mne.set_bipolar_reference(raw, anode = 'Fp2', cathode = 'FT9', ch_name = 'eog2', drop_refs = False)
    raw = raw.set_channel_types({'eog1': 'eog', 'eog2': 'eog'})
    return raw

def epoch(raw):
    epochs = mne.Epochs(
        raw, 
        events, 
        tmin = -0.2, 
        tmax = 0.250, 
        baseline = None, # do NOT baseline correct the trials yet; we do that after ICA
        event_id = event_ids, # remember which epochs are associated with which condition
        preload = True # keep data in memory
    )
    return epochs

def compute_ICA(raw, epochs):
    ica = ICA(n_components = 15, random_state = 0)
    ice = ica.fit(epochs, picks = 'eeg')
    return ica

def apply_ICA(epochs_for_ica, epochs):
    eog_indices, eog_scores = ice.find_bads_eog(epochs_for_ica, threshold = 1.96)
    ica.exclude = eog_indices
    epochs = ica.apply(epochs) # apply to aggressively filtered version of data
    return epochs, ica

#### Baseline correct and reject trials
Back to applying preprocessing on only one copy of the data. ICA is finished.

In [68]:
def baseline_correct(epochs):
    epochs = epochs.pick_types(eeg = True) # change syntax?
    epochs = epochs.apply_baseline((-0.2, 0.))
    return epochs

def reject_trials(threshold, epochs):
    epochs = epochs.drop_bad(reject = {'eeg': threshold})
    return epochs

#### Save results and generate report

In [72]:
def get_save_path(deriv_root, sub, task, run):
    sink = DataSink(folder, 'preprocessing')

    # save cleaned data
    fpath = sink.get_path(
                    subject = sub,
                    task = task, 
                    run = run,
                    desc = 'clean',
                    suffix = 'epo', # this suffix is following MNE, not BIDS, naming conventions
                    extension = 'fif.gz',
                    )
    return fpath

def save_preprocessed_data(fpath, epochs):
    epochs.save(fpath, overwrite = True)
    
def generate_report(fpath, epochs, ica, bads):
    report = mne.Report(verbose = True)
    report.parse_folder(op.dirname(fpath), pattern = '*epo.fif.gz', render_bem = False)

    # Plot the ERP
    fig_erp = epochs['50'].average().plot(spatial_colors = True)
    report.add_figs_to_section(
        fig_erp, 
        captions = 'Average Evoked Response', 
        section = 'evoked'
    )

    # Plot the excluded ICAs
    if ica.exclude: # if we found any bad components
        fig_ica_removed = ica.plot_components(ica.exclude)
        report.add_figs_to_section(
            fig_ica_removed, 
            captions = 'Removed ICA Components', 
            section = 'ICA'
        )     
    
    # Format output
    html_lines = []
    for line in pformat(bads).splitlines():
        html_lines.append('<br/>%s' % line) 
    html = '\n'.join(html_lines)
    report.add_htmls_to_section(html, captions = 'Interpolated Channels', section = 'channels')
    report.add_htmls_to_section('<br/>threshold: {:0.2f} microvolts</br>'.format(thres['eeg'] * 1e6), 
                                captions = 'Trial Rejection Criteria', section = 'rejection')
    report.add_htmls_to_section(epochs.info._repr_html_(), captions = 'Info', section = 'info')
    report.save(op.join(sink.deriv_root, 'sub-%s.html'%sub), overwrite = True)

## Preprocessing wrapper
Since we have to loop over all the data files the section below will contain the for loop to wrap all the preprocessing functions contained in the subsequent sections.

In [76]:
# for sub_idx, sub in enumerate(subjects):
# for bids_path, save_path, PREP_seed
for (sub, task, run) in fpaths():
#     sub = subjects[sub_idx]
#     task = tasks[0]
#     run = str(run[sub_idx])
    
    # Import data
    bids_path = get_bids_path(sub, task, run)
    if ~op.isfile(bids_path) # skip if file doesn't exist
        continue
    raw = import_bids_data(bids_path)
    events, event_ids = read_events(raw)
#     raw = set_electrode_positions(raw, 'standard_1020', 'Aux1')
    dig = mne.channels.make_standard_montage('standard_1020')
    raw = raw.set_channel_types({'Aux1': 'stim'})
    raw = raw.set_montage(dig)
    
#     # Make copy of unprocessed raw data for later comparison
#     raw_unprocessed = raw.copy()
    
#     # Resampling and PREP
#     raw, events = resample(FS, events)
#     raw, bads = run_PREP(raw, sub, run, LOWPASS)
    
#     # Apply the following preprocessing steps to two copies of the data
#     raw_for_ica = bandpass(l_freq = 1., h_freq = 1000)
#     raw = bandpass(l_freq = 30, h_freq = 270)
    
#     raw_for_ica = create_eogs(raw_for_ica)
#     raw = create_eogs(raw)
    
#     epochs_for_ica = epoch(raw_for_ica)
#     epochs = epoch(raw)
    
#     ica = compute_ICA(epochs_for_ica) # run ICA on less aggressively filtered data
#     epochs, ica = apply_ICA(epochs_for_ica, epochs) # apply ICA on more aggressively filtered data
    
#     # Baseline correct and reject trials
#     epochs = baseline_correct(epochs)
#     epochs = reject_trials(REJECT_THRES, epochs)
    
#     # Save results and generate report
#     fpath = get_save_fpath(DERIV_ROOT)
#     save_preprocessed_data(fpath, epochs)
#     generate_report(fpath, epochs, ica, bads)

Used Annotations descriptions: ['100', '150', '200', '250', '50']


  raw = raw.set_channel_types({'Aux1': 'stim'})


IndexError: string index out of range