# Helper Functions

In [None]:
def load_data(path, subject):
    """
    Read the raw N170 EEG recording of one subject from a BIDS dataset.

    path is used as the BIDS root directory and subject as the BIDS subject
    label. Task and session are both fixed to "N170".
    Returns the raw object after its data has been loaded.
    """
    # describe where the recording lives inside the BIDS tree
    recording = BIDSPath(root=path, subject=subject,
                         task="N170", session="N170",
                         datatype='eeg', suffix='eeg')

    raw = read_raw_bids(recording)
    # fix the annotations reading
    ccs_eeg_utils.read_annotations_core(recording, raw)

    raw.load_data()
    return raw

In [None]:
def add_channel_locations(raw):
    """
    Mark the EOG electrodes as 'eog' channels and set the 'standard_1020'
    montage on raw (channel names are matched case-insensitively).
    raw is modified in place, nothing is returned.
    """
    # the EOG electrodes have to be typed as eog before the montage is set
    for eog_name in ('HEOG_left', 'HEOG_right', 'VEOG_lower'):
        raw.set_channel_types({eog_name: 'eog'})
    raw.set_montage('standard_1020', match_case=False)

In [None]:
def filter_data(raw, low_bound, high_bound):
    """
    Filter a copy of raw with the mne FIR filter ('firwin' design) using
    low_bound and high_bound as band edges. The input raw is left untouched,
    the result of filtering the copy is returned.
    """
    duplicate = raw.copy()
    return duplicate.filter(low_bound, high_bound, fir_design='firwin')

In [None]:
def bad_segments(raw, subject_id):
    """
    Attach the stored bad-segment annotations of the subject to raw.
    Marking the segments by hand is currently switched off, so the
    annotation file for the subject has to exist already.
    For subject '001' the channel F8 is additionally set as bad channel.
    """
    # enable the next line to mark the bad segments by hand first
    #find_bad_segments(raw, subject_id)
    load_bad_segments(raw, subject_id)

    if subject_id != '001':
        return
    # only for subject 001 a channel was marked as bad by me
    raw.info['bads'] = ['F8']

In [None]:
def find_bad_segments(raw, subject_id, save=False):
    """
    Find bad segments by hand.

    Opens the interactive plot of raw so that bad segments can be annotated
    (press a and mark the segments by holding the mouse button).

    raw:        the raw data that is shown and annotated
    subject_id: subject label, used in the name of the annotation file
    save:       if True, the annotations labelled "BAD_" are written to
                "sub-<subject_id>_task-N170_badannotations.csv"
                (default False: nothing is written)
    """
    # block until the window is closed, otherwise the annotations would be
    # collected before anything has been marked
    raw.plot(n_channels=len(raw.ch_names), block=True)
    if not save:
        return
    # keep only the segments that were annotated as bad
    bad_ix = [i for i, a in enumerate(raw.annotations) if a['description'] == "BAD_"]
    raw.annotations[bad_ix].save("sub-{}_task-N170_badannotations.csv".format(subject_id))

In [None]:
def load_bad_segments(raw, subject_id):
    """
    Read the stored bad-segment annotations of the subject from
    "sub-<subject_id>_task-N170_badannotations.csv" and append them to the
    annotations of raw (in place).
    """
    csv_name = "sub-{}_task-N170_badannotations.csv".format(subject_id)
    stored = mne.read_annotations(csv_name)
    raw.annotations.append(stored.onset, stored.duration, stored.description)

In [None]:
def ica_filtering(raw, subject_id):
    """
    Decompose the given data into independent components and remove the bad ones.

    Components are marked as bad
    1. by the EOG channels (this automatic step is currently disabled)
    2. by hand, with a fixed list of component indices per subject
    The bad components are then excluded and the data is reconstructed from
    the remaining components.

    raw:        the (filtered) raw data, it is not modified
    subject_id: subject label ('001', '017', '031' have hand-picked components,
                every other subject gets an empty list)
    Returns a copy of raw with the bad components removed.
    """
    # copy of the data with a 1 Hz high-pass, used to inspect the components
    ica_raw = raw.copy()
    ica_raw.load_data().filter(l_freq=1, h_freq=None)
    ica = mne.preprocessing.ICA(method="fastica", random_state=111)
    # NOTE(review): the decomposition is fitted on raw and not on the 1 Hz copy
    # prepared above. The hand-picked component indices below belong to exactly
    # this decomposition, so they have to be selected again if the input of the
    # fit is changed.
    ica.fit(raw, verbose=True)

    # this is used to find bad components from ICA by hand
    ica.plot_properties(inst=ica_raw, dB=False, topomap_args={'extrapolate': 'head', 'contours': 0},
                        psd_args={'fmin': 0, 'fmax': 50}, picks=['eeg'])

    # find bad components via eog
    # (disabled, to enable it again use: eog_ind, _ = ica.find_bads_eog(raw))
    eog_ind = []
    print('EOG Bad Components: ' + str(eog_ind))

    # components found via analysing the spectrum of the components (likely muscle artifacts)
    expert_analysis = {
        '001': [5, 6, 14, 21],
        '017': [9],
        '031': [],
    }
    expert_analysis_list = expert_analysis.get(subject_id, [])

    # combine the two lists and avoid duplicates (on a copy, so eog_ind stays as it is)
    exclude_list = list(eog_ind)
    for element in expert_analysis_list:
        if element not in exclude_list:
            exclude_list.append(element)

    # remove the bad components from a copy of the data
    reconst_raw = raw.copy()
    ica.apply(reconst_raw, exclude=exclude_list)

    # visual comparison of the data before and after the cleaning
    raw.plot()
    reconst_raw.plot()
    ica.plot_overlay(raw, exclude=exclude_list)
    return reconst_raw

# -------------------------------------------------------------------------------------

# Imports and important variables

In [None]:
import osfclient
import mne
import mne_bids
import numpy as np
import ccs_eeg_utils
from ccs_eeg_semesterproject import (load_precomputed_badData, load_precomputed_ica)
from mne_bids import (BIDSPath,read_raw_bids)
from matplotlib import pyplot as plt
from importlib import reload  

# open the plots with the qt backend of matplotlib
%matplotlib qt
# root folder of the BIDS dataset (passed to load_data and the load_precomputed_* functions)
path = "../local/bidsN170"
# sub folder inside the subject folder, used to build the path of the saved files
temp_path = "/ses-N170/eeg/"
# label of the subject that is preprocessed by hand in section 1
sub = '001'

# 1 Preprocessing For Subjects 001, 017, 031

In [None]:
# load the raw data of the subject selected in sub
raw = load_data(path, sub)

In [None]:
# for a first view of the raw un-preprocessed data (all channels in one window)
raw.plot(n_channels=len(raw.ch_names))

In [None]:
# adding the channel positions (relevant for some calculations later)
# this also sets the type of the EOG channels
add_channel_locations(raw)

In [None]:
# a first look at the power spectral density of the raw data
raw.plot_psd(fmax=75., average=True, spatial_colors=False)

In [None]:
# filtering the data with a bandpass filter (0.5 - 50 Hz)
# the filter works on a copy, raw itself is still unfiltered
raw_filtered = filter_data(raw, 0.5, 50)

In [None]:
# as sanity check a second look at the power spectral density after filtering
raw_filtered.plot_psd(fmax=75., average=True, spatial_colors=False)

In [None]:
# from here on continue with the filtered data
raw = raw_filtered

In [None]:
# load the stored bad segments into the data
# (for subject 001 this also marks the channel F8 as bad)
bad_segments(raw, sub)

In [None]:
# apply ICA to the data and remove the bad components
# the result is a cleaned copy, raw is not changed
raw_ica = ica_filtering(raw, sub)

In [None]:
# interpolate bad channels (in my case only for subject 001 relevant)
raw_ica.interpolate_bads()

In [None]:
# from here on continue with the ICA cleaned data
raw = raw_ica

In [None]:
# rereference the data by the average
# the reference is requested as projection (projection=True) and applied with apply_proj
raw_rereference, _  = mne.set_eeg_reference(raw, 'average', projection=True)
raw_rereference.apply_proj()

In [None]:
# save the data in the same path as the raw data
# an existing cleaned file of the subject is overwritten
save_path = path + "/sub-" + sub + temp_path + "sub-" + sub +"_cleaned.fif"
raw_rereference.save(save_path, overwrite=True)

# -------------------------------------------------------------------------------------

# 2 Preprocessing For The Remaining Subjects

In [None]:
for s in range(1,41):
    # if we preprocessed the subject already (section 1), go on with next subject
    if s in [1,17,31]:
        continue

    # build the name for reading the data: the subject number padded
    # with zeros to three digits ('002', '010', ...)
    sub = str(s).zfill(3)
    raw = load_data(path, sub)
    add_channel_locations(raw)
    # filter the data with a bandpass filter
    # filter_data has no default bounds, so the 0.5 - 50 Hz band that is used
    # for all other subjects has to be passed here as well
    raw = filter_data(raw, 0.5, 50)

    # load preprocessing information
    # NOTE(review): annotations and bad_ch are loaded but not applied to raw
    # in this loop - confirm whether that is intended
    annotations, bad_ch = load_precomputed_badData(bids_root=path, subject_id=sub, task="N170")
    ica, bad_components = load_precomputed_ica(bids_root=path, subject_id=sub, task="N170")
    # remove the precomputed bad components from the data
    ica.apply(raw, exclude=bad_components)

    # rereference the data by the average
    raw_rereference, _ = mne.set_eeg_reference(raw, 'average', projection=True)
    raw_rereference.apply_proj()

    save_path = "".join([path, "/sub-", sub, temp_path, "sub-", sub, "_cleaned.fif"])
    # saving is switched off at the moment, use the next line to write the file
    #raw_rereference.save(save_path, overwrite=True)
print('DONE')

# -------------------------------------------------------------------------------------

# 3 Save Preprocessing Without Setting Channel Positions (to avoid a strange bug in decoding)

In [None]:
for s in range(1, 41):
    # subjects 1, 17 and 31 are handled in section 1 and are skipped here
    if s in (1, 17, 31):
        continue

    # subject label: the number padded with zeros to three digits
    sub = str(s).zfill(3)
    # same steps as in section 2, only the channel positions are not set
    raw = filter_data(load_data(path, sub), 0.5, 50)

    # precomputed preprocessing information of the subject
    annotations, bad_ch = load_precomputed_badData(bids_root=path, subject_id=sub, task="N170")
    ica, bad_components = load_precomputed_ica(bids_root=path, subject_id=sub, task="N170")
    ica.apply(raw, exclude=bad_components)

    # average reference, requested as projection and applied directly
    raw_rereference, _ = mne.set_eeg_reference(raw, 'average', projection=True)
    raw_rereference.apply_proj()

    save_path = "".join([path, "/sub-", sub, temp_path, "sub-", sub, "_cleaned_no_channel_positions.fif"])
    #raw_rereference.save(save_path, overwrite=True)
print('DONE')