This notebook loads the available data for every participant and presents it to you sequentially so you can
decide which epochs to keep and which to discard. To do this you can run it with `INSPECT_MODE = True`.
If you set this variable to `False`, it will instead look for previously saved files that record which bad channels to interpolate and which epochs to keep (all other epochs are discarded).
After the Setup part, functions are defined that are called in the main loop.
This makes it a bit easier to debug.

### To Do:
- For each participant create an "analysis_log" file with excluded trials and for what reason.
- Create a "general_analysis_log" with less detail, but condensed over all participants
- For VP 467 the "clean" versions of the behavioral dfs and the "full" version of the .bdf file should be loaded!
It's the one that comes in two parts 

# Setup

In [1]:
# Settings
# INSPECT_MODE = True  -> the epochs are plotted so bad channels / epochs can be marked by hand
#                         and the decisions are written to disk.
# INSPECT_MODE = False -> the previously saved decisions are loaded instead.
INSPECT_MODE = False
EEG_DATA_PATH = '..\\Data_raw\\EEG'
BEHAV_DATA_PATH = '..\\Data_raw\\Behav'

# Folder that holds the montage definition files.
# `None` lets `mne.channels.read_montage` fall back to the montages folder that ships with the
# installed mne package, so no user-specific absolute path
# (e.g. 'C:\\Users\\<name>\\Anaconda3\\Lib\\site-packages\\mne\\channels\\data\\montages')
# has to be edited for every machine. Set it to a folder path only if a custom montage is needed.
MONTAGE_PATH = None

# Imports
import os
import mne
from mne.preprocessing import ICA
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# The epochs are inspected in blocking, interactive plot windows, hence the Qt backend.
plt.switch_backend('Qt5Agg')

# Global seaborn look: large "poster" fonts, white background, four-colour husl palette.
sns.set_context(context="poster")
sns.set_style(style="white")
sns.set_palette(palette=sns.husl_palette(n_colors=4, h=.5, s=.9, l=.65))

In [2]:
# Decide who to include.
# A longer participant list, currently disabled:
# all_subj = [402, 403, 405, 407, 408, 409, 410, 411, 413, 414, 415, 416, 418, 422, 423,
#            425, 427, 428, 429, 430, 433, 434, 436, 439, 440, 442, 443, 444,
#            445, 446, 447, 448, 449, 450, 454, 455, 457, 460, 461, 463, 464, 467, 468, 470, 471]
# The participants that the main loop actually processes:
all_subj = [
    402, 403, 405, 407,
    464, 468, 470, 471,
]

# Functions

In [None]:
# NOTE: leftover debugging cell, disabled on purpose.
# It reads `subj`, which is only bound once the main loop at the bottom of the notebook runs,
# so on a fresh kernel ("Restart & Run All") it stopped the notebook with a NameError.
# The main loop loads every recording through load_eeg_data(), so nothing depends on this cell.
# To load a single recording by hand, define `subj` first and uncomment the next line:
# raw = mne.io.read_raw_bdf('{}{}VP_{}.bdf'.format(EEG_DATA_PATH, os.sep, subj), preload=True)


In [None]:
def load_eeg_data(subj, data_path, montage_path):
    """Read one participant's BDF recording and prepare its channels.

    The file ``VP_<subj>.bdf`` is read from `data_path` with the data preloaded. Afterwards the
    channel names are shortened, surplus channels are dropped, the 'biosemi64' montage found
    under `montage_path` is attached, the auxiliary channels are re-typed and combined into
    bipolar channels, and the data are re-referenced to PO9 and FT9, which are then removed.

    Returns the modified `raw` object.
    """
    bdf_file = '{}{}VP_{}.bdf'.format(data_path, os.sep, subj)
    raw = mne.io.read_raw_bdf(bdf_file, preload=True)

    # Fix channel names: every channel except the last one loses its first two characters.
    original_names = raw.ch_names[:-1]
    raw.rename_channels({old: old[2:] for old in original_names})

    # Keep the first 64 + 9 channels and the very last one, drop everything in between.
    last_index = len(raw.ch_names) - 1
    surplus_channels = raw.ch_names[64 + 9:last_index]
    raw.drop_channels(surplus_channels)

    # Fix data type:
    biosemi_montage = mne.channels.read_montage(kind='biosemi64', path=montage_path)
    raw.set_montage(biosemi_montage)

    channel_types = {
        'FT10': 'eog', 'PO10': 'eog', 'HeRe': 'eog',
        'HeLi': 'emg', 'VeUp': 'emg', 'VeDo': 'emg',
        'EMG1a': 'emg', 'Status': 'resp',
    }
    raw.set_channel_types(mapping=channel_types)

    # QUESTION: Laura used the mean of PO9 and FT9 against HeRe in one script.
    # Would that make more sense?
    anodes = ['PO10', 'HeRe', 'HeLi', 'VeUp']
    cathodes = ['FT10', 'FT10', 'VeDo', 'EMG1a']
    mne.set_bipolar_reference(raw, anode=anodes, cathode=cathodes, ch_name=None, copy=False)

    # Re-reference to PO9 / FT9; the two reference channels are not kept afterwards.
    reference_channels = ['PO9', 'FT9']
    raw.set_eeg_reference(ref_channels=reference_channels)
    raw.drop_channels(['PO9', 'FT9'])

    return raw

In [47]:
def find_triggers(raw, n_training_trials=5):
    """Extract the trigger events of a recording from its 'Status' channel.

    Trigger codes used in column 2 of the event arrays:
    2 = Masked, 4 = Reveal, 8 = Left_choice, 16 = Right_choice, 32 = No_choice.

    Parameters
    ----------
    raw : the continuous recording containing a 'Status' channel.
    n_training_trials : int
        Number of leading 'Reveal' events that belong to the training and are cut away.
        Defaults to 5.

    Returns
    -------
    events : all events that were found.
    reveals : the 'Reveal' events (code 4) without the training trials.
    responses : the left / right choice events (codes 8 and 16).
    """
    events = mne.find_events(raw, stim_channel='Status', uint_cast=True, consecutive=True, min_duration=.01)
    trigger_codes = events[:, 2]

    responses = events[np.isin(trigger_codes, [8, 16])]
    reveals = events[trigger_codes == 4]
    reveals = reveals[n_training_trials:, :]  # Cut away the training

    return events, reveals, responses

In [48]:
def load_behav_data(subj, data_path):
    """Load the behavioural files of one participant into a single dataframe.

    ``Options_VP_<subj>.csv`` and ``ForcedData_VP_<subj>.csv`` are read from `data_path` and
    joined column-wise. Two boolean columns are added: ``chose_ego`` and ``chose_left``.
    """
    def read_table(template):
        # The templates take the folder, the path separator and the participant number.
        return pd.read_csv(template.format(data_path, os.sep, subj))

    # Create a dataframe from the options and choices
    trials = pd.concat([read_table('{}{}Options_VP_{}.csv'),
                        read_table('{}{}ForcedData_VP_{}.csv')], axis=1)

    # Create a column indicating whether the person chose the egoistic option / left option
    ego_side = 1 - trials.ego_left
    chosen_side = trials.Chosen - 1
    trials = trials.assign(chose_ego=ego_side == chosen_side)
    trials = trials.assign(chose_left=trials.Chosen.eq(1))

    return trials

In [49]:
# Fixing the participant data where something went wrong:
def manual_fixes(raw, df, subj, events, reveals, responses):
    """Apply the hand-written corrections for participants whose recording went wrong.

    Parameters
    ----------
    raw : the continuous recording; its data are modified in place for participants 426 and 456.
    df : pandas.DataFrame
        One row per trial, with a ``Chosen`` column in which 0 marks a trial without a choice.
    subj : int
        Participant number that selects the correction.
    events : array with all trigger events (trigger code in column 2).
    reveals : array with the 'Reveal' events (code 4) that are currently used.
    responses : array with the choice events.

    Returns
    -------
    raw, df, reveals, responses
        The corrected objects; inputs that need no correction are returned unchanged.
    """
    def reveals_without_first(n_skipped):
        # All 'Reveal' triggers (code 4) of the whole recording, minus the first `n_skipped`.
        return events[events[:, 2] == 4][n_skipped:, :]

    def count_answered(trials):
        # Number of trials in which a choice was made (only those have a response trigger).
        return int((trials.Chosen != 0).sum())

    if subj in [405, 443, 463]:  # Got cut off at the start:
        # The training shouldn't be cut off; only the first choice is cut, just for safety.
        reveals = reveals_without_first(1)
        # Keep the trailing rows that have a reveal trigger. max() stops a negative start
        # index from wrapping around when there are more triggers than rows.
        df = df.iloc[max(df.shape[0] - reveals.shape[0], 0):]
        responses = responses[max(responses.shape[0] - count_answered(df), 0):, :]
    if subj in [427, 434]:  # ran out of battery
        reveals = reveals[:-1, :]  # Cut the last choice, just for safety
        # Keep the leading rows that have a reveal trigger. Slicing with the count itself
        # (not with a negative difference) also works when nothing has to be removed.
        df = df.iloc[:reveals.shape[0]]
        responses = responses[:count_answered(df), :]
    if subj == 431:  # Started a bit late
        reveals = reveals_without_first(2)
    if subj == 418:
        reveals = reveals_without_first(4)  # Only four of the trainings were recorded
    if subj == 426:  # FC6 was used at the T8 position!
        fc6_data, _ = raw[mne.pick_channels(raw.ch_names, include=['FC6'])]
        raw[mne.pick_channels(raw.ch_names, include=['T8'])] = fc6_data
        raw.info['bads'] += ['FC6']
    if subj == 456:  # VPN 456 had some electrodes switched! Here we switch them back.
        # NOTE(review): load_eeg_data() passes EMG1a / VeUp / HeRe to set_bipolar_reference
        # before this function runs - verify these channel names still exist at this point.
        emg_idx = mne.pick_channels(raw.ch_names, include=['EMG1a'])
        veup_idx = mne.pick_channels(raw.ch_names, include=['VeUp'])
        here_idx = mne.pick_channels(raw.ch_names, include=['HeRe'])
        # Indexing `raw` returns a (data, times) pair; only the data part may be written back.
        emg_data = raw[emg_idx][0].copy()
        raw[emg_idx] = raw[veup_idx][0]
        raw[veup_idx] = raw[here_idx][0]
        raw[here_idx] = emg_data

    return raw, df, reveals, responses

In [50]:
# Get rid of too fast responses and non-choices
def clean_response_data(df, reveals, responses, min_rt=.2):
    """Remove trials without a choice and trials that were answered too fast.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per trial with the columns ``Chosen`` (0 = no choice, 1 = left) and ``RT``.
    reveals : array with one 'Reveal' event per row of `df`.
    responses : array with one response event per trial in which a choice was made.
    min_rt : float
        Only trials with ``RT > min_rt`` are kept. Defaults to .2 (200 ms, given that the
        message below reports the limit in ms, so ``RT`` is treated as seconds).

    Returns
    -------
    df, reveals, responses
        The three inputs restricted to the same remaining trials.
    """
    answered = (df.Chosen != 0).values
    reveals = reveals[answered, :]  # Get rid of non-choices
    df = df.loc[answered]  # Also cut non-responses from the df
    # Non-responses don't have a `response` trigger, so they need not be cut from `responses`

    # Make the "Chosen" variable clearer:
    df = df.assign(chose_left=df.Chosen == 1)

    # Cut away responses that are not slower than `min_rt`. The reported number is derived
    # from the very mask that is applied, so it always equals the number of removed trials.
    slow_enough = (df.RT > min_rt).values
    print('{} epochs with RT <= {:g}ms!'.format(int((~slow_enough).sum()), min_rt * 1000))
    reveals = reveals[slow_enough, :]
    responses = responses[slow_enough, :]
    df = df.loc[slow_enough]

    return df, reveals, responses


In [51]:
def create_epochs(raw, responses, reveals, df):
    """Cut the continuous recording into stimulus-locked and response-locked epochs.

    Parameters
    ----------
    raw : the continuous recording.
    responses : event array with one response trigger per row of `df`. It is not modified;
        the recoding of the trigger column happens on a copy.
    reveals : event array with the 'Reveal' triggers (code 4).
    df : pandas.DataFrame with a boolean ``chose_ego`` column, one row per response.

    Returns
    -------
    epochs_stimlocked, epochs_reslocked
        Both resampled to 512 Hz.
    """
    # Recode the trigger column to 1 = 'chose_ego' / 0 = 'chose_altro'. Working on a copy keeps
    # the caller's array (and its original 8 / 16 trigger codes) intact.
    response_events = responses.copy()
    response_events[:, 2] = np.asarray(df.chose_ego)

    # The same channel selection is used for both kinds of epochs.
    picks = mne.pick_types(raw.info, eeg=True, eog=True, emg=False, stim=False, exclude='bads')

    # QUESTION: Baseline ok?
    # -> Shouldn't play a role for tf-stuff, since it's the voltage baseline. But try both and compare!
    epochs_reslocked = mne.Epochs(raw, response_events, {'chose_ego': 1, 'chose_altro': 0}, tmin=-1.5,
                                  tmax=.2, baseline=(-1.5, -1.2), preload=True,  # response locked
                                  picks=picks)

    epochs_stimlocked = mne.Epochs(raw, reveals, {'Reveal': 4}, tmin=0, tmax=3,  # Stimulus locked
                                   baseline=(0, .2), preload=True,
                                   picks=picks)

    epochs_stimlocked.resample(512, npad='auto')
    epochs_reslocked.resample(512, npad='auto')

    return epochs_stimlocked, epochs_reslocked

The following function `deal_with_bad_channels()` depends on the `INSPECT_MODE`. It either shows the plots to select the
bad channels by hand or it loads those from a file and interpolates them by their neighbouring channels.

In [52]:
def deal_with_bad_channels(inspect_mode, epochs_stimlocked, epochs_reslocked, subj=None):
    """Mark the bad channels of one participant and interpolate them.

    Parameters
    ----------
    inspect_mode : bool
        True: the stimulus-locked epochs are plotted so the bad channels can be marked by hand;
        the selection is saved to ``Intermediate/Bads/bad_channels_VP_<subj>.csv``.
        False: the bad channels are loaded from that file.
    epochs_stimlocked, epochs_reslocked : the two epochs objects; both get the same bad channels.
    subj : int, optional
        Participant number used in the file name. If omitted, the module-level variable `subj`
        (the loop variable of the main loop) is used, as in earlier versions of this function.

    Returns
    -------
    epochs_stimlocked, epochs_reslocked
    """
    if subj is None:
        # Backward compatible fallback for callers that rely on the main loop's global `subj`.
        try:
            subj = globals()['subj']
        except KeyError:
            raise NameError("name 'subj' is not defined - pass `subj` explicitly")

    # os.path.join keeps the path valid on every operating system.
    bads_file = os.path.join('Intermediate', 'Bads', 'bad_channels_VP_' + str(subj) + '.csv')

    if inspect_mode:
        epochs_stimlocked.plot(block=True, scalings={'eeg': 5e-5})
        np.savetxt(bads_file, epochs_stimlocked.info['bads'], fmt='%s')
        # Hand over a copy, so the two objects never share one and the same list.
        epochs_reslocked.info['bads'] = list(epochs_stimlocked.info['bads'])
    else:
        # atleast_1d: a file with a single channel name is loaded as a 0-d array.
        these_bads = np.atleast_1d(np.loadtxt(bads_file, dtype='str')).tolist()
        epochs_stimlocked.info['bads'] = list(these_bads)
        epochs_reslocked.info['bads'] = list(these_bads)

    # Interpolate bad Channels
    if len(epochs_stimlocked.info['bads']) > 0:
        epochs_stimlocked.interpolate_bads(reset_bads=True)
        epochs_reslocked.interpolate_bads(reset_bads=True)

    return epochs_stimlocked, epochs_reslocked

In [53]:
def remove_eye_artifacs(raw, epochs_stimlocked, epochs_reslocked):
    """Remove eye-related ICA components from both epochs objects.

    Epochs around EOG events are created from `raw`, an ICA is fitted on the output of
    ``raw.filter(1, None)``, the components flagged by ``find_bads_eog`` are determined and
    excluded when the ICA is applied to `epochs_stimlocked` and `epochs_reslocked`.

    Returns the two epochs objects that were passed in.
    """
    ica_settings = dict(
        n_components=20,     # if float, select n_components by explained variance of PCA
        method='fastica',    # for comparison with EEGLAB try "extended-infomax" here
        random_state=20,
    )
    decimation = 2  # we need sufficient statistics, not all time points -> saves time

    # TODO: Look at eye artefact removal again.
    blink_epochs = mne.preprocessing.create_eog_epochs(raw, tmin=-.5, tmax=.5)

    ica = ICA(**ica_settings)
    print(ica)

    # NOTE(review): `filter` is called on the `raw` that was passed in, not on a copy -
    # presumably the caller's recording is filtered as a side effect; confirm this is intended.
    highpassed = raw.filter(1, None)
    ica.fit(highpassed, picks=None, decim=decimation)
    print(ica)

    eog_inds, _ = ica.find_bads_eog(blink_epochs)

    for epochs in (epochs_stimlocked, epochs_reslocked):
        ica.apply(epochs, exclude=eog_inds)

    return epochs_stimlocked, epochs_reslocked

In [54]:
def deal_with_bad_epochs(inspect_mode, epochs_stimlocked, epochs_reslocked, df_responses,
                         subj=None, n_trials=360):
    """Reject bad epochs of one participant and save the cleaned EEG and behavioural data.

    Parameters
    ----------
    inspect_mode : bool
        True: both epochs objects are plotted for manual rejection and the remaining
        ``selection`` of each is saved to ``Intermediate/Bads/keep_epochs_{stim,res}_VP_<subj>.csv``.
        False: these files are loaded and every epoch whose ``selection`` entry is not listed
        is dropped.
    epochs_stimlocked, epochs_reslocked : the epochs objects to clean (modified in place).
    df_responses : pandas.DataFrame
        Behavioural data; its rows are picked by position with the remaining ``selection``.
    subj : int, optional
        Participant number used in the file names. If omitted, the module-level variable `subj`
        (the loop variable of the main loop) is used, as in earlier versions of this function.
    n_trials : int
        Number of trials the warning threshold refers to. Defaults to 360.

    Side effects
    ------------
    Writes ``subject_<subj>_{stimlocked,reslocked}-epo.fif`` to ``Data_clean/EEG`` and
    ``clean_df_{reslocked,stimlocked}_VP_<subj>.csv`` to ``Data_clean/Behavior``. Asks for
    confirmation via ``input`` when fewer than 75 % of `n_trials` epochs are left.
    """
    if subj is None:
        # Backward compatible fallback for callers that rely on the main loop's global `subj`.
        try:
            subj = globals()['subj']
        except KeyError:
            raise NameError("name 'subj' is not defined - pass `subj` explicitly")

    # os.path.join keeps the paths valid on every operating system.
    bads_dir = os.path.join('Intermediate', 'Bads')
    keep_file_stim = os.path.join(bads_dir, 'keep_epochs_stim_VP_' + str(subj) + '.csv')
    keep_file_res = os.path.join(bads_dir, 'keep_epochs_res_VP_' + str(subj) + '.csv')

    if inspect_mode:
        print('Human inspection of stimlocked epochs')
        epochs_stimlocked.plot(block=True, scalings={'eeg': 5e-5})
        np.savetxt(keep_file_stim, epochs_stimlocked.selection, fmt='%i')

        print('Human inspection of reslocked epochs')
        epochs_reslocked.plot(block=True, scalings={'eeg': 5e-5})
        np.savetxt(keep_file_res, epochs_reslocked.selection, fmt='%i')
    else:
        for epochs, keep_file in ((epochs_stimlocked, keep_file_stim),
                                  (epochs_reslocked, keep_file_res)):
            # atleast_1d: a file with a single entry is loaded as a 0-d array.
            epochs_to_be_kept = np.atleast_1d(np.loadtxt(keep_file, dtype='int'))
            # The files hold `selection` values, so they have to be compared with the current
            # `selection` - not with the running position - to find the positions to drop.
            # Both only coincide as long as `selection` is a gapless 0..n-1 range.
            epochs_to_be_dropped = np.flatnonzero(~np.isin(epochs.selection, epochs_to_be_kept)).tolist()
            if epochs_to_be_dropped:
                epochs.drop(epochs_to_be_dropped)

    min_epochs = n_trials * .75
    if len(epochs_stimlocked.selection) < min_epochs or len(epochs_reslocked.selection) < min_epochs:
        print('More than .25 of the Epochs were rejected! Please exclude Participant.')
        input('Press Enter to continue...')

    # Save clean Data
    eeg_dir = os.path.join('Data_clean', 'EEG')
    epochs_stimlocked.save(os.path.join(eeg_dir, 'subject_{}_stimlocked-epo.fif'.format(subj)),
                           overwrite=True)
    epochs_reslocked.save(os.path.join(eeg_dir, 'subject_{}_reslocked-epo.fif'.format(subj)),
                          overwrite=True)

    # Save clean behavioral data
    behavior_dir = os.path.join('Data_clean', 'Behavior')
    df_reslocked = df_responses.iloc[epochs_reslocked.selection]
    df_reslocked.to_csv(os.path.join(behavior_dir, 'clean_df_reslocked_VP_{}.csv'.format(subj)))
    df_stimlocked = df_responses.iloc[epochs_stimlocked.selection]
    df_stimlocked.to_csv(os.path.join(behavior_dir, 'clean_df_stimlocked_VP_{}.csv'.format(subj)))

# Main Loop

This is the main loop of the file. To use it, first make sure everything in the `setup` part is set correctly.
Then run all cells, so the functions are created before the loop is started.
The function names should technically speak for themselves.

In [55]:
# Main Loop
# Runs the complete preprocessing for every participant in `all_subj`, one after the other.
# NOTE: `subj` has to stay a module-level loop variable - deal_with_bad_channels() and
# deal_with_bad_epochs() read it as a global to build their file names.
for subj in all_subj:
    print('Starting subj {}'.format(subj))
    # Load the recording, the triggers and the behavioural data.
    raw = load_eeg_data(subj, EEG_DATA_PATH, MONTAGE_PATH)
    event_trigs, reveal_trigs, response_trigs = find_triggers(raw)
    df_responses = load_behav_data(subj, BEHAV_DATA_PATH)
    # Participant-specific corrections first, then the removal of non-choices / too fast
    # responses from the dataframe and both trigger arrays.
    raw, df_responses, reveal_trigs, response_trigs = manual_fixes(raw, df_responses, subj,
                                                                   event_trigs, reveal_trigs, response_trigs)
    df_responses, reveal_trigs, response_trigs = clean_response_data(df_responses, reveal_trigs, response_trigs)

    # Filters
    # Notch filter at 50 Hz and its multiples up to 200 Hz on the eeg / eog / emg channels.
    picks = mne.pick_types(raw.info, eeg=True, eog=True, emg=True, stim=False)
    raw.notch_filter([50, 100, 150, 200], picks=picks, filter_length='auto', phase='zero', fir_design='firwin')
    # raw.filter(1, 160, picks=picks, fir_design='firwin')

    # Epoching, bad-channel handling and eye-artifact removal; each step returns both the
    # stimulus-locked and the response-locked epochs.
    epochs_stim, epochs_res = create_epochs(raw, response_trigs, reveal_trigs, df_responses)
    epochs_stim, epochs_res = deal_with_bad_channels(INSPECT_MODE, epochs_stim, epochs_res)
    epochs_stim, epochs_res = remove_eye_artifacs(raw, epochs_stim, epochs_res)
    
    # Rejects bad epochs and writes the cleaned EEG and behavioural files to disk.
    deal_with_bad_epochs(INSPECT_MODE, epochs_stim, epochs_res, df_responses)
    
    print('Done with subj {}\n'
          '***************************'.format(subj))




Starting subj 402
Extracting EDF parameters from C:\Users\Kevin\Desktop\Soz_Entsch\Data_raw\EEG\VP_402.bdf...
BDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 3196927  =      0.000 ...  3121.999 secs...
Bipolar channel added as "PO10-FT10".
Bipolar channel added as "HeRe-FT10".
Bipolar channel added as "HeLi-VeDo".
Bipolar channel added as "VeUp-EMG1a".
Applying a custom EEG reference.
1090 events found
Event IDs: [ 2  4  8 16]
0 epochs with RT < 300ms!
Setting up band-stop filter

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower transition bandwidth: 0.50 Hz
- Upper transition bandwidth: 0.50 Hz
- Filter length: 6759 samples (6.601 sec)

360 matching events found
Applying baseline correction (mode: mean)
Not setting metadata
0 projection items activated


  raw.set_montage(montage)
  raw.set_montage(montage)
  
  raw.set_montage(montage)
  raw.set_montage(montage)
  raw.set_montage(montage)
  
  raw.set_montage(montage)
  raw.set_montage(montage)
  raw.set_montage(montage)
  
