### Imports:

In [1]:
from os import listdir
from importlib import reload
import numpy as np
import pyxdf
import mne
from utils import *
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split, cross_val_score

print('Imports done...')

Imports done...


### Functions

In [44]:
# Helper functions:
def extract_eeg(stream, kick_last_ch=True):
    """
    Extracts the EEG data and the EEG timestamps from an XDF stream.

    :param stream: XDF stream (dict) holding the samples under 'time_series'
                   and the sample timestamps under 'time_stamps'.
    :param kick_last_ch: If True, drop the last (65th) row, i.e. the unused
                         Brainproducts marker channel.
    :return: eeg: array of shape (channels, samples), scaled by 1e-6
             eeg_ts: the stream's 'time_stamps', returned unchanged.
    :raises AssertionError: if the stream does not contain exactly 65 rows.
    """
    # Read from the `stream` argument (not from a notebook-level variable) and
    # scale into a NEW array: an in-place `*=` would modify the caller's stream
    # and scale the data again on every repeated call.
    eeg = stream['time_series'].T * 1e-6  # Convert to volts (assumes microvolt input).
    assert eeg.shape[0] == 65, f'expected 65 rows (64 EEG + 1 marker), got {eeg.shape[0]}'
    eeg_ts = stream['time_stamps']

    if kick_last_ch:
        # Kick the last row (unused Brainproduct markers):
        eeg = eeg[:64, :]

    return eeg, eeg_ts


def extract_eeg_infos(stream, n_channels=64, eog_indices=(16, 21, 40)):
    """
    Extracts sampling rates, channel names and channel types from an EEG stream.

    :param stream: XDF stream (dict) whose 'info' entry holds 'nominal_srate',
                   'effective_srate' and the channel descriptions.
    :param n_channels: Number of leading channels to describe (default 64).
    :param eog_indices: Indices (within the first n_channels) that are typed
                        as 'eog'; all other channels are typed as 'eeg'.
    :return: sampling_rate: nominal sampling rate as float
             names: list with the labels of the first n_channels channels
             labels: list with the channel type ('eeg' or 'eog') per channel
             effective_sample_frequency: effective sampling rate as float
    """
    info = stream['info']
    sampling_rate = float(info['nominal_srate'][0])
    effective_sample_frequency = float(info['effective_srate'])

    # Extract channel names:
    channels = info['desc'][0]['channels'][0]['channel']
    names = [channels[i]['label'][0] for i in range(n_channels)]

    # Everything is EEG except the explicitly listed EOG channels:
    labels = ['eeg'] * n_channels
    for eog_idx in eog_indices:
        labels[eog_idx] = 'eog'

    return sampling_rate, names, labels, effective_sample_frequency


def extract_annotations(marker_stream, first_samp):
    """
    Collects the triggers of a marker stream as input for MNE annotations.

    :param marker_stream: xdf stream holding the markers ('time_series') and
                          their timestamps ('time_stamps').
    :param first_samp: Value subtracted from every marker timestamp so that the
                       onsets are expressed relative to it.
    :return: triggers: Dict with the keys 'onsets', 'duration' and 'description',
                       one list entry per marker.
    """
    marker_entries = marker_stream['time_series']

    # Shift the marker timestamps so that `first_samp` becomes time zero.
    shifted_ts = marker_stream['time_stamps'] - first_samp

    # One (onset, description) pair per marker; the first element of each
    # marker entry is used as its description.
    pairs = [(shifted_ts[pos], entry[0]) for pos, entry in enumerate(marker_entries)]

    return {
        'onsets': [onset for onset, _ in pairs],
        'duration': [int(0) for _ in pairs],
        'description': [text for _, text in pairs],
    }

### Constants

In [52]:
# Root folder of the recorded data; '<subject>/<paradigm>' is appended to it below.
# NOTE(review): absolute, machine-specific path - has to be adapted on any other machine.
path = 'C:/Users/tumfart/Code/github/master-thesis/data/'
# Subject folders to process; the IDs in the trailing comment are currently disabled.
subjects = ['A01', 'A02'] #, 'A03', 'A04', 'A05', 'A06', 'A07' , 'A08', 'A09', 'A10']
# (leftover of a former single-subject setting: 'A03')
# Name of the sub-folder inside every subject folder that is read.
paradigm = 'paradigm' # 'eye', 'paradigm'
# Set to True to show the plots guarded by `if plot:` in the cells below.
plot = False
mne.set_log_level('WARNING')

### Read xdf-files for specified subject

In [53]:
# One recording folder per subject: <path><subject>/<paradigm>
paths = [f'{path}{subject}/{paradigm}' for subject in subjects]

In [55]:
# Walk through every subject folder and turn each recording into an MNE Raw object.
# NOTE(review): the loop variable rebinds the notebook-level `path`, so re-running
# the path-list cell after this one would build wrong folders.
for subject, path in zip(subjects, paths):
    print(f'Extracting subject {subject}...')
    file_names = listdir(path)

    for file_name in file_names:
        print('####', end='#')
        file = f'{path}/{file_name}'

        # Load the recording and separate it into EEG and marker stream:
        streams, header = pyxdf.load_xdf(file)
        eeg_stream, marker_stream = split_streams(streams)

        # EEG samples, their timestamps and the stream meta data:
        eeg, eeg_ts = extract_eeg(eeg_stream, kick_last_ch=True)
        fs, ch_names, ch_labels, eff_fs = extract_eeg_infos(eeg_stream)

        # Markers, expressed relative to the first EEG timestamp, as MNE annotations:
        triggers = extract_annotations(marker_stream, first_samp=eeg_ts[0])
        annotations = mne.Annotations(onset=triggers['onsets'],
                                      duration=triggers['duration'],
                                      description=triggers['description'],
                                      orig_time=None)

        # TODO: Check what info can be added to the stream:
        info = mne.create_info(ch_names=ch_names, sfreq=fs, ch_types=ch_labels)

        # Assemble the raw object with montage and annotations:
        raw = mne.io.RawArray(eeg, info, first_samp=eeg_ts[0])
        raw.set_montage('standard_1005')
        raw.set_annotations(annotations)

        if plot:
            raw.plot(title='Raw', duration=60, n_channels=len(raw.ch_names),
                     proj=False, remove_dc=False)

    print()


Extracting subject A01...
#############################################
Extracting subject A02...
#############################################


In [4]:
# Load the first recording found in the last subject folder.
# The folder is taken explicitly from `paths` instead of relying on whatever value
# the `path` variable happens to hold after earlier cells were run.
subject_path = paths[-1]

# os.listdir returns the entries in arbitrary order; sort them so that "the first
# file" is reproducible. The slice keeps the cell working for an empty folder.
files = sorted(listdir(subject_path))[:1]

eeg_streams = []
marker_streams = []
for file in files:
    file_name = subject_path + '/' + file
    print('####', end='#')

    # Read streams
    streams, header = pyxdf.load_xdf(file_name)

    # Split the streams:
    eeg_stream, marker_stream = split_streams(streams)

    eeg_streams.append(eeg_stream)
    marker_streams.append(marker_stream)


print()
print(f'Finished reading, found {len(eeg_streams)} EEG streams and {len(marker_streams)} marker streams...')

#####
Finished reading, found 1 EEG streams and 1 marker streams...


In [27]:
# Concatenate the EEG of all loaded recordings along the time axis and remember
# the timestamp gap between consecutive recordings.
differences = [0]   # gap between the end of one recording and the start of the next
max_eeg_ts = []
eeg_parts = []      # per-recording arrays, joined once after the loop
for eeg_stream in eeg_streams:
    # Get the eeg data (extract_eeg already drops the marker row by default):
    eeg, eeg_ts = extract_eeg(eeg_stream)
    max_eeg_ts.append(eeg_ts.max())

    # Extract all infos from the EEG stream:
    fs, ch_names, ch_labels, eff_fs = extract_eeg_infos(eeg_stream)

    if not eeg_parts:
        # First recording defines the reference timestamp.
        first_ts = eeg_ts[0]
    else:
        differences.append(eeg_ts[0] - last_ts)

    eeg_parts.append(eeg)
    last_ts = eeg_ts[-1]
    print('####', end='#')

if not eeg_parts:
    raise RuntimeError('No EEG streams loaded - nothing to concatenate.')

cum_diff = np.cumsum(differences)
# A single concatenate avoids re-copying the growing array on every iteration.
global_eeg = np.concatenate(eeg_parts, axis=1)
eeg = global_eeg
print()
print('Extracted EEG data, EEG infos...')

#####
Extracted EEG data, EEG infos...


In [30]:
# annotation generation from:
# https://github.com/WriessneggerLab/EEG-preprocessing/blob/eeg/src/EEGAnalysis.py
# generation of the events according to the definition
triggers = {'onsets': [], 'duration': [], 'description': []}
global_markers_ts = []
for i, m_stream in enumerate(marker_streams):
    # Extract the markers and timestamps:
    markers = m_stream['time_series']

    # Express the marker times relative to the first EEG timestamp (the same
    # reference extract_annotations is given in the per-subject loop) and remove
    # the accumulated gaps between recordings, because the concatenated EEG has
    # no gaps. Subtracting the marker stream's 'created_at' instead leaves the
    # onsets unrelated to the EEG time axis.
    markers_ts = m_stream['time_stamps'] - first_ts - cum_diff[i]

    global_markers_ts += list(markers_ts)
    # read every trigger in the stream
    for idx, marker_data in enumerate(markers):
        triggers['onsets'].append(markers_ts[idx])
        triggers['duration'].append(0)
        triggers['description'].append(marker_data[0])

# define MNE annotations (orig_time=None: onsets are taken relative to the data start)
annotations = mne.Annotations(triggers['onsets'], triggers['duration'], triggers['description'], orig_time=None)

In [16]:
# Scratch check: marker timestamps as a plain list, plus an independent copy of it.
mrks_list = list(markers_ts)
a = list(mrks_list)

### Put extracted data into mne structure

In [31]:
# TODO: align annotations

# Channel names, sampling rate and channel types as extracted from the EEG stream.
info = mne.create_info(ch_names=ch_names, sfreq=fs, ch_types=ch_labels)

# NOTE(review): `first_ts` is the first EEG timestamp, whereas `first_samp` is
# documented by MNE as a sample index - confirm that this offset is intended.
raw = mne.io.RawArray(eeg, info, first_samp=first_ts)
raw.set_montage('standard_1005')
raw.set_annotations(annotations)

if plot:
    raw.plot(title='Raw', duration=60, n_channels=len(raw.ch_names),
             proj=False, remove_dc=False)

Creating RawArray with float64 data, n_channels=64, n_times=86908
    Range : 612437 ... 699344 =   3062.185 ...  3496.720 secs
Ready.


  raw.set_annotations(annotations)


ModuleNotFoundError: No module named 'mnelab'

### Filter with HP at 0.4Hz and BS at 50 Hz

In [None]:
# Step 1: IIR high-pass at 0.4 Hz on a copy of the raw data (EEG channels only).
raw_highpass = raw.copy().filter(
    l_freq=0.4,
    h_freq=None,
    picks=['eeg'],
    method='iir',
)
if plot:
    raw_highpass.plot(title='Highpass filtered', duration=60,
                      n_channels=len(raw.ch_names), proj=False, remove_dc=False)
    plot_spectrum(raw_highpass)

# Step 2: notch at 50 Hz on a copy of the high-passed data (EEG channels only).
raw_notch = raw_highpass.copy().notch_filter(
    freqs=[50],
    picks=['eeg'],
)
if plot:
    raw_notch.plot(title='Notch filtered', duration=60,
                   n_channels=len(raw.ch_names), proj=False, remove_dc=False)
    plot_spectrum(raw_notch)

### Interpolate bad channels:

In [None]:
# TODO: check function --> need to mark them first
# Interpolate on a copy of the notch-filtered data; the bad-channel list is kept.
raw_interp = raw_notch.copy().interpolate_bads(
    reset_bads=False,
)

### Correct eye artifacts:

In [None]:
# TODO

### CAR:

In [None]:
# Re-reference a copy of the interpolated data to the common average.
raw_avg_ref = raw_interp.copy().set_eeg_reference(
    ref_channels='average',
)
if plot:
    raw_avg_ref.plot(title='CAR Referenced', duration=60,
                     n_channels=len(raw.ch_names), proj=False, remove_dc=False)

### HEAR model:

In [None]:
# TODO?

### LP at 3.0Hz

In [None]:
# IIR low-pass at 3 Hz on a copy of the re-referenced data (EEG channels only).
raw_lp = raw_avg_ref.copy().filter(
    l_freq=None,
    h_freq=3.0,
    picks=['eeg'],
    method='iir',
)

### Extract epochs before resampling (otherwise markers may get lost) and reject bad trials:

In [None]:
# The Raw object built in this notebook carries the markers as annotations and has
# no 'Markers' stim channel (extract_eeg drops the marker row), so
# mne.find_events(..., stim_channel='Markers') cannot find it. Derive the events
# from the annotations instead.
# NOTE(review): assumes `classes_map` maps annotation descriptions to integer
# event codes - TODO confirm where it is defined.
events, found_event_ids = mne.events_from_annotations(raw_lp, event_id=classes_map)

# Epochs from 1 s to 6 s after each event, no baseline correction; epochs whose EEG
# peak-to-peak amplitude exceeds 100e-6 are rejected.
epochs = mne.Epochs(raw_lp, events, event_id=classes_map, tmin=1, tmax=6, preload=True, baseline=None, reject=dict(eeg=100e-6)) #, baseline=(1,2))

print(epochs)

if plot:
    epochs.plot(n_epochs=2)

### Resample to 10 Hz:

In [None]:
# Downsample a copy of the epochs to 10 Hz; `epochs` itself stays at the original rate.
epochs_resampled = epochs.copy().resample(
    10,
)
print('Preprocessing finished.')

### Implementing cue-aligned (better according to Reinmar paper)

### Distance decoding:

In [None]:
# The dict literal used to repeat the same two keys four times; written once it
# has exactly the same content.
event_dict = {'short': 1, 'long': 2}

# The Raw object built in this notebook carries the markers as annotations and has
# no 'Markers' stim channel (extract_eeg drops the marker row), so the events are
# derived from the annotations instead of mne.find_events.
# NOTE(review): assumes the annotations are described as 'short' / 'long' -
# TODO confirm against the marker strings of the paradigm.
events, found_event_ids = mne.events_from_annotations(raw_lp, event_id=event_dict)

epochs_long_short = mne.Epochs(raw_lp, events, event_id=event_dict, tmin=1, tmax=6, preload=True, baseline=None, reject=dict(eeg=100e-6))

# Per-condition averages (only needed for the commented-out comparison plot below).
short = epochs_long_short['short'].average()
long = epochs_long_short['long'].average()

#evokeds = dict(short=short, long=long)
#mne.viz.plot_compare_evokeds(evokeds, picks='POz')

# Single-trial evokeds per condition, averaged over the selected channels in the plot.
evokeds2 = dict(short=list(epochs_long_short['short'].iter_evoked()),
                long=list(epochs_long_short['long'].iter_evoked()))
mne.viz.plot_compare_evokeds(evokeds2, combine='mean', picks=['Cz', 'C1', 'C2', 'FCz', 'CPz'], show_sensors='upper right')
plt.savefig('distance_grand_averages.pdf')

#['Pz', 'POz', 'PO3', 'PO4', 'P2', 'P1', 'P2', 'Oz', 'O1', 'O2']

# Downsample to 10 Hz for the classification cells below.
epochs_long_short = epochs_long_short.copy().resample(10)

In [None]:
# Single-sample LDA: classify short vs. long from the channel values of one time
# sample at a time.
eog_rows = [16, 21, 40]   # channels typed as 'eog' in extract_eeg_infos
n_folds = 100             # number of CV folds; also used in the plot title
fs_resampled = 10         # epochs_long_short was resampled to 10 Hz

# Feature tensor (epochs, channels, samples) without the EOG channels:
X = np.array([np.delete(epoch, eog_rows, axis=0) for epoch in epochs_long_short])

# Event code of every retained epoch, read from the events array instead of
# indexing the Epochs object once per trial. Even codes -> 0, odd codes -> 1.
event_codes = epochs_long_short.events[:, 2]
y = np.where(event_codes % 2 == 0, 0, 1)
print('Trials per class (0, 1):', np.bincount(y, minlength=2))

clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
acc = []         # hold-out accuracy per time sample
cv_scores = []   # mean cross-validated accuracy per time sample
for idx in range(X.shape[2]):
    x = X[:, :, idx]

    # Fixed 80/20 hold-out split:
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
    clf.fit(X_train, y_train)
    acc.append(clf.score(X_test, y_test))

    # Cross-validated accuracy:
    cv_scores.append(cross_val_score(clf, x, y, cv=n_folds).mean())

    if idx % 10 == 0:
        print(idx)

print('Done')

t = np.arange(len(acc)) / fs_resampled
#plt.plot(t, acc)

plt.plot(t, cv_scores)

# Moving average over `window` samples:
window = 7
ma = np.convolve(cv_scores, np.ones(window), 'valid') / window

plt.plot(t[:-window+1], ma)
plt.plot([2,2], [min(cv_scores), max(cv_scores)])
plt.xlabel('Time (s)')
plt.ylabel('Accuracy (a.u.)')
# Title is derived from n_folds so it cannot disagree with the CV actually run.
plt.title(f'Single sample approach, {n_folds}-fold CV')
plt.savefig('distance_acc_single.pdf')

In [None]:
# 5 point LDA: classify short vs. long from a sliding window of consecutive samples.
eog_rows = [16, 21, 40]   # channels typed as 'eog' in extract_eeg_infos
n_folds = 100             # number of CV folds; also used in the plot title
fs_resampled = 10         # epochs_long_short was resampled to 10 Hz
n_points = 5              # samples per classification window

# Feature tensor (epochs, channels, samples) without the EOG channels:
X = np.array([np.delete(epoch, eog_rows, axis=0) for epoch in epochs_long_short])

# Event code of every retained epoch, read from the events array instead of
# indexing the Epochs object once per trial. Even codes -> 0, odd codes -> 1.
event_codes = epochs_long_short.events[:, 2]
y = np.where(event_codes % 2 == 0, 0, 1)

clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
acc = []         # hold-out accuracy per window
cv_scores = []   # mean cross-validated accuracy per window
# `+ 1` so that the last full window (ending on the final sample) is included.
for idx in range(X.shape[2] - n_points + 1):
    x = X[:, :, idx:idx + n_points]
    if idx % 10 == 0:
        print(idx)
        print(x.shape)

    # Flatten (channels, window samples) into one feature vector per trial:
    nsamples, nx, ny = x.shape
    x = x.reshape((nsamples, nx * ny))

    # Fixed 80/20 hold-out split:
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
    clf.fit(X_train, y_train)
    acc.append(clf.score(X_test, y_test))

    # Cross-validated accuracy:
    cv_scores.append(cross_val_score(clf, x, y, cv=n_folds).mean())

print('Done')
#print(acc)

# Each window is plotted at its start index plus the window length.
t = np.arange(len(acc)) / fs_resampled + n_points / fs_resampled
#plt.plot(t, acc)

plt.plot(t, cv_scores)

# Moving average over `window` scores:
window = 7
ma = np.convolve(cv_scores, np.ones(window), 'valid') / window

plt.plot(t[window-1:], ma)
plt.plot([2,2], [min(cv_scores), max(cv_scores)])
plt.xlabel('Time (s)')
plt.ylabel('Accuracy (a.u.)')
# Title is derived from n_folds so it cannot disagree with the CV actually run.
plt.title(f'Windowed approach accuracies, distance {n_folds}-fold CV')
plt.savefig('distance_acc_5point.pdf')

In [None]:
# Clear the notebook namespace at the end of the session.
%reset