In [2]:
import os

import mne
import glob
import pandas as pd
from pathlib import Path
import numpy as np

In [3]:
def pairwise(iterable):
    """Group an iterable into consecutive, non-overlapping pairs.

    s -> (s0, s1), (s2, s3), (s4, s5), ...

    Both slots of every pair are filled from one shared iterator, so a
    trailing element without a partner is dropped (``zip`` stops as soon as
    one of its inputs is exhausted). The result is a lazy ``zip`` object.
    """
    return zip(*[iter(iterable)] * 2)

def set_labels(cropped,raw,labels,clip_duration=50):
    """Replace the annotations of ``raw`` with one annotation per video clip.

    The existing annotations are consumed two at a time (see ``pairwise``).
    A pair is used only when the description of its first annotation starts
    with ``video_``; the third ``_``-separated token of that description is
    parsed as the integer video id. The onset of the second annotation of the
    pair is taken as the end of the clip.

    Parameters
    ----------
    cropped : bool
        If true, every clip covers the last ``clip_duration`` seconds before
        the second annotation of the pair. Otherwise it runs from the onset
        of the first annotation to the onset of the second.
    raw : object exposing ``annotations`` and ``set_annotations`` (MNE Raw)
        Recording whose annotations are read and then overwritten in place.
    labels : pandas.DataFrame
        Ratings table with ``video_id``, ``valence``, ``arousal`` and
        ``dominance`` columns.
    clip_duration : float, default 50
        Clip length in seconds, only used when ``cropped`` is true.

    Returns
    -------
    raw
        The object that was passed in, now carrying the new annotations.
    ratings : list of [valence, arousal, dominance]
        One rating triple per clip, in the order the clips were found.

    Raises
    ------
    IndexError
        If a video id found in the annotations has no row in ``labels``.
    """
    rating_columns = ('valence', 'arousal', 'dominance')
    source_annotations = raw.annotations
    onsets, durations, ratings = [], [], []

    for start, end in pairwise(source_annotations):
        name_parts = start['description'].split('_')
        if name_parts[0] != 'video':
            continue
        if cropped:
            onset = end['onset'] - clip_duration
            duration = clip_duration
        else:
            onset = start['onset']
            duration = end['onset'] - start['onset']
        video_id = int(name_parts[2])
        # Select the matching row with a boolean mask. The previous code took
        # index *labels* from ``labels.index`` and used them as *positions*
        # into ``.values``, which only works for a default 0..n-1 index.
        mask = (labels['video_id'] == video_id).values
        ratings.append([labels[column].values[mask][0] for column in rating_columns])
        onsets.append(onset)
        durations.append(duration)

    annotations = mne.Annotations(
        onset=onsets,  # in seconds, the moment each epoch starts
        duration=durations,  # in seconds, too, duration of each epoch (if data clipped 50s, should be equal for all)
        description=["a n dim description would be nice" for _ in ratings],
        # Keep the time reference of the annotations the onsets were read
        # from. With the default ``orig_time=None`` MNE treats the onsets as
        # relative to the first sample, shifting them when first_samp != 0.
        orig_time=source_annotations.orig_time,
    )
    raw.set_annotations(annotations)
    return raw, ratings


def get_segments(raw,labels,cropped):
    """Filter ``raw`` and cut out one data segment per annotated video clip.

    ``set_labels`` first rewrites the annotations of ``raw`` so that each one
    marks a clip. The recording is then band-pass filtered (1-45 Hz) and
    notch filtered (50 and 100 Hz), and the samples of every clip are sliced
    out of the filtered data.

    Parameters
    ----------
    raw : MNE Raw object
        Recording to segment. Its annotations are replaced by ``set_labels``
        and the filters are called on this same object.
    labels : pandas.DataFrame
        Ratings table forwarded to ``set_labels``.
    cropped : bool
        Forwarded to ``set_labels``; selects fixed-length or full-length clips.

    Returns
    -------
    segments : list of numpy.ndarray
        One 2-D array per clip, sliced along the second axis of
        ``get_data()``. Each array is an independent copy.
    ratings : list
        The rating triples returned by ``set_labels``, one per clip.
    """
    raw, ratings = set_labels(cropped, raw, labels)
    events, _event_ids = mne.events_from_annotations(raw)
    # Event sample numbers are counted from the start of the acquisition and
    # therefore include raw.first_samp, while the array from get_data() starts
    # at index 0. Remove the offset before using them as array indices.
    start_idx = events[:, 0] - raw.first_samp
    n_samples = (raw.annotations.duration * raw.info['sfreq']).astype(int)
    end_idx = start_idx + n_samples
    # filtering
    filtered_raw = raw.filter(l_freq=1, h_freq=45, verbose=False).notch_filter([50, 100], verbose=False)
    # Fetch the data once; calling get_data() per segment re-extracted the
    # whole recording on every loop iteration.
    data = filtered_raw.get_data()
    segments = [data[:, start:stop].copy() for start, stop in zip(start_idx, end_idx)]
    return segments, ratings

def get_segments_labels(folder,subject,cropped=True):
    """Load one subject's recording and ratings and return its clip segments.

    Reads ``<folder>/Data/<subject>_eeg.fif`` (preloaded) and
    ``<folder>/ratings/<subject>.csv`` and hands both to ``get_segments``.

    Parameters
    ----------
    folder : str or path-like
        Root directory containing the ``Data`` and ``ratings`` sub-folders.
        A trailing path separator is optional.
    subject : str
        Subject identifier used to build both file names.
    cropped : bool, default True
        Forwarded to ``get_segments``.

    Returns
    -------
    segments, labels
        Exactly what ``get_segments`` returns for this subject.
    """
    # os.path.join instead of string concatenation, so ``folder`` works with
    # or without a trailing separator.
    raw_path = os.path.join(folder, 'Data', subject + '_eeg.fif')
    ratings_path = os.path.join(folder, 'ratings', subject + '.csv')
    raw = mne.io.Raw(raw_path, preload=True)
    ratings = pd.read_csv(ratings_path)
    return get_segments(raw, ratings, cropped)

In [4]:
import pickle

# Pre-process every subject found in <folder>/Data and store the result as
# <folder>/preprocessed_data/<subject>.pkl, a pickled dict with the keys
# 'data' (scaled segments) and 'labels' (rating triples).
cropped = True
folder = "./"
# get_segments_labels can only open "<subject>_eeg.fif", so restrict the glob
# to that pattern instead of picking up every entry in Data/.
EEG_SUFFIX = '_eeg.fif'
files = sorted(glob.glob(os.path.join(folder, 'Data', '*' + EEG_SUFFIX)))
preprocessed_path = os.path.join(folder, 'preprocessed_data')
Path(preprocessed_path).mkdir(parents=True, exist_ok=True)
for filename in files:
    # Subject id is the file name without the "_eeg.fif" suffix.
    subject = Path(filename).name[:-len(EEG_SUFFIX)]
    print('Subject:',subject)
    segments, labels = get_segments_labels(folder,subject,cropped)
    if subject == 'kaleem':
        # NOTE(review): the last two clips of this subject are discarded; the
        # reason is not recorded in the notebook - confirm and document it.
        segments = segments[:-2]
        labels = labels[:-2]
    # Stack the per-clip 2-D arrays along a new leading axis; this requires
    # all segments to have the same shape.
    stacked_segments = np.stack(segments, axis=0)
    labels = np.array(labels)
    # scale data
    scaler = mne.decoding.Scaler(scalings='mean')
    scaled_data = scaler.fit_transform(stacked_segments)
    print(scaled_data.shape)
    data = {'data': scaled_data, 'labels': labels}
    with open(os.path.join(preprocessed_path, f'{subject}.pkl'), 'wb') as file:
        pickle.dump(data, file)


Subject: kaleem
Opening raw data file ./Data/kaleem_eeg.fif...
    Range : 0 ... 912219 =      0.000 ...  3648.876 secs
Ready.
Reading 0 ... 912219  =      0.000 ...  3648.876 secs...
Used Annotations descriptions: ['a n dim description would be nice']
(16, 8, 12500)
Subject: subj10
Opening raw data file ./Data/subj10_eeg.fif...
    Range : 0 ... 849419 =      0.000 ...  3397.676 secs
Ready.
Reading 0 ... 849419  =      0.000 ...  3397.676 secs...
Used Annotations descriptions: ['a n dim description would be nice']
(16, 8, 12500)
Subject: subj13
Opening raw data file ./Data/subj13_eeg.fif...
    Range : 0 ... 904669 =      0.000 ...  3618.676 secs
Ready.
Reading 0 ... 904669  =      0.000 ...  3618.676 secs...
Used Annotations descriptions: ['a n dim description would be nice']
(16, 8, 12500)
Subject: subj14
Opening raw data file ./Data/subj14_eeg.fif...
    Range : 0 ... 825369 =      0.000 ...  3322.205 secs
Ready.
Reading 0 ... 825369  =      0.000 ...  3322.205 secs...
Used Annotat