In [1]:
from pathlib import Path

In [2]:
# Root folder that holds the EEG datasets on this machine.
DATASET_BASE_DIR = Path('/home/username/Data/eeg-datasets/')
# Folder with the DREAMER files (DREAMER.mat etc.). The variable keeps its
# "SEED" name because later cells refer to it under that name.
DATASET_SEED_DIR = DATASET_BASE_DIR.joinpath('DREAMER')

In [3]:
# List the dataset folder to confirm that DREAMER.mat is present before loading it.
!ls $DATASET_SEED_DIR

DREAMER.mat  DREAMER.pdf  Processed.mat


In [6]:
import scipy.io as scio

# Load the whole DREAMER struct into memory. `verify_compressed_data_integrity=False`
# turns off SciPy's length check on compressed variables.
# NOTE(review): presumably disabled because this file trips the check - confirm.
data = scio.loadmat(DATASET_SEED_DIR / 'DREAMER.mat', verify_compressed_data_integrity=False)

The DREAMER database contains the participant ratings and physiological recordings of an experiment where 23 volunteers watched 18 film clips selected and evaluated by Gabert-Quillen et al. [1]. EEG and ECG signals were recorded and each participant rated their emotion by reporting the felt arousal, valence and dominance on five point scales. For more details, please refer to [2].

The DREAMER database consists of the “DREAMER.mat” file in Matlab format. Loading this file loads a variable named “DREAMER” in the workspace. The “DREAMER” variable is structured as follows:

DREAMER = struct with fields:
Data: {1×23 cell}

EEG_SamplingRate: 128

ECG_SamplingRate: 256

EEG_Electrodes: {'AF3' 'F7' 'F3' 'FC5' 'T7' 'P7' 'O1' 'O2' 'P8' 'T8' 'FC6' 'F4' 'F8' 'AF4'}

noOfSubjects: 23

noOfVideoSequences: 18

Disclaimer: 'While every care has been taken...'

Provider: 'University of the West of Scotland'

Version: '1.0.2'

Acknowledgement: 'The authors would like to thank...'

In [30]:
# Walk through the top-level fields of the DREAMER struct. loadmat wraps the
# MATLAB struct in a 1x1 array, hence the [0,0] before each field access.
# Only the last expression (EEG_Electrodes) is echoed as the cell output;
# the earlier lines are evaluated and their values discarded.
data['DREAMER'][0,0]
data['DREAMER'][0,0]['Data']
data['DREAMER'][0,0]['EEG_SamplingRate']
data['DREAMER'][0,0]['ECG_SamplingRate']
data['DREAMER'][0,0]['EEG_Electrodes']

array([[array(['AF3'], dtype='<U3'), array(['F7'], dtype='<U2'),
        array(['F3'], dtype='<U2'), array(['FC5'], dtype='<U3'),
        array(['T7'], dtype='<U2'), array(['P7'], dtype='<U2'),
        array(['O1'], dtype='<U2'), array(['O2'], dtype='<U2'),
        array(['P8'], dtype='<U2'), array(['T8'], dtype='<U2'),
        array(['FC6'], dtype='<U3'), array(['F4'], dtype='<U2'),
        array(['F8'], dtype='<U2'), array(['AF4'], dtype='<U3')]],
      dtype=object)

The cell DREAMER.Data{i} contains the data for the ith participant and is structured as follows:
struct with fields:

Age: 'X'

Gender: 'X' ('male' or 'female')

EEG: [1×1 struct]

ECG: [1×1 struct]

ScoreValence: [18×1 double]

ScoreArousal: [18×1 double]

ScoreDominance: [18×1 double]

ScoreValence, ScoreArousal and ScoreDominance are vectors whose ith element is the participant's rating of the ith film clip in terms of valence, arousal and dominance, respectively.

In [34]:
# Walk through the fields of the first participant (Data[0,0]).
# Only the last expression (ScoreDominance) is echoed as the cell output;
# the earlier lines are evaluated and their values discarded.
data['DREAMER'][0,0]['Data'][0,0]['EEG']
data['DREAMER'][0,0]['Data'][0,0]['ECG']
data['DREAMER'][0,0]['Data'][0,0]['Age']
data['DREAMER'][0,0]['Data'][0,0]['Gender']
data['DREAMER'][0,0]['Data'][0,0]['ScoreValence']
data['DREAMER'][0,0]['Data'][0,0]['ScoreArousal']
data['DREAMER'][0,0]['Data'][0,0]['ScoreDominance']

array([[array([[4],
       [3],
       [5],
       [4],
       [4],
       [1],
       [5],
       [1],
       [1],
       [5],
       [4],
       [4],
       [4],
       [3],
       [2],
       [3],
       [1],
       [3]], dtype=uint8)]], dtype=object)

The EEG and ECG recordings are stored in the DREAMER.Data{i}.EEG and DREAMER.Data{i}.ECG variables respectively which are structured as follows:
struct with fields:

baseline: {18×1 cell}

stimuli: {18×1 cell}

In [45]:
# Baseline cell array of the first participant: one entry per film clip.
data['DREAMER'][0,0]['Data'][0,0]['EEG'][0,0]['baseline'][0,0].shape

(18, 1)

In [47]:
# First baseline recording of the first participant: (samples, EEG channels).
data['DREAMER'][0,0]['Data'][0,0]['EEG'][0,0]['baseline'][0,0][0][0].shape

(7808, 14)

In [48]:
# Stimuli cell array of the first participant: one entry per film clip.
data['DREAMER'][0,0]['Data'][0,0]['EEG'][0,0]['stimuli'][0,0].shape

(18, 1)

In [49]:
# First stimulus recording of the first participant: (samples, EEG channels).
data['DREAMER'][0,0]['Data'][0,0]['EEG'][0,0]['stimuli'][0,0][0][0].shape

(25472, 14)

The recordings referring to the stimuli film clips are stored in the “stimuli” variable, while the recordings for the neutral clip shown before each film clip are stored in the “baseline” variable. The cells ....baseline{i} and ....stimuli{i} contain the data referring to the ith film clip.

For ECG, each recording is an M x 2 matrix, where M is the number of available samples and each column contains the samples of one of the two ECG channels.

For EEG, each recording is an M x 14 matrix, where M is the number of available samples and each column contains the samples of one of the 14 EEG channels.

In [68]:
import numpy as np
import pandas as pd

def get_labels(mat=None, threshold=4):
    """Binarise the self-reported valence / arousal / dominance ratings.

    Args:
        mat: Dictionary returned by ``scipy.io.loadmat`` for ``DREAMER.mat``.
            When omitted, the notebook-level ``data`` variable is used.
        threshold: Ratings strictly below this value become 0, all other
            ratings become 1.

    Returns:
        A float array of shape ``(n_subjects, n_clips, 3)`` holding 0/1
        labels; the last axis is ordered valence, arousal, dominance.
    """
    if mat is None:
        mat = data  # fall back to the struct loaded earlier in the notebook

    subjects = mat['DREAMER'][0, 0]['Data']  # 1 x n_subjects cell array
    score_fields = ('ScoreValence', 'ScoreArousal', 'ScoreDominance')
    n_subjects = subjects.shape[1]
    # Every participant rates the same clips, so the first one fixes n_clips.
    n_clips = subjects[0, 0][score_fields[0]][0, 0].shape[0]

    labels = np.zeros((n_subjects, n_clips, len(score_fields)))
    for k in range(n_subjects):
        subject = subjects[0, k]
        for dim, field in enumerate(score_fields):
            # Each score field is an (n_clips, 1) column wrapped in a 1x1 cell.
            scores = subject[field][0, 0][:n_clips, 0]
            # "not below threshold" -> 1, matching the original if/else.
            labels[k, :, dim] = ~(scores < threshold)
    return labels

# Per-clip 0/1 labels indexed as [subject, clip, (valence, arousal, dominance)].
labels = get_labels()

In [63]:
def get_features(FREQ = 128, mat=None):
    """Cut every EEG recording into windows and subtract the baseline.

    Each stimulus recording is split into consecutive windows of ``FREQ``
    samples. The baseline recording of the same clip is split the same way
    and averaged over its windows; that mean window is subtracted from every
    stimulus window.

    Args:
        FREQ: Window length in samples.
        mat: Dictionary returned by ``scipy.io.loadmat`` for ``DREAMER.mat``.
            When omitted, the notebook-level ``data`` variable is used.

    Returns:
        A nested list ``features[subject][clip]`` of arrays shaped
        ``(n_windows, n_channels, FREQ)``.
    """
    if mat is None:
        mat = data  # fall back to the struct loaded earlier in the notebook

    subjects = mat['DREAMER'][0, 0]['Data']  # 1 x n_subjects cell array

    def _to_windows(recording):
        # (samples, channels) -> (channels, n_windows, FREQ).
        # Trailing samples that do not fill a whole window are dropped so
        # that the reshape cannot fail on lengths that are not a multiple
        # of FREQ.
        n_windows = recording.shape[0] // FREQ
        trimmed = recording[:n_windows * FREQ]
        return trimmed.swapaxes(1, 0).reshape(recording.shape[1], n_windows, FREQ)

    volunteers = []
    for k in range(subjects.shape[1]):
        eeg = subjects[0, k]['EEG'][0, 0]
        baselines = eeg['baseline'][0, 0]  # n_clips x 1 cell array
        stimuli = eeg['stimuli'][0, 0]     # n_clips x 1 cell array

        clips = []
        for j in range(stimuli.shape[0]):
            # Mean baseline window, kept 3-D so it broadcasts over windows.
            basl = _to_windows(baselines[j, 0]).mean(1, keepdims=True)
            stim = _to_windows(stimuli[j, 0])
            # (channels, windows, FREQ) -> (windows, channels, FREQ)
            clips.append((stim - basl).swapaxes(1, 0))
        volunteers.append(clips)
    return volunteers

# features[subject][clip] is the baseline-corrected EEG of one clip, cut into
# windows of FREQ samples: (n_windows, n_channels, FREQ).
features = get_features()

In [77]:
def mult2binary_labels(features, labels):
    """Repeat each clip's label once per EEG window of that clip.

    Args:
        features: Nested list ``features[subject][clip]`` of arrays whose
            first axis counts the windows of that clip.
        labels: Array-like indexed as ``labels[subject][clip]`` that yields
            the label vector of one clip.

    Returns:
        A nested list with the same layout as ``features``; entry
        ``[subject][clip]`` has shape ``(n_windows, *label_shape)`` and
        contains the clip's label repeated for every window.
    """
    volunteer_labels = []
    # Iterate over whatever is in `features` instead of a fixed 23 x 18 grid,
    # so subsets of subjects or clips work as well.
    for k, clips in enumerate(features):
        clip_labels = []
        for j, clip in enumerate(clips):
            n_windows = clip.shape[0]
            # Add a leading axis, then repeat along it: one row per window.
            repeated = np.repeat(labels[k][j][np.newaxis, ...], n_windows, axis=0)
            clip_labels.append(repeated)
        volunteer_labels.append(clip_labels)
    return volunteer_labels

# Expand the per-clip labels so that every window in `features` has its own label row.
binary_labels = mult2binary_labels(features, labels)

In [78]:
import operator
import functools

def flatten_samples(samples):
    """Merge a nested ``[subject][clip]`` list of arrays into one array.

    The per-subject lists are first joined into a single flat list of clip
    arrays, which are then concatenated along their first axis.
    """
    # Join the per-subject lists end to end: one flat list of clip arrays.
    all_clips = functools.reduce(operator.concat, samples)
    # Stack every clip's rows on top of each other.
    return np.concatenate(all_clips)

In [81]:
flattened_features = flatten_samples(features)
# Flatten the per-window labels (`binary_labels`), not the raw per-clip
# `labels` array: only the former has the same nested [subject][clip] layout
# and one row per window, so it lines up with `flattened_features`.
# (The variable name keeps its original spelling because the export cell uses it.)
flatttened_labels = flatten_samples(binary_labels)
assert len(flattened_features) == len(flatttened_labels), "features and labels are misaligned"

In [90]:
# EEG channel names in the column order of the DREAMER recordings.
CHANNEL_LIST = [
    'AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
    'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4',
]

# 9 x 9 electrode grid; '-' marks a cell without an electrode.
LOCATION_LIST = [
    ['-', '-', '-', 'FP1', '-', 'FP2', '-', '-', '-'],
    ['-', '-', '-', 'AF3', '-', 'AF4', '-', '-', '-'],
    ['F7', '-', 'F3', '-', 'FZ', '-', 'F4', '-', 'F8'],
    ['-', 'FC5', '-', 'FC1', '-', 'FC2', '-', 'FC6', '-'],
    ['T7', '-', 'C3', '-', 'CZ', '-', 'C4', '-', 'T8'],
    ['-', 'CP5', '-', 'CP1', '-', 'CP2', '-', 'CP6', '-'],
    ['P7', '-', 'P3', '-', 'PZ', '-', 'P4', '-', 'P8'],
    ['-', '-', '-', 'PO3', '-', 'PO4', '-', '-', '-'],
    ['-', '-', '-', 'O1', 'OZ', 'O2', '-', '-', '-'],
]


def get_channel_location(channel_list, location_list):
    """Look up the grid position of every channel.

    Returns a dict that maps each name in ``channel_list`` to its
    ``[row, column]`` in ``location_list``, counted from 1. A name that is
    missing from the grid raises ``IndexError``.
    """
    grid = np.array(location_list)
    return {
        # First match of the name in the grid, shifted to 1-based indices.
        name: (np.argwhere(grid == name)[0] + 1).tolist()
        for name in channel_list
    }


def channel_to_location(data, norm_fn=None):
    """Place per-channel signals at their electrode position on a 2-D grid.

    Args:
        data: 3-D array indexed as ``(sample, channel, feature)``; the
            channel axis must follow the order of ``CHANNEL_LIST``. (The
            original comments call the last axis "bands".)
        norm_fn: Optional callable applied to each channel's
            ``(sample, feature)`` slab before it is written to the grid.

    Returns:
        A float array of shape ``(sample, feature, rows, cols)``, where
        ``(rows, cols)`` is the shape of ``LOCATION_LIST``. Grid cells that
        hold no channel from ``CHANNEL_LIST`` stay zero.
    """
    # (sample, channel, feature) -> (channel, sample, feature)
    per_channel = data.swapaxes(0, 1)

    # Take the grid size from the layout itself rather than hard-coding 9 x 9.
    n_rows, n_cols = np.array(LOCATION_LIST).shape
    outputs = np.zeros([n_rows, n_cols, *per_channel.shape[1:]])

    channel_location = get_channel_location(CHANNEL_LIST, LOCATION_LIST)
    for i, (row, col) in enumerate(channel_location.values()):
        signal = per_channel[i]
        if norm_fn:
            signal = norm_fn(signal)
        # get_channel_location reports 1-based coordinates.
        outputs[row - 1, col - 1] = signal

    # (rows, cols, sample, feature) -> (sample, feature, rows, cols)
    return outputs.transpose([2, 3, 0, 1])

In [92]:
# Map the channel axis onto the 9 x 9 electrode grid; the result is indexed
# as (window, sample-in-window, grid row, grid column).
flattened_features_2d = channel_to_location(flattened_features)

In [94]:
import pickle as pkl

# open(..., 'wb') does not create missing parent folders, so make sure the
# cache directory exists before writing.
Path('./cache').mkdir(parents=True, exist_ok=True)

# Persist the grid-mapped features together with their per-window labels.
# Pickle protocol 4 supports very large objects.
with open('./cache/dreamer_binary_dataset.pkl', 'wb') as file:
    pkl.dump({
        'feature': flattened_features_2d,
        'label': flatttened_labels
    }, file, protocol=4)