In [1]:
# Configure Theano through the environment: 32-bit floats, run on the CPU.
# NOTE(review): this presumably has to happen before deepthought is imported
# (assuming it pulls in Theano, which reads THEANO_FLAGS at import) - confirm.
import os
os.environ["THEANO_FLAGS"] = "floatX=float32,device=cpu"

import deepthought
# NOTE(review): `os` is already imported at the top of this cell; the repeat is harmless.
import os
from deepthought.datasets.eeg.meta_class import DataFileWithMetaClasses

# Open the perception recordings and declare two meta classes. Each entry maps
# a meta-class name to the metadata attributes it is built from.
db = DataFileWithMetaClasses(
    filepath='data/OpenMIIR-Perception-512Hz.pklz',
    meta_classes={
        'within_subject_tuples_group': ['subject', 'condition'],
        # cross-participant triplets - this should be very tough
        'cross_subject_tuples_group': ['condition'],
    },
)

In [2]:
from deepthought.datasets.eeg.EEGEpochsDataset import EEGEpochsDataset
# Passed to the dataset as stop_sample.
INPUT_LEN = 3518

# Build the epochs dataset from `db`, restricted to condition 1 and labelled
# by stimulus id.
base_dataset = EEGEpochsDataset(
    db=db,
    meta_sources=['subject'],
    selectors={
        # optional extra filter, currently disabled:
        # 'trial_no': [0, 1, 3, 4],
        'condition': [1],
    },
    use_targets=False,
    stop_sample=INPUT_LEN,
    label_attribute='stimulus_id',
    # stimulus_id -> consecutive class index
    label_map={
        1: 0, 2: 1, 3: 2, 4: 3,
        11: 4, 12: 5, 13: 6, 14: 7,
        21: 8, 22: 9, 23: 10, 24: 11,
    },
    remove_dc_offset=True,
    layout='tf',
)

In [3]:
# Quick look at what was loaded: array shapes first, then the first trial's
# metadata record and the attribute names it carries.
print(base_dataset.trials.shape)
print(base_dataset.y.shape)
print(base_dataset.metadata[0])
print(base_dataset.metadata[0].keys())

(540, 3518, 1, 64)
(540, 12)
{'trial_type': 'perception', 'cross_subject_tuples_group': 0, 'trial_no': 0, 'stimulus_id': 1, 'within_subject_tuples_group': 0, 'condition': 1, 'subject': 'P01'}
['trial_type', 'cross_subject_tuples_group', 'trial_no', 'stimulus_id', 'within_subject_tuples_group', 'condition', 'subject']


In [4]:
# convert to bc01 axis format: move the last axis (index 3) so that it sits
# directly after the trial axis; the remaining axes keep their relative order.
import numpy as np
trials_in_dataset_layout = base_dataset.trials
features_bc01 = np.rollaxis(trials_in_dataset_layout, axis=3, start=1)
print(features_bc01.shape)

(540, 64, 3518, 1)


In [5]:
# save dataset
#
# Writes the bc01 features, the targets, a running trial index and a per-trial
# subject id into one HDF5 file with a single Fuel split named 'all', then
# stores the trial metadata next to it for later subset selection.

import h5py
import numpy as np
from fuel.datasets.hdf5 import H5PYDataset

hdf5name = 'data/OpenMIIR-Perception-512Hz.hdf5'
N = len(base_dataset.trials)

# additional metadata source for hydra-net control flow
# The ids are resolved before the file is opened: list.index raises ValueError
# for a subject that is not in this list, and failing here means an existing
# file at hdf5name is not truncated by a run that cannot complete.
subjects = ['P01', 'P04', 'P06', 'P07', 'P09', 'P11', 'P12', 'P13', 'P14']
subject_ids = [subjects.index(meta['subject']) for meta in base_dataset.metadata]

# Every source spans all N trials in the single 'all' split.
split_dict = {
    'all': {
        'features': (0, N),
        'indices': (0, N),
        'targets': (0, N),
        'subjects': (0, N),
    },
}

# The context manager closes the file even if one of the writes raises, so a
# failed run does not leave a dangling open handle in the kernel.
with h5py.File(hdf5name, mode='w') as f:
    fx = f.create_dataset('features', features_bc01.shape, dtype='float32')
    fy = f.create_dataset('targets', base_dataset.y.shape, dtype='int8')
    fx[...] = features_bc01
    fy[...] = base_dataset.y

    # for using indices
    findices = f.create_dataset('indices', (N,), dtype='int')
    findices[...] = np.arange(N)

    fsubject = f.create_dataset('subjects', (N,), dtype='int8')
    fsubject[...] = subject_ids

    f.attrs['split'] = H5PYDataset.create_split_array(split_dict)
    f.flush()

# metadata for subset selection
import deepthought.util.fs_util as fs_util
fs_util.save(hdf5name+'.meta.pklz', base_dataset.metadata)