From ffad6f2627df86fb297b17562ea8ef131c24bd93 Mon Sep 17 00:00:00 2001 From: Mainak Jas Date: Mon, 7 Dec 2015 14:25:10 +0100 Subject: [PATCH] ENH: Add option to preload data --- mne/io/eeglab/eeglab.py | 53 +++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/mne/io/eeglab/eeglab.py b/mne/io/eeglab/eeglab.py index 298b26f3c01..0d84c656be8 100644 --- a/mne/io/eeglab/eeglab.py +++ b/mne/io/eeglab/eeglab.py @@ -8,7 +8,7 @@ from ...utils import logger from ..meas_info import create_info -from ..base import _BaseRaw +from ..base import _BaseRaw, _mult_cal_one from ..constants import FIFF @@ -65,7 +65,7 @@ def _get_info(eeg, eog, ch_fname): return info -def read_raw_set(fname, ch_fname, eog=None, verbose=None): +def read_raw_set(fname, ch_fname, eog=None, preload=False, verbose=None): """Read an EEGLAB .set file Parameters @@ -79,6 +79,12 @@ def read_raw_set(fname, ch_fname, eog=None, verbose=None): Names of channels or list of indices that should be designated EOG channels. If None (default), the channel names beginning with ``EOG`` are used. + preload : bool or str (default False) + Preload data into memory for data manipulation and faster indexing. + If True, the data will be preloaded into memory (fast, requires + large amount of memory). If preload is a string, preload is the + file name of a memory-mapped file which is used to store the data + on the hard drive (slower, requires less memory). verbose : bool, str, int, or None If not None, override default verbose level (see mne.verbose). @@ -87,7 +93,8 @@ def read_raw_set(fname, ch_fname, eog=None, verbose=None): raw : Instance of RawSet A Raw object containing EEGLAB .set data. """ - return RawSet(fname, ch_fname, eog, verbose) + return RawSet(fname=fname, ch_fname=ch_fname, eog=eog, preload=preload, + verbose=verbose) class RawSet(_BaseRaw): @@ -104,6 +111,12 @@ class RawSet(_BaseRaw): Names of channels or list of indices that should be designated EOG channels. 
If None (default), the channel names beginning with ``EOG`` are used. + preload : bool or str (default False) + Preload data into memory for data manipulation and faster indexing. + If True, the data will be preloaded into memory (fast, requires + large amount of memory). If preload is a string, preload is the + file name of a memory-mapped file which is used to store the data + on the hard drive (slower, requires less memory). verbose : bool, str, int, or None If not None, override default verbose level (see mne.verbose). @@ -112,19 +125,17 @@ class RawSet(_BaseRaw): raw : Instance of RawSet A Raw object containing EEGLAB .set data. """ - def __init__(self, fname, ch_fname, eog=None, verbose=None): + def __init__(self, fname, ch_fname, eog=None, preload=False, verbose=None): """Read EEGLAB .set file. """ - scaling = 1e-6 basedir = op.dirname(fname) eeg = io.loadmat(fname, struct_as_record=False, squeeze_me=True)['EEG'] # read the data data_fname = op.join(basedir, eeg.data) logger.info('Reading %s' % data_fname) - data_fid = open(data_fname) - data = np.fromfile(data_fid, dtype=np.float64) * scaling - data = data.reshape((-1, eeg.nbchan)).T + + last_samps = [int(eeg.xmax * eeg.srate)] # get info if ch_fname is not None: @@ -132,9 +143,31 @@ def __init__(self, fname, ch_fname, eog=None, verbose=None): info = _get_info(eeg, eog, ch_fname) super(RawSet, self).__init__( - info, data, filenames=[fname], orig_format='double', - verbose=verbose) + info, preload, filenames=[data_fname], last_samps=last_samps, + orig_format='double', verbose=verbose) logger.info(' Range : %d ... %d = %9.3f ... 
%9.3f secs' % (self.first_samp, self.last_samp, float(self.first_samp) / self.info['sfreq'], float(self.last_samp) / self.info['sfreq'])) + + def _read_segment_file(self, data, idx, fi, start, stop, cals, mult): + """Read a chunk of raw data""" + scaling = 1e-6 + nchan = self.info['nchan'] + data_offset = self.info['nchan'] * start * 4 + data_left = (stop - start) * nchan + # Read up to 200 MB of data at a time (50e6 float32 samples). + blk_size = min(data_left, (50000000 // nchan) * nchan) + + with open(self._filenames[fi], 'rb', buffering=0) as fid: + fid.seek(data_offset) + # extract data in chunks + for blk_start in np.arange(0, data_left, blk_size) // nchan: + blk_size = min(blk_size, data_left - blk_start * nchan) + block = np.fromfile(fid, + dtype=np.float32, count=blk_size) * scaling + block = block.reshape(nchan, -1, order='F') + blk_stop = blk_start + block.shape[1] + data_view = data[:, blk_start:blk_stop] + _mult_cal_one(data_view, block, idx, cals, mult) + return data