forked from mne-tools/mne-python
/
edf.py
561 lines (499 loc) · 22.6 KB
/
edf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
"""Conversion tool from EDF, EDF+, BDF to FIF
"""
# Authors: Teon Brooks <teon.brooks@gmail.com>
# Martin Billinger <martin.billinger@tugraz.at>
#
# License: BSD (3-clause)
import os
import calendar
import datetime
import re
import warnings
import numpy as np
from ...utils import verbose, logger
from ..utils import _blk_read_lims
from ..base import _BaseRaw, _check_update_montage
from ..meas_info import _empty_info
from ..constants import FIFF
from ...filter import resample
from ...externals.six.moves import zip
class RawEDF(_BaseRaw):
    """Raw object from EDF, EDF+, BDF file

    Parameters
    ----------
    input_fname : str
        Path to the EDF+,BDF file.
    montage : str | None | instance of Montage
        Path or instance of montage containing electrode positions.
        If None, sensor locations are (0,0,0). See the documentation of
        :func:`mne.channels.read_montage` for more information.
    eog : list or tuple
        Names of channels or list of indices that should be designated
        EOG channels. Values should correspond to the electrodes in the
        edf file. Default is None.
    misc : list or tuple
        Names of channels or list of indices that should be designated
        MISC channels. Values should correspond to the electrodes in the
        edf file. Default is None.
    stim_channel : str | int | None
        The channel name or channel index (starting at 0).
        -1 corresponds to the last channel (default).
        If None, there will be no stim channel added.
    annot : str | None
        Path to annotation file.
        If None, no derived stim channel will be added (for files requiring
        annotation file to interpret stim channel).
    annotmap : str | None
        Path to annotation map file containing mapping from label to trigger.
        Must be specified if annot is not None.
    preload : bool or str (default False)
        Preload data into memory for data manipulation and faster indexing.
        If True, the data will be preloaded into memory (fast, requires
        large amount of memory). If preload is a string, preload is the
        file name of a memory-mapped file which is used to store the data
        on the hard drive (slower, requires less memory).
    verbose : bool, str, int, or None
        If not None, override default verbose level (see mne.verbose).

    See Also
    --------
    mne.io.Raw : Documentation of attribute and methods.
    """
    @verbose
    def __init__(self, input_fname, montage, eog=None, misc=None,
                 stim_channel=-1, annot=None, annotmap=None,
                 preload=False, verbose=None):
        logger.info('Extracting edf Parameters from %s...' % input_fname)
        input_fname = os.path.abspath(input_fname)
        # ``info`` is the measurement info; ``edf_info`` carries the
        # file-format details that _read_segment_file needs later on.
        info, edf_info = _get_edf_info(input_fname, stim_channel,
                                       annot, annotmap,
                                       eog, misc, preload)
        logger.info('Creating Raw.info structure...')
        _check_update_montage(info, montage)

        # exactly one of the two annotation files was given
        if bool(annot) != bool(annotmap):
            warnings.warn(("Stimulus Channel will not be annotated. "
                           "Both 'annot' and 'annotmap' must be specified."))

        # Raw attributes
        last_samps = [edf_info['nsamples'] - 1]
        super(RawEDF, self).__init__(
            info, preload, filenames=[input_fname], raw_extras=[edf_info],
            last_samps=last_samps, orig_format='int',
            verbose=verbose)

        logger.info('Ready.')

    @verbose
    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
        """Read a chunk of raw data

        Parameters
        ----------
        data : ndarray
            Output buffer. It is filled, scaled and offset in place.
        idx : indexer
            Channel selection, applied to ``np.arange(nchan)``.
        fi : int
            Index of the file to read; selects the entry of
            ``self._filenames`` and ``self._raw_extras``.
        start : int
            First sample to read, forwarded to ``_blk_read_lims``.
        stop : int
            End of the sample range, forwarded to ``_blk_read_lims``.
        cals : object
            Not used by this reader.
        mult : None
            Must be None, anything else raises NotImplementedError.
        """
        from scipy.interpolate import interp1d
        if mult is not None:
            # XXX "cals" here does not function the same way as in RawFIF,
            # and for efficiency we want to be able to combine mult and cals
            # so proj support will have to wait until this is resolved
            raise NotImplementedError('mult is not supported yet')
        sel = np.arange(self.info['nchan'])[idx]

        n_samps = self._raw_extras[fi]['n_samps']
        buf_len = int(self._raw_extras[fi]['max_samp'])
        sfreq = self.info['sfreq']
        data_size = self._raw_extras[fi]['data_size']
        data_offset = self._raw_extras[fi]['data_offset']
        stim_channel = self._raw_extras[fi]['stim_channel']
        tal_channel = self._raw_extras[fi]['tal_channel']
        annot = self._raw_extras[fi]['annot']
        annotmap = self._raw_extras[fi]['annotmap']
        subtype = self._raw_extras[fi]['subtype']

        # gain constructor
        physical_range = np.array([ch['range'] for ch in self.info['chs']])
        cal = np.array([ch['cal'] for ch in self.info['chs']])
        gains = np.atleast_2d(self._raw_extras[fi]['units'] *
                              (physical_range / cal))

        # physical dimension in uV
        physical_min = np.atleast_2d(self._raw_extras[fi]['units'] *
                                     self._raw_extras[fi]['physical_min'])
        digital_min = self._raw_extras[fi]['digital_min']

        offsets = np.atleast_2d(physical_min - (digital_min * gains)).T
        if tal_channel is not None:
            # the annotation channel holds text bytes, it must not be shifted
            offsets[tal_channel] = 0

        block_start_idx, r_lims, d_lims = _blk_read_lims(start, stop, buf_len)
        read_size = len(r_lims) * buf_len
        with open(self._filenames[fi], 'rb', buffering=0) as fid:
            # extract data
            # Byte offset of every channel inside one data record; the last
            # entry is the size in bytes of a complete record.
            ch_offsets = np.cumsum(np.concatenate([[0], n_samps * data_size]))
            # Skip whole records using the real record size. Channels may
            # store fewer than ``buf_len`` samples per record, so the record
            # size is not ``buf_len * nchan * data_size`` in general.
            start_offset = data_offset + block_start_idx * ch_offsets[-1]
            this_data = np.empty((len(sel), buf_len))
            for bi in range(len(r_lims)):
                block_offset = bi * ch_offsets[-1]
                d_sidx, d_eidx = d_lims[bi]
                r_sidx, r_eidx = r_lims[bi]
                n_buf_samp = r_eidx - r_sidx
                for ii, ci in enumerate(sel):
                    n_samp = n_samps[ci]
                    # bdf data: 24bit data
                    fid.seek(start_offset + block_offset + ch_offsets[ci], 0)
                    if n_samp == buf_len:
                        # use faster version with skips built in
                        fid.seek(r_sidx * data_size, 1)
                        ch_data = _read_ch(fid, subtype, n_buf_samp, data_size)
                    else:
                        # read in all the data and triage appropriately
                        ch_data = _read_ch(fid, subtype, n_samp, data_size)
                        if ci == tal_channel:
                            # don't resample tal_channel,
                            # pad with zeros instead.
                            n_missing = int(buf_len - n_samp)
                            ch_data = np.hstack([ch_data, [0] * n_missing])
                            ch_data = ch_data[r_sidx:r_eidx]
                        elif ci == stim_channel:
                            if annot and annotmap or \
                                    tal_channel is not None:
                                # don't bother with resampling the stim ch
                                # because it gets overwritten later on.
                                ch_data = np.zeros(n_buf_samp)
                            else:
                                warnings.warn('Interpolating stim channel.'
                                              ' Events may jitter.')
                                oldrange = np.linspace(0, 1, n_samp + 1, True)
                                newrange = np.linspace(0, 1, buf_len, False)
                                newrange = newrange[r_sidx:r_eidx]
                                ch_data = interp1d(
                                    oldrange, np.append(ch_data, 0),
                                    kind='zero')(newrange)
                        else:
                            ch_data = resample(ch_data, buf_len, n_samp,
                                               npad=0)[r_sidx:r_eidx]
                    this_data[ii, :n_buf_samp] = ch_data
                data[:, d_sidx:d_eidx] = this_data[:, :n_buf_samp]
        # digital values -> physical values
        data *= gains.T[sel]
        data += offsets[sel]

        # only try to read the stim channel if it's not None and it's
        # actually one of the requested channels
        if stim_channel is not None and (sel == stim_channel).sum() > 0:
            stim_channel_idx = np.where(sel == stim_channel)[0]
            if annot and annotmap:
                evts = _read_annot(annot, annotmap, sfreq,
                                   self._last_samps[fi])
                data[stim_channel_idx, :] = evts[start:stop + 1]
            elif tal_channel is not None:
                tal_channel_idx = np.where(sel == tal_channel)[0][0]
                evts = _parse_tal_channel(data[tal_channel_idx])
                self._raw_extras[fi]['events'] = evts

                # number the distinct annotation texts 1, 2, ... in sorted
                # order; that number becomes the trigger value
                unique_annots = sorted(set([e[2] for e in evts]))
                mapping = dict((a, n + 1) for n, a in enumerate(unique_annots))

                stim = np.zeros(read_size)
                for t_start, t_duration, annotation in evts:
                    evid = mapping[annotation]
                    n_start = int(t_start * sfreq)
                    n_stop = int(t_duration * sfreq) + n_start - 1
                    # make sure events without duration get one sample
                    n_stop = n_stop if n_stop > n_start else n_start + 1
                    if any(stim[n_start:n_stop]):
                        raise NotImplementedError('EDF+ with overlapping '
                                                  'events not supported.')
                    stim[n_start:n_stop] = evid
                data[stim_channel_idx, :] = stim[start:stop]
            else:
                # Allows support for up to 17-bit trigger values (2 ** 17 - 1)
                stim = np.bitwise_and(data[stim_channel_idx].astype(int),
                                      131071)
                data[stim_channel_idx, :] = stim
def _read_ch(fid, subtype, samp, data_size):
    """Read ``samp`` samples of one channel from the current file position

    Parameters
    ----------
    fid : file
        Open binary file, positioned at the first sample to read.
    subtype : str
        File subtype. '24BIT' and 'bdf' select 24-bit decoding, anything
        else is read as 16-bit little-endian integers.
    samp : int
        Number of samples to read.
    data_size : int
        Bytes per sample; only used on the 24-bit path.

    Returns
    -------
    ch_data : ndarray
        The decoded integer samples.
    """
    if subtype not in ('24BIT', 'bdf'):
        # edf data: 16bit data
        return np.fromfile(fid, dtype='<i2', count=samp)

    # bdf data: 24bit data, stored as three bytes per sample, low byte first
    raw_bytes = np.fromfile(fid, dtype=np.uint8, count=samp * data_size)
    triplets = raw_bytes.reshape(-1, 3).astype(np.int32)
    low, mid, high = triplets[:, 0], triplets[:, 1], triplets[:, 2]
    values = low + (mid << 8) + (high << 16)
    # 24th bit determines the sign
    is_negative = values >= (1 << 23)
    values[is_negative] -= (1 << 24)
    return values
def _parse_tal_channel(tal_channel_data):
    """Parse time-stamped annotation lists (TALs) in stim_channel
    and return list of events.

    Parameters
    ----------
    tal_channel_data : ndarray, shape = [n_samples]
        channel data in EDF+ TAL format. Every sample carries two bytes of
        annotation text, low byte first.

    Returns
    -------
    events : list
        List of events. Each event contains [start, duration, annotation].
        ``duration`` is 0 when the TAL does not specify one.

    Raises
    ------
    ValueError
        If a sample does not fit into 16 bits.

    References
    ----------
    http://www.edfplus.info/specs/edfplus.html#tal
    """
    # convert tal_channel to a byte string
    tals = bytearray()
    for s in tal_channel_data:
        i = int(s)
        if not -32768 <= i <= 65535:
            raise ValueError('TAL sample %d does not fit into 16 bits' % i)
        # Masking (instead of % and //) also handles negative samples, which
        # occur whenever the high byte of the pair is >= 0x80.
        tals.extend([i & 0xFF, (i >> 8) & 0xFF])

    # EDF+ annotation text is UTF-8. Fall back to latin-1, which can decode
    # any byte sequence, so that files from sloppy writers stay readable.
    try:
        tal_text = tals.decode('utf-8')
    except UnicodeDecodeError:
        tal_text = tals.decode('latin-1')

    # onset, optional "\x15duration", then "\x14"-separated annotations,
    # terminated by "\x14\x00"
    regex_tal = r'([+-]\d+\.?\d*)(\x15(\d+\.?\d*))?(\x14.*?)\x14\x00'
    tal_list = re.findall(regex_tal, tal_text)
    events = []
    for ev in tal_list:
        onset = float(ev[0])
        duration = float(ev[2]) if ev[2] else 0
        # ev[3] starts with the separator, so the first split item is empty
        for annotation in ev[3].split('\x14')[1:]:
            if annotation:
                events.append([onset, duration, annotation])

    return events
def _get_edf_info(fname, stim_channel, annot, annotmap, eog, misc, preload):
    """Extracts all the information from the EDF+,BDF file

    Parameters
    ----------
    fname : str
        Path to the EDF+,BDF file.
    stim_channel : str | int | None
        Name or index of the stim channel; -1 selects the last channel.
    annot : str | None
        Path to annotation file; stored in ``edf_info`` unchanged.
    annotmap : str | None
        Path to annotation map file; stored in ``edf_info`` unchanged.
    eog : list | tuple | None
        Names or indices of channels to mark as EOG. None means none.
    misc : list | tuple | None
        Names or indices of channels to mark as MISC. None means none.
    preload : bool | str
        Preload setting; only its truth value is checked here.

    Returns
    -------
    info : measurement info
        Structure created by ``_empty_info`` and filled from the header.
    edf_info : dict
        File-format details (offsets, sample counts, scaling, stim and
        annotation channel indices) needed to read the data later.

    Raises
    ------
    RuntimeError
        If the file has an 'EDF Annotations' channel, a stim channel is
        requested and ``preload`` is false.
    """

    if eog is None:
        eog = []
    if misc is None:
        misc = []
    edf_info = dict()
    edf_info['annot'] = annot
    edf_info['annotmap'] = annotmap
    edf_info['events'] = []

    with open(fname, 'rb') as fid:
        assert(fid.tell() == 0)
        fid.seek(8)

        fid.read(80).strip().decode()  # subject id
        fid.read(80).strip().decode()  # recording id
        day, month, year = [int(x) for x in re.findall(r'(\d+)',
                                                       fid.read(8).decode())]
        hour, minute, sec = [int(x) for x in re.findall(r'(\d+)',
                                                        fid.read(8).decode())]
        # The header stores a two-digit year. The EDF specification uses
        # 1985 as clipping date: 85-99 mean 1985-1999, 00-84 mean 2000-2084.
        year += 1900 if year >= 85 else 2000
        date = datetime.datetime(year, month, day, hour, minute, sec)
        edf_info['data_offset'] = header_nbytes = int(fid.read(8).decode())
        subtype = fid.read(44).strip().decode()[:5]
        if len(subtype) > 0:
            edf_info['subtype'] = subtype
        else:
            # no subtype in the header: fall back to the file extension
            edf_info['subtype'] = os.path.splitext(fname)[1][1:].lower()

        edf_info['n_records'] = n_records = int(fid.read(8).decode())
        # record length in seconds
        record_length = float(fid.read(8).decode())
        if record_length == 0:
            edf_info['record_length'] = record_length = 1.
            warnings.warn('Header information is incorrect for record length. '
                          'Default record length set to 1.')
        else:
            edf_info['record_length'] = record_length
        nchan = int(fid.read(4).decode())
        channels = list(range(nchan))
        ch_names = [fid.read(16).strip().decode() for ch in channels]
        for ch in channels:
            fid.read(80)  # transducer
        units = [fid.read(8).strip().decode() for ch in channels]
        # replace the unit label by the factor that converts to volts
        for i, unit in enumerate(units):
            if unit == 'uV':
                units[i] = 1e-6
            else:
                units[i] = 1
        edf_info['units'] = units
        physical_min = np.array([float(fid.read(8).decode())
                                 for ch in channels])
        edf_info['physical_min'] = physical_min
        physical_max = np.array([float(fid.read(8).decode())
                                 for ch in channels])
        digital_min = np.array([float(fid.read(8).decode())
                                for ch in channels])
        edf_info['digital_min'] = digital_min
        digital_max = np.array([float(fid.read(8).decode())
                                for ch in channels])
        # the prefiltering entry of the last channel is not considered
        prefiltering = [fid.read(80).strip().decode() for ch in channels][:-1]
        highpass = np.ravel([re.findall(r'HP:\s+(\w+)', filt)
                             for filt in prefiltering])
        lowpass = np.ravel([re.findall(r'LP:\s+(\w+)', filt)
                            for filt in prefiltering])

        # number of samples per record
        n_samps = np.array([int(fid.read(8).decode()) for ch in channels])
        edf_info['n_samps'] = n_samps

        fid.read(32 * nchan).decode()  # reserved
        assert fid.tell() == header_nbytes

    physical_ranges = physical_max - physical_min
    cals = digital_max - digital_min

    if edf_info['subtype'] in ('24BIT', 'bdf'):
        edf_info['data_size'] = 3  # 24-bit (3 byte) integers
    else:
        edf_info['data_size'] = 2  # 16-bit (2 byte) integers

    # Creates a list of dicts of eeg channels for raw.info
    logger.info('Setting channel info structure...')
    chs = list()

    tal_ch_name = 'EDF Annotations'
    if tal_ch_name in ch_names:
        tal_channel = ch_names.index(tal_ch_name)
    else:
        tal_channel = None
    edf_info['tal_channel'] = tal_channel
    if tal_channel is not None and stim_channel is not None and not preload:
        raise RuntimeError('%s' % ('EDF+ Annotations (TAL) channel needs to be'
                                   ' parsed completely on loading.'
                                   ' You must set preload parameter to True.'))
    if stim_channel == -1:
        stim_channel = nchan - 1
    # entries stay truthy only for channels left as plain EEG
    pick_mask = np.ones(len(ch_names))
    for idx, ch_info in enumerate(zip(ch_names, physical_ranges, cals)):
        ch_name, physical_range, cal = ch_info
        chan_info = {}
        chan_info['cal'] = cal
        chan_info['logno'] = idx + 1
        chan_info['scanno'] = idx + 1
        chan_info['range'] = physical_range
        chan_info['unit_mul'] = 0.
        chan_info['ch_name'] = ch_name
        chan_info['unit'] = FIFF.FIFF_UNIT_V
        chan_info['coord_frame'] = FIFF.FIFFV_COORD_HEAD
        chan_info['coil_type'] = FIFF.FIFFV_COIL_EEG
        chan_info['kind'] = FIFF.FIFFV_EEG_CH
        chan_info['loc'] = np.zeros(12)
        # channels can be given by name, by index or by negative index
        if ch_name in eog or idx in eog or idx - nchan in eog:
            chan_info['coil_type'] = FIFF.FIFFV_COIL_NONE
            chan_info['kind'] = FIFF.FIFFV_EOG_CH
            pick_mask[idx] = False
        if ch_name in misc or idx in misc or idx - nchan in misc:
            chan_info['coil_type'] = FIFF.FIFFV_COIL_NONE
            chan_info['kind'] = FIFF.FIFFV_MISC_CH
            pick_mask[idx] = False
        check1 = stim_channel == ch_name
        check2 = stim_channel == idx
        check3 = nchan > 1
        stim_check = np.logical_and(np.logical_or(check1, check2), check3)
        if stim_check:
            chan_info['coil_type'] = FIFF.FIFFV_COIL_NONE
            chan_info['unit'] = FIFF.FIFF_UNIT_NONE
            chan_info['kind'] = FIFF.FIFFV_STIM_CH
            pick_mask[idx] = False
            chan_info['ch_name'] = 'STI 014'
            ch_names[idx] = chan_info['ch_name']
            units[idx] = 1
            # from here on the stim channel is referred to by index
            if isinstance(stim_channel, str):
                stim_channel = idx
        if tal_channel == idx:
            chan_info['range'] = 1
            chan_info['cal'] = 1
            chan_info['coil_type'] = FIFF.FIFFV_COIL_NONE
            chan_info['unit'] = FIFF.FIFF_UNIT_NONE
            chan_info['kind'] = FIFF.FIFFV_MISC_CH
            pick_mask[idx] = False
        chs.append(chan_info)
    edf_info['stim_channel'] = stim_channel

    if any(pick_mask):
        picks = [item for item, mask in zip(range(nchan), pick_mask) if mask]
        edf_info['max_samp'] = max_samp = n_samps[picks].max()
    else:
        edf_info['max_samp'] = max_samp = n_samps.max()
    # sfreq defined as the max sampling rate of eeg
    sfreq = n_samps.max() / record_length
    info = _empty_info(sfreq)
    info['filename'] = fname
    info['meas_date'] = calendar.timegm(date.utctimetuple())
    info['chs'] = chs

    if highpass.size == 0:
        pass
    elif all(highpass):
        if highpass[0] == 'NaN':
            pass  # Placeholder for future use. Highpass set in _empty_info.
        elif highpass[0] == 'DC':
            info['highpass'] = 0.
        else:
            info['highpass'] = float(highpass[0])
    else:
        info['highpass'] = float(np.min(highpass))
        warnings.warn('Channels contain different highpass filters. '
                      'Highest filter setting will be stored.')

    if lowpass.size == 0:
        pass
    elif all(lowpass):
        if lowpass[0] == 'NaN':
            pass  # Placeholder for future use. Lowpass set in _empty_info.
        else:
            info['lowpass'] = float(lowpass[0])
    else:
        info['lowpass'] = float(np.min(lowpass))
        warnings.warn('%s' % ('Channels contain different lowpass filters.'
                              ' Lowest filter setting will be stored.'))

    # Some keys to be consistent with FIF measurement info
    info['description'] = None
    info['buffer_size_sec'] = 10.
    edf_info['nsamples'] = int(n_records * max_samp)

    return info, edf_info
def _read_annot(annot, annotmap, sfreq, data_length):
    """Annotation File Reader

    Parameters
    ----------
    annot : str
        Path to annotation file. Entries have the form
        ``<signed time>,<label>``.
    annotmap : str
        Path to annotation map file containing mapping from label to trigger.
        Entries have the form ``<label>:<trigger>``.
    sfreq : float
        Sampling frequency.
    data_length : int
        Length of the data file.

    Returns
    -------
    stim_channel : ndarray
        An array containing stimulus trigger events. It is all zeros when
        the annotation file contains no entries.

    Raises
    ------
    KeyError
        If a label of the annotation file is missing in the map file.
    """
    # read both files up front and close them right away
    with open(annot) as fid:
        annot_text = fid.read()
    with open(annotmap) as fid:
        annotmap_text = fid.read()

    triggers = re.findall(r'([+/-]\d+.\d+),(\w+)', annot_text)
    # a later entry for the same label overrides an earlier one
    label_to_trigger = dict(re.findall(r'(\w+):(\d+)', annotmap_text))

    # resolve every label before writing, so that an unknown label fails
    # before any sample is set
    values = [int(label_to_trigger[label]) for _, label in triggers]

    stim_channel = np.zeros(data_length)
    for (onset, _), value in zip(triggers, values):
        # array indices must be integers; truncate the sample position the
        # same way the EDF+ annotation handling in this file does
        stim_channel[int(float(onset) * sfreq)] = value

    return stim_channel
def read_raw_edf(input_fname, montage=None, eog=None, misc=None,
                 stim_channel=-1, annot=None, annotmap=None,
                 preload=False, verbose=None):
    """Reader function for EDF+, BDF conversion to FIF

    All arguments are handed to :class:`RawEDF` unchanged.

    Parameters
    ----------
    input_fname : str
        Path to the EDF+,BDF file.
    montage : str | None | instance of Montage
        Path or instance of montage containing electrode positions.
        If None, sensor locations are (0,0,0). See the documentation of
        :func:`mne.channels.read_montage` for more information.
    eog : list or tuple
        Names of channels or list of indices that should be designated
        EOG channels. Values should correspond to the electrodes in the
        edf file. Default is None.
    misc : list or tuple
        Names of channels or list of indices that should be designated
        MISC channels. Values should correspond to the electrodes in the
        edf file. Default is None.
    stim_channel : str | int | None
        The channel name or channel index (starting at 0).
        -1 corresponds to the last channel (default).
        If None, there will be no stim channel added.
    annot : str | None
        Path to annotation file.
        If None, no derived stim channel will be added (for files requiring
        annotation file to interpret stim channel).
    annotmap : str | None
        Path to annotation map file containing mapping from label to trigger.
        Must be specified if annot is not None.
    preload : bool or str (default False)
        Preload data into memory for data manipulation and faster indexing.
        If True, the data will be preloaded into memory (fast, requires
        large amount of memory). If preload is a string, preload is the
        file name of a memory-mapped file which is used to store the data
        on the hard drive (slower, requires less memory).
    verbose : bool, str, int, or None
        If not None, override default verbose level (see mne.verbose).

    Returns
    -------
    raw : Instance of RawEDF
        A Raw object containing EDF data.

    See Also
    --------
    mne.io.Raw : Documentation of attribute and methods.
    """
    reader_kwargs = dict(
        input_fname=input_fname,
        montage=montage,
        eog=eog,
        misc=misc,
        stim_channel=stim_channel,
        annot=annot,
        annotmap=annotmap,
        preload=preload,
        verbose=verbose,
    )
    return RawEDF(**reader_kwargs)