Skip to content

Commit

Permalink
BUG: Fix EDF header parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
larsoner committed Nov 24, 2020
1 parent ea8120f commit a372544
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 26 deletions.
4 changes: 4 additions & 0 deletions doc/changes/latest.inc
Expand Up @@ -24,6 +24,8 @@ Current (0.22.dev0)

.. |Evan Hathaway| replace:: **Evan Hathaway**

.. |Austin Hurst| replace:: **Austin Hurst**


Enhancements
~~~~~~~~~~~~
Expand Down Expand Up @@ -82,6 +84,8 @@ Bugs

- Fix bug with reading split files that have dashes in the filename (:gh:`8339` **by new contributor** |Eduard Ort|_)

- Fix bug with parsing EDF dates and date integers (:gh:`8558` **by new contributor** |Austin Hurst|_ and `Eric Larson`_)

- Fix bug with `~mne.viz.plot_epochs_image` when ``order`` is supplied and multiple conditions are plotted (:gh:`8377` by `Daniel McCloy`_ )

- Fix bug with :func:`mne.viz.plot_source_estimates` when using the PyVista backend where singleton time points were not handled properly (:gh:`8285` by `Eric Larson`_)
Expand Down
2 changes: 2 additions & 0 deletions doc/changes/names.inc
Expand Up @@ -334,6 +334,8 @@

.. _Tod Flak: https://github.com/todflak

.. _Austin Hurst: https://github.com/a-hurst

.. _Victoria Peterson: https://github.com/vpeterson

.. _Evan Hathaway: https://github.com/ephathaway
57 changes: 43 additions & 14 deletions mne/io/edf/edf.py
Expand Up @@ -510,7 +510,7 @@ def _parse_prefilter_string(prefiltering):


def _edf_str_int(x, fid=None):
return int(x.decode().rstrip('\x00'))
return int(x.decode().split('\x00')[0])


def _read_edf_header(fname, exclude):
Expand All @@ -522,27 +522,56 @@ def _read_edf_header(fname, exclude):
fid.read(8) # version (unused here)

# patient ID
pid = fid.read(80).decode('latin-1')
pid = pid.split(' ', 2)
patient = {}
if len(pid) >= 2:
patient['id'] = pid[0]
patient['name'] = pid[1]
id_info = fid.read(80).decode('latin-1').rstrip()
id_info = id_info.split(' ')
if len(id_info):
patient['id'] = id_info[0]
if len(id_info) == 4:
try:
birthdate = datetime.strptime(id_info[2], "%d-%b-%Y")
except ValueError:
birthdate = "X"
patient['sex'] = id_info[1]
patient['birthday'] = birthdate
patient['name'] = id_info[3]

# Recording ID
meas_id = {}
meas_id['recording_id'] = fid.read(80).decode('latin-1').strip(' \x00')
rec_info = fid.read(80).decode('latin-1').rstrip().split(' ')
valid_startdate = False
if len(rec_info) == 5:
try:
startdate = datetime.strptime(rec_info[1], "%d-%b-%Y")
except ValueError:
startdate = "X"
else:
valid_startdate = True
meas_id['startdate'] = startdate
meas_id['study_id'] = rec_info[2]
meas_id['technician'] = rec_info[3]
meas_id['equipment'] = rec_info[4]

# If startdate available in recording info, use it instead of the
# file's meas_date since it contains all 4 digits of the year
if valid_startdate:
day = meas_id['startdate'].day
month = meas_id['startdate'].month
year = meas_id['startdate'].year
fid.read(8) # skip file's meas_date
else:
meas_date = fid.read(8).decode('latin-1')
day, month, year = [int(x) for x in meas_date.split('.')]
year = year + 2000 if year < 85 else year + 1900

day, month, year = [int(x) for x in
re.findall(r'(\d+)', fid.read(8).decode())]
hour, minute, sec = [int(x) for x in
re.findall(r'(\d+)', fid.read(8).decode())]
century = 2000 if year < 50 else 1900
meas_time = fid.read(8).decode('latin-1')
hour, minute, sec = [int(x) for x in meas_time.split('.')]
try:
meas_date = datetime(year + century, month, day, hour, minute, sec,
meas_date = datetime(year, month, day, hour, minute, sec,
tzinfo=timezone.utc)
print(meas_date)
except ValueError:
warn(f'Invalid date encountered ({year + century:04d}-{month:02d}-'
warn(f'Invalid date encountered ({year:04d}-{month:02d}-'
f'{day:02d} {hour:02d}:{minute:02d}:{sec:02d}).')
meas_date = None

Expand Down
38 changes: 29 additions & 9 deletions mne/io/edf/tests/test_edf.py
Expand Up @@ -20,13 +20,11 @@

from mne import pick_types, Annotations
from mne.datasets import testing
from mne.utils import run_tests_if_main, requires_pandas, _TempDir
from mne.io import read_raw_edf, read_raw_bdf
from mne.utils import requires_pandas
from mne.io import read_raw_edf, read_raw_bdf, read_raw_fif
from mne.io.tests.test_raw import _test_raw_reader
from mne.io.edf.edf import _get_edf_default_event_id
from mne.io.edf.edf import _read_annotations_edf
from mne.io.edf.edf import _read_ch
from mne.io.edf.edf import _parse_prefilter_string
from mne.io.edf.edf import (_get_edf_default_event_id, _read_annotations_edf,
_read_ch, _parse_prefilter_string, _edf_str_int)
from mne.io.pick import channel_indices_by_type, get_channel_type_constants
from mne.annotations import events_from_annotations, read_annotations

Expand Down Expand Up @@ -71,6 +69,21 @@ def test_orig_units():
assert orig_units['A1'] == 'µV' # formerly 'uV' edit by _check_orig_units


def test_subject_info(tmpdir):
    """Test exposure of the subject info parsed from the EDF header."""
    raw = read_raw_edf(edf_path)
    assert raw.info['subject_info'] is None  # XXX this is arguably a bug
    # The parsed header fields are only checked via the reader's raw extras
    edf_info = raw._raw_extras[0]
    assert edf_info['subject_info'] is not None
    want = {'id': 'X', 'sex': 'X', 'birthday': 'X', 'name': 'X'}
    for key, val in want.items():
        assert edf_info['subject_info'][key] == val, key
    # Saving to FIF and reading back currently yields no subject info
    fname = tmpdir.join('test_raw.fif')
    raw.save(fname)
    raw = read_raw_fif(fname)
    assert raw.info['subject_info'] is None  # XXX should eventually round-trip


def test_bdf_data():
"""Test reading raw bdf files."""
# XXX BDF data for these is around 0.01 when it should be in the uV range,
Expand Down Expand Up @@ -399,13 +412,17 @@ def test_edf_annot_sub_s_onset():
assert_allclose(raw.annotations.onset, [1.951172, 3.492188])


def test_invalid_date():
def test_invalid_date(tmpdir):
"""Test handling of invalid date in EDF header."""
tempdir = _TempDir()
tempdir = str(tmpdir)
with open(edf_path, 'rb') as f: # read valid test file
edf = bytearray(f.read())

# original date in header is 29.04.14 (2014-04-29) at pos 168:176
# but we also use Startdate if available,
# which starts at byte 88 and is b'Startdate 29-APR-2014 X X X'
# create invalid date 29.02.14 (2014 is not a leap year)
edf[101:104] = b'FEB'
edf[172] = ord('2')
with open(op.join(tempdir, "temp.edf"), "wb") as f:
f.write(edf)
Expand All @@ -420,4 +437,7 @@ def test_invalid_date():
read_raw_edf(op.join(tempdir, "temp.edf"), preload=True)


run_tests_if_main()
def test_empty_chars():
    """Test integer parsing of a field with a NUL followed by a blank."""
    # from gitter
    field = b'1819\x00 '
    parsed = _edf_str_int(field)
    assert parsed == 1819
2 changes: 1 addition & 1 deletion mne/io/meas_info.py
Expand Up @@ -896,7 +896,7 @@ def write_dig(fname, pts, coord_frame=None):
here. Can be None (default) if the points could have varying
coordinate frames.
"""
return _dig_write_dig(fname, pts, coord_frame=None)
return _dig_write_dig(fname, pts, coord_frame=coord_frame)


@verbose
Expand Down
4 changes: 2 additions & 2 deletions mne/tests/test_morph.py
Expand Up @@ -27,7 +27,7 @@
_get_atlas_values, _add_interpolator,
_grid_interp)
from mne.transforms import quat_to_rot
from mne.utils import (requires_nibabel, check_version,
from mne.utils import (requires_nibabel, check_version, requires_version,
requires_dipy, requires_h5py, catch_logging)
from mne.fixes import _get_args

Expand Down Expand Up @@ -912,7 +912,7 @@ def _rand_affine(rng):


@requires_nibabel()
@requires_dipy()
@requires_version('dipy', '1.3')
@pytest.mark.parametrize('from_shape', _shapes)
@pytest.mark.parametrize('from_affine', _affines)
@pytest.mark.parametrize('to_shape', _shapes)
Expand Down

0 comments on commit a372544

Please sign in to comment.