BUG: Fix EDF header parsing

mne-tools · Nov 24, 2020 · a372544 · a372544
1 parent ea8120f
commit a372544
Show file tree

Hide file tree

Showing 6 changed files with 81 additions and 26 deletions.
diff --git a/doc/changes/latest.inc b/doc/changes/latest.inc
@@ -24,6 +24,8 @@ Current (0.22.dev0)
 
 .. |Evan Hathaway| replace:: **Evan Hathaway**
 
+.. |Austin Hurst| replace:: **Austin Hurst**
+
 
 Enhancements
 ~~~~~~~~~~~~
@@ -82,6 +84,8 @@ Bugs
 
 - Fix bug with reading split files that have dashes in the filename (:gh:`8339` **by new contributor** |Eduard Ort|_)
 
+- Fix bug with parsing EDF dates and date integers (:gh:`8558` **by new contributor** |Austin Hurst|_ and `Eric Larson`_)
+
 - Fix bug with `~mne.viz.plot_epochs_image` when ``order`` is supplied and multiple conditions are plotted (:gh:`8377` by `Daniel McCloy`_ )
 
 - Fix bug with :func:`mne.viz.plot_source_estimates` when using the PyVista backend where singleton time points were not handled properly (:gh:`8285` by `Eric Larson`_)

diff --git a/doc/changes/names.inc b/doc/changes/names.inc
@@ -334,6 +334,8 @@
 
 .. _Tod Flak: https://github.com/todflak
 
+.. _Austin Hurst: https://github.com/a-hurst
+
 .. _Victoria Peterson: https://github.com/vpeterson
 
 .. _Evan Hathaway: https://github.com/ephathaway
diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py
@@ -510,7 +510,7 @@ def _parse_prefilter_string(prefiltering):
 
 
 def _edf_str_int(x, fid=None):
-    return int(x.decode().rstrip('\x00'))
+    return int(x.decode().split('\x00')[0])
 
 
 def _read_edf_header(fname, exclude):
@@ -522,27 +522,56 @@ def _read_edf_header(fname, exclude):
         fid.read(8)  # version (unused here)
 
         # patient ID
-        pid = fid.read(80).decode('latin-1')
-        pid = pid.split(' ', 2)
         patient = {}
-        if len(pid) >= 2:
-            patient['id'] = pid[0]
-            patient['name'] = pid[1]
+        id_info = fid.read(80).decode('latin-1').rstrip()
+        id_info = id_info.split(' ')
+        if len(id_info):
+            patient['id'] = id_info[0]
+            if len(id_info) == 4:
+                try:
+                    birthdate = datetime.strptime(id_info[2], "%d-%b-%Y")
+                except ValueError:
+                    birthdate = "X"
+                patient['sex'] = id_info[1]
+                patient['birthday'] = birthdate
+                patient['name'] = id_info[3]
 
         # Recording ID
         meas_id = {}
-        meas_id['recording_id'] = fid.read(80).decode('latin-1').strip(' \x00')
+        rec_info = fid.read(80).decode('latin-1').rstrip().split(' ')
+        valid_startdate = False
+        if len(rec_info) == 5:
+            try:
+                startdate = datetime.strptime(rec_info[1], "%d-%b-%Y")
+            except ValueError:
+                startdate = "X"
+            else:
+                valid_startdate = True
+            meas_id['startdate'] = startdate
+            meas_id['study_id'] = rec_info[2]
+            meas_id['technician'] = rec_info[3]
+            meas_id['equipment'] = rec_info[4]
+
+        # If startdate available in recording info, use it instead of the
+        # file's meas_date since it contains all 4 digits of the year
+        if valid_startdate:
+            day = meas_id['startdate'].day
+            month = meas_id['startdate'].month
+            year = meas_id['startdate'].year
+            fid.read(8)  # skip file's meas_date
+        else:
+            meas_date = fid.read(8).decode('latin-1')
+            day, month, year = [int(x) for x in meas_date.split('.')]
+            year = year + 2000 if year < 85 else year + 1900
 
-        day, month, year = [int(x) for x in
-                            re.findall(r'(\d+)', fid.read(8).decode())]
-        hour, minute, sec = [int(x) for x in
-                             re.findall(r'(\d+)', fid.read(8).decode())]
-        century = 2000 if year < 50 else 1900
+        meas_time = fid.read(8).decode('latin-1')
+        hour, minute, sec = [int(x) for x in meas_time.split('.')]
         try:
-            meas_date = datetime(year + century, month, day, hour, minute, sec,
+            meas_date = datetime(year, month, day, hour, minute, sec,
                                  tzinfo=timezone.utc)
+            print(meas_date)
         except ValueError:
-            warn(f'Invalid date encountered ({year + century:04d}-{month:02d}-'
+            warn(f'Invalid date encountered ({year:04d}-{month:02d}-'
                  f'{day:02d} {hour:02d}:{minute:02d}:{sec:02d}).')
             meas_date = None
 

diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py
@@ -20,13 +20,11 @@
 
 from mne import pick_types, Annotations
 from mne.datasets import testing
-from mne.utils import run_tests_if_main, requires_pandas, _TempDir
-from mne.io import read_raw_edf, read_raw_bdf
+from mne.utils import requires_pandas
+from mne.io import read_raw_edf, read_raw_bdf, read_raw_fif
 from mne.io.tests.test_raw import _test_raw_reader
-from mne.io.edf.edf import _get_edf_default_event_id
-from mne.io.edf.edf import _read_annotations_edf
-from mne.io.edf.edf import _read_ch
-from mne.io.edf.edf import _parse_prefilter_string
+from mne.io.edf.edf import (_get_edf_default_event_id, _read_annotations_edf,
+                            _read_ch, _parse_prefilter_string, _edf_str_int)
 from mne.io.pick import channel_indices_by_type, get_channel_type_constants
 from mne.annotations import events_from_annotations, read_annotations
 
@@ -71,6 +69,21 @@ def test_orig_units():
     assert orig_units['A1'] == 'µV'  # formerly 'uV' edit by _check_orig_units
 
 
+def test_subject_info(tmpdir):
+    """Test exposure of subject info from the EDF header."""
+    raw = read_raw_edf(edf_path)
+    assert raw.info['subject_info'] is None  # XXX this is arguably a bug
+    edf_info = raw._raw_extras[0]
+    assert edf_info['subject_info'] is not None
+    want = {'id': 'X', 'sex': 'X', 'birthday': 'X', 'name': 'X'}
+    for key, val in want.items():
+        assert edf_info['subject_info'][key] == val, key
+    fname = tmpdir.join('test_raw.fif')
+    raw.save(fname)
+    raw = read_raw_fif(fname)
+    assert raw.info['subject_info'] is None  # XXX should eventually round-trip
+
+
 def test_bdf_data():
     """Test reading raw bdf files."""
     # XXX BDF data for these is around 0.01 when it should be in the uV range,
@@ -399,13 +412,17 @@ def test_edf_annot_sub_s_onset():
     assert_allclose(raw.annotations.onset, [1.951172, 3.492188])
 
 
-def test_invalid_date():
+def test_invalid_date(tmpdir):
     """Test handling of invalid date in EDF header."""
-    tempdir = _TempDir()
+    tempdir = str(tmpdir)
     with open(edf_path, 'rb') as f:  # read valid test file
         edf = bytearray(f.read())
+
     # original date in header is 29.04.14 (2014-04-29) at pos 168:176
+    # but the reader uses the Startdate field instead when it is valid;
+    # it starts at byte 88 and is b'Startdate 29-APR-2014 X X X'
     # create invalid date 29.02.14 (2014 is not a leap year)
+    edf[101:104] = b'FEB'
     edf[172] = ord('2')
     with open(op.join(tempdir, "temp.edf"), "wb") as f:
         f.write(edf)
@@ -420,4 +437,7 @@ def test_invalid_date():
         read_raw_edf(op.join(tempdir, "temp.edf"), preload=True)
 
 
-run_tests_if_main()
+def test_empty_chars():
+    """Test blank char support."""
+    # from gitter: digits followed by a NUL and a trailing character
+    assert _edf_str_int(b'1819\x00 ') == 1819
diff --git a/mne/io/meas_info.py b/mne/io/meas_info.py
@@ -896,7 +896,7 @@ def write_dig(fname, pts, coord_frame=None):
         here. Can be None (default) if the points could have varying
         coordinate frames.
     """
-    return _dig_write_dig(fname, pts, coord_frame=None)
+    return _dig_write_dig(fname, pts, coord_frame=coord_frame)
 
 
 @verbose

diff --git a/mne/tests/test_morph.py b/mne/tests/test_morph.py
@@ -27,7 +27,7 @@
                               _get_atlas_values, _add_interpolator,
                               _grid_interp)
 from mne.transforms import quat_to_rot
-from mne.utils import (requires_nibabel, check_version,
+from mne.utils import (requires_nibabel, check_version, requires_version,
                        requires_dipy, requires_h5py, catch_logging)
 from mne.fixes import _get_args
 
@@ -912,7 +912,7 @@ def _rand_affine(rng):
 
 
 @requires_nibabel()
-@requires_dipy()
+@requires_version('dipy', '1.3')
 @pytest.mark.parametrize('from_shape', _shapes)
 @pytest.mark.parametrize('from_affine', _affines)
 @pytest.mark.parametrize('to_shape', _shapes)