Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 51 additions & 26 deletions mne/io/cnt/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from math import modf
from datetime import datetime
import numpy as np
from os import SEEK_END

from ...utils import warn

Expand Down Expand Up @@ -96,7 +97,7 @@ def _session_date_2_meas_date(session_date, date_format):
return (int_part, frac_part)


def _compute_robust_event_table_position(fid):
def _compute_robust_event_table_position(fid, data_format):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need the data_format? We could always test against [2, 4] and return whichever one matches.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand properly, we start searching for this event_table_pos at whatever is in the SETUP and we keep incrementing by uint32. What if it's 16?

Yes, but why 16? For example, suppose the event table pos read from the file is 3, the file size is 30 bytes, and the maximum of uint32 is 10. Then the real event table pos must be in [3, 13, 23], right? Now n_bytes is unknown: if cal_event_table=5 when n_bytes=2 and cal_event_table=13 when n_bytes=4, then we can say that an overflow occurred, the real event table pos is 13, and n_bytes=4.

What if we try to replicate the process that generates the value in the SETUP? If I understand properly, the value depends on n_bytes, sfreq, n_samples and n_channels. We have them all except n_bytes.
Yes, that's the reason; we will try both n_bytes = 2 and n_bytes = 4.

We can carry out the computation twice, once with int32 and once with np.int64 or c_longlong; then we detect the overflow and compare the overflowed result with whatever is in the event_table_pos. If they match, it means that we found it.
Yes, and maybe int16 and int32.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need the data_format? We could always test against [2, 4] and return whichever one matches.

The inference is based on the assumption that n_samples is reliable. But sometimes that is not true (the 914flankers.cnt case), which makes the inference less reliable. In that case, using the data format supplied by the user can help us find the correct event table pos.

"""Compute `event_table_position`.

When recording, event_table_position is computed (as an accumulation). If the
Expand All @@ -112,40 +113,64 @@ def _compute_robust_event_table_position(fid):
x_xxxxxxxx : xxxxxxxx xx Xxxxxxxxxxx
Xxx xxxxxxxxxxx.
"""

def _infer_n_bytes_event_table_pos(readed_event_table_pos):
    """Infer the sample width and the real event table position.

    For each candidate sample width (2 then 4 bytes) the expected event
    table position is computed from ``n_channels`` and ``n_samples``
    (names taken from the enclosing scope).  A candidate is accepted when
    the binary representation of the computed position ends with the bit
    pattern of the value read from the file.

    Parameters
    ----------
    readed_event_table_pos : int
        Event table position as read from the file (a signed 32-bit
        integer, so it may be negative).

    Returns
    -------
    n_bytes : int
        The first candidate sample width (2 or 4) whose computed position
        matches.
    computed_event_table_pos : int
        The position computed for that sample width.

    Raises
    ------
    Exception
        If neither candidate matches the value read from the file.
    """
    readed_event_table_pos = int(readed_event_table_pos)
    if readed_event_table_pos < 0:
        # Without ``width`` numpy renders a negative number as '-' followed
        # by the bits of its magnitude, which are not the bits stored in the
        # file.  Asking for 32 bits yields the two's-complement pattern.
        readed_event_table_pos_feature = np.binary_repr(
            readed_event_table_pos, width=32)
    else:
        readed_event_table_pos_feature = np.binary_repr(
            readed_event_table_pos)

    for n_bytes in [2, 4]:
        # Python ints are used so the product cannot overflow.
        computed_event_table_pos = (
            900 + 75 * int(n_channels) +
            n_bytes * int(n_channels) * int(n_samples))

        if (
            np.binary_repr(computed_event_table_pos)
            .endswith(readed_event_table_pos_feature)
        ):
            return n_bytes, computed_event_table_pos

    raise Exception("event_table_dismatch")

# Offsets (in bytes from the start of the file) at which the number of
# channels, the number of samples and the event table position are read.
SETUP_NCHANNELS_OFFSET = 370
SETUP_NSAMPLES_OFFSET = 864
SETUP_EVENTTABLEPOS_OFFSET = 886

def get_most_possible_sol(fid, possible_n_bytes, n_samples, n_channels):
    """Find the most plausible (event table position, n_bytes) pair.

    Both the event table position and n_bytes have several possible
    values, and n_samples might not be accurate.  The distance between
    each possible event table position and the position calculated from
    ``n_samples``/``n_channels`` for each candidate n_bytes is therefore
    used to pick the most plausible combination.

    When the distance of the returned solution is not 0, there is a
    mismatch between n_samples and the event table position.

    Parameters
    ----------
    fid : file-like
        Open binary file; it is handed to ``possible_event_table_pos``,
        which seeks in it.
    possible_n_bytes : iterable of int
        Candidate sample widths in bytes.
    n_samples : int
        Number of samples read from the header.
    n_channels : int
        Number of channels read from the header.

    Returns
    -------
    event_table_pos : int
        The selected event table position.
    n_bytes : int
        The selected sample width.
    distance : int
        Absolute difference between the calculated and the selected
        position (0 for an exact match).

    Raises
    ------
    ValueError
        If there is nothing to choose from, i.e. the stored event table
        position lies beyond the end of the file or ``possible_n_bytes``
        is empty.
    """
    # Plain Python ints so the product below cannot overflow when numpy
    # integers are passed in.
    n_samples = int(n_samples)
    n_channels = int(n_channels)

    # The calculated position depends only on n_bytes, so compute it once
    # per candidate width instead of once per candidate position.
    # NOTE(review): 900 and 75 are presumably the fixed header size and the
    # per-channel header size -- confirm against the format description.
    calculated = [
        (n_bytes, 900 + 75 * n_channels + n_bytes * n_channels * n_samples)
        for n_bytes in possible_n_bytes
    ]

    best = None
    for event_table_pos in possible_event_table_pos(fid):
        for n_bytes, calc_event_table_pos in calculated:
            distance = abs(calc_event_table_pos - event_table_pos)

            if distance == 0:
                return event_table_pos, n_bytes, distance
            # Strict comparison keeps the first of several equally distant
            # candidates, like a stable sort followed by ``[0]`` would.
            if best is None or distance < best[2]:
                best = (event_table_pos, n_bytes, distance)

    if best is None:
        raise ValueError(
            "No candidate event table position could be evaluated: the "
            "stored position lies beyond the end of the file or no "
            "candidate n_bytes was given.")
    return best

def possible_event_table_pos(fid):
    """Yield all the possible event table positions.

    The first candidate is the unsigned 32-bit value stored at
    ``SETUP_EVENTTABLEPOS_OFFSET``; each following candidate is 2**32
    larger than the previous one.  Candidates are yielded for as long as
    they do not exceed the file size.  Once iteration has started, ``fid``
    is left positioned at the end of the file.
    """
    fid.seek(SETUP_EVENTTABLEPOS_OFFSET)
    event_table_pos = int(np.frombuffer(fid.read(4), dtype='<u4')[0])
    file_size = fid.seek(0, SEEK_END)
    # Number of distinct uint32 values, i.e. the wrap-around period.
    step = np.iinfo(np.uint32).max + 1

    while event_table_pos <= file_size:
        yield event_table_pos
        event_table_pos = event_table_pos + step

fid_origin = fid.tell() # save the state

fid.seek(SETUP_NSAMPLES_OFFSET)
(n_samples,) = np.frombuffer(fid.read(4), dtype='<i4')
n_samples = int(np.frombuffer(fid.read(4), dtype='<i4')[0])

fid.seek(SETUP_NCHANNELS_OFFSET)
(n_channels,) = np.frombuffer(fid.read(2), dtype='<u2')

fid.seek(SETUP_EVENTTABLEPOS_OFFSET)
(event_table_pos,) = np.frombuffer(fid.read(4), dtype='<i4')

n_bytes, event_table_pos = _infer_n_bytes_event_table_pos(event_table_pos)
n_channels = int(np.frombuffer(fid.read(2), dtype='<u2')[0])

if data_format == 'auto':
possible_n_bytes = [2, 4]
elif data_format == 'int16':
possible_n_bytes = [2]
elif data_format == 'int32':
possible_n_bytes = [4]
else:
raise Exception("Correct data format required: 'auto','int16' or 'int32'.")

event_table_pos, n_bytes, distance = get_most_possible_sol(fid, possible_n_bytes,
n_samples, n_channels)
if distance != 0:
warn("Metadata doesn't match so well, the samples might not loaded completely.")
fid.seek(fid_origin) # restore the state
return n_channels, n_samples, event_table_pos, n_bytes
5 changes: 2 additions & 3 deletions mne/io/cnt/cnt.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from ..base import BaseRaw
from ...annotations import Annotations


from ._utils import (_read_teeg, _get_event_parser, _session_date_2_meas_date,
_compute_robust_event_table_position, CNTEventType3)

Expand Down Expand Up @@ -57,7 +56,7 @@ def _translating_function(offset, event_type):
(sfreq,) = np.frombuffer(fid.read(2), dtype='<u2')

n_channels, n_samples, event_table_pos, n_bytes = (
_compute_robust_event_table_position(fid))
_compute_robust_event_table_position(fid, data_format))

with open(fname, 'rb') as fid:
teeg = _read_teeg(fid, teeg_offset=event_table_pos)
Expand Down Expand Up @@ -224,7 +223,7 @@ def _get_cnt_info(input_fname, eog, ecg, emg, misc, data_format, date_format,
count=1)[0]

n_channels, n_samples, event_offset, n_bytes = (
_compute_robust_event_table_position(fid))
_compute_robust_event_table_position(fid, data_format))

# Channel offset refers to the size of blocks per channel in the file.
cnt_info['channel_offset'] = np.fromfile(fid, dtype='<i4', count=1)[0]
Expand Down