Skip to content

Commit

Permalink
Merge 034c276 into 7877add
Browse files Browse the repository at this point in the history
  • Loading branch information
yarikoptic committed Oct 2, 2018
2 parents 7877add + 034c276 commit c9ad72c
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 33 deletions.
168 changes: 142 additions & 26 deletions nibabel/cmdline/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ def get_opt_parser():
Option("-H", "--header-fields",
dest="header_fields", default='all',
help="Header fields (comma separated) to be printed as well (if present)"),

Option("--ma", "--data-max-abs-diff",
dest="data_max_abs_diff",
type=float,
default=0.0,
help="Maximal absolute difference in data between files to tolerate."),

Option("--mr", "--data-max-rel-diff",
dest="data_max_rel_diff",
type=float,
default=0.0,
help="Maximal relative difference in data between files to tolerate."
" If --data-max-abs-diff is also specified, only the data points "
" with absolute difference greater than that value would be "
" considered for relative difference check."),
])

return p
Expand Down Expand Up @@ -101,8 +116,8 @@ def get_headers_diff(file_headers, names=None):
return difference


def get_data_diff(files):
"""Get difference between md5 values
def get_data_hash_diff(files):
"""Get difference between md5 values of data
Parameters
----------
Expand All @@ -115,7 +130,7 @@ def get_data_diff(files):
"""

md5sums = [
hashlib.md5(np.ascontiguousarray(nib.load(f).get_data(), dtype=np.float32)).hexdigest()
hashlib.md5(np.ascontiguousarray(nib.load(f).get_fdata(dtype=np.float32))).hexdigest()
for f in files
]

Expand All @@ -125,6 +140,83 @@ def get_data_diff(files):
return md5sums


def get_data_diff(files, max_abs=0, max_rel=0):
"""Get difference between data
Parameters
----------
files: list of (str or ndarray)
If list of strings is provided -- they must be existing file names
max_abs: float, optional
Maximal absolute difference to tolerate.
max_rel: float, optional
Maximal relative (`abs(diff)/mean(diff)`) difference to tolerate.
If `max_abs` is specified, then those data points with lesser than that
absolute difference, are not considered for relative difference testing
Returns
-------
diffs: OrderedDict
An ordered dict with a record per each file which has differences
with other files subsequent detected. Each record is a list of
difference records, one per each file pair.
Each difference record is an Ordered Dict with possible keys
'abs' or 'rel' showing maximal absolute or relative differences
in the file or the record ('CMP': 'incompat') if file shapes
are incompatible.
"""

# we are doomed to keep them in RAM now
data = [f if isinstance(f, np.ndarray) else nib.load(f).get_fdata(dtype=np.float32) for f in files]
diffs = OrderedDict()
for i, d1 in enumerate(data[:-1]):
# populate empty entries for non-compared
diffs1 = [None] * (i + 1)

for j, d2 in enumerate(data[i + 1:], i + 1):

if d1.shape == d2.shape:
abs_diff = np.abs(d1 - d2)
mean_abs = (np.abs(d1) + np.abs(d2)) * 0.5
candidates = np.logical_or(mean_abs != 0, abs_diff != 0)

if max_abs:
candidates[abs_diff <= max_abs] = False

max_abs_diff = np.max(abs_diff)
if np.any(candidates):
rel_diff = abs_diff[candidates] / mean_abs[candidates]
if max_rel:
sub_thr = rel_diff <= max_rel
# Since we operated on sub-selected values already, we need
# to plug them back in
candidates[
tuple((indexes[sub_thr] for indexes in np.where(candidates)))
] = False
max_rel_diff = np.max(rel_diff)
else:
max_rel_diff = 0

if np.any(candidates):

diff_rec = OrderedDict() # so that abs goes before relative

diff_rec['abs'] = max_abs_diff
diff_rec['rel'] = max_rel_diff
diffs1.append(diff_rec)
else:
diffs1.append(None)

else:
diffs1.append({'CMP': "incompat"})

if any(diffs1):

diffs['DATA(diff %d:)' % (i + 1)] = diffs1

return diffs


def display_diff(files, diff):
"""Format header differences into a nice string
Expand All @@ -140,21 +232,27 @@ def display_diff(files, diff):
"""
output = ""
field_width = "{:<15}"
filename_width = "{:<53}"
value_width = "{:<55}"

output += "These files are different.\n"
output += field_width.format('Field')
output += field_width.format('Field/File')

for f in files:
output += value_width.format(os.path.basename(f))
for i, f in enumerate(files, 1):
output += "%d:%s" % (i, filename_width.format(os.path.basename(f)))

output += "\n"

for key, value in diff.items():
output += field_width.format(key)

for item in value:
item_str = str(item)
if isinstance(item, dict):
item_str = ', '.join('%s: %s' % i for i in item.items())
elif item is None:
item_str = '-'
else:
item_str = str(item)
# Value might start/end with some invisible spacing characters so we
# would "condition" it on both ends a bit
item_str = re.sub('^[ \t]+', '<', item_str)
Expand All @@ -169,8 +267,37 @@ def display_diff(files, diff):
return output


def diff(files, header_fields='all', data_max_abs_diff=None, data_max_rel_diff=None):
    """Collect header and data differences between files

    Parameters
    ----------
    files: list of str
      Names of at least two files to compare
    header_fields: str, optional
      Comma separated names of the header fields to compare, or 'all' to
      use every field present in the header of the first file
    data_max_abs_diff: float, optional
      Passed to `get_data_diff` as `max_abs`
    data_max_rel_diff: float, optional
      Passed to `get_data_diff` as `max_rel`

    Returns
    -------
    The result of `get_headers_diff`, extended with a 'DATA(md5)' entry and
    the records of `get_data_diff` whenever the data hashes differ and
    `get_data_diff` reports differences as well.
    """
    assert len(files) >= 2, "Please enter at least two files"

    headers = [nib.load(fname).header for fname in files]

    # 'all' signals "all fields"
    # TODO: header fields might vary across file types, thus prior sensing would be needed
    fields = headers[0].keys() if header_fields == 'all' else header_fields.split(',')

    differences = get_headers_diff(headers, fields)

    hash_diffs = get_data_hash_diff(files)
    if not hash_diffs:
        return differences

    # hashes differ, so provide details; tolerances might still lead to
    # ignoring the difference in data altogether
    value_diffs = get_data_diff(
        files, max_abs=data_max_abs_diff, max_rel=data_max_rel_diff)
    if value_diffs:
        differences['DATA(md5)'] = hash_diffs
        differences.update(value_diffs)

    return differences


def main(args=None, out=None):
"""Getting the show on the road"""

out = out or sys.stdout
parser = get_opt_parser()
(opts, files) = parser.parse_args(args)
Expand All @@ -181,27 +308,16 @@ def main(args=None, out=None):
# suppress nibabel format-compliance warnings
nib.imageglobals.logger.level = 50

assert len(files) >= 2, "Please enter at least two files"

file_headers = [nib.load(f).header for f in files]

# signals "all fields"
if opts.header_fields == 'all':
# TODO: header fields might vary across file types, thus prior sensing would be needed
header_fields = file_headers[0].keys()
else:
header_fields = opts.header_fields.split(',')

diff = get_headers_diff(file_headers, header_fields)
data_diff = get_data_diff(files)

if data_diff:
diff['DATA(md5)'] = data_diff
files_diff = diff(
files,
header_fields=opts.header_fields,
data_max_abs_diff=opts.data_max_abs_diff,
data_max_rel_diff=opts.data_max_rel_diff
)

if diff:
out.write(display_diff(files, diff))
if files_diff:
out.write(display_diff(files, files_diff))
raise SystemExit(1)

else:
out.write("These files are identical.\n")
raise SystemExit(0)
40 changes: 35 additions & 5 deletions nibabel/cmdline/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import nibabel as nib
import numpy as np
from nibabel.cmdline.utils import *
from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_diff
from nibabel.cmdline.diff import get_headers_diff, display_diff, main, get_data_hash_diff, get_data_diff
from os.path import (join as pjoin)
from nibabel.testing import data_path
from collections import OrderedDict
Expand Down Expand Up @@ -96,9 +96,9 @@ def test_display_diff():
("bitpix", [np.array(8).astype(dtype="uint8"), np.array(16).astype(dtype="uint8")])
])

expected_output = "These files are different.\n" + "Field hellokitty.nii.gz" \
" " \
"privettovarish.nii.gz \n" \
expected_output = "These files are different.\n" + "Field/File 1:hellokitty.nii.gz" \
" " \
"2:privettovarish.nii.gz \n" \
"datatype " \
"2 " \
"4 \n" \
Expand All @@ -114,7 +114,37 @@ def test_get_data_diff():
# testing for identical files specifically as md5 may vary by computer
test_names = [pjoin(data_path, f)
for f in ('standard.nii.gz', 'standard.nii.gz')]
assert_equal(get_data_diff(test_names), [])
assert_equal(get_data_hash_diff(test_names), [])

# testing the maximum relative and absolute differences' different use cases
test_array = np.arange(16).reshape(4, 4)
test_array_2 = np.arange(1, 17).reshape(4, 4)
test_array_3 = np.arange(2, 18).reshape(4, 4)
test_array_4 = np.arange(100).reshape(10, 10)
test_array_5 = np.arange(64).reshape(8, 8)

# same shape, 2 files
assert_equal(get_data_diff([test_array, test_array_2]),
OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)])])]))

# same shape, 3 files
assert_equal(get_data_diff([test_array, test_array_2, test_array_3]),
OrderedDict([('DATA(diff 1:)', [None, OrderedDict([('abs', 1), ('rel', 2.0)]),
OrderedDict([('abs', 2), ('rel', 2.0)])]),
('DATA(diff 2:)', [None, None,
OrderedDict([('abs', 1), ('rel', 0.66666666666666663)])])]))

# same shape, 2 files, modified maximum abs/rel
assert_equal(get_data_diff([test_array, test_array_2], max_abs=2, max_rel=2), OrderedDict())

# different shape, 2 files
assert_equal(get_data_diff([test_array_2, test_array_4]),
OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}])]))

# different shape, 3 files
assert_equal(get_data_diff([test_array_4, test_array_5, test_array_2]),
OrderedDict([('DATA(diff 1:)', [None, {'CMP': 'incompat'}, {'CMP': 'incompat'}]),
('DATA(diff 2:)', [None, None, {'CMP': 'incompat'}])]))


def test_main():
Expand Down
4 changes: 2 additions & 2 deletions nibabel/tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ def check_nib_diff_examples():
fnames = [pjoin(DATA_PATH, f)
for f in ('standard.nii.gz', 'example4d.nii.gz')]
code, stdout, stderr = run_command(['nib-diff'] + fnames, check_code=False)
checked_fields = ["Field", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end",
checked_fields = ["Field/File", "regular", "dim_info", "dim", "datatype", "bitpix", "pixdim", "slice_end",
"xyzt_units", "cal_max", "descrip", "qform_code", "sform_code", "quatern_b",
"quatern_c", "quatern_d", "qoffset_x", "qoffset_y", "qoffset_z", "srow_x",
"srow_y", "srow_z", "DATA(md5)"]
"srow_y", "srow_z", "DATA(md5)", "DATA(diff 1:)"]
for item in checked_fields:
assert_true(item in stdout)

Expand Down

0 comments on commit c9ad72c

Please sign in to comment.