Skip to content

Commit

Permalink
G5compare: adding option to check for data-type (#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed May 26, 2020
1 parent 57cfeb6 commit 6db11fd
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 17 deletions.
2 changes: 1 addition & 1 deletion GooseHDF5/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
warnings.filterwarnings("ignore")


__version__ = '0.7.3'
__version__ = '0.7.4'


def abspath(path):
Expand Down
37 changes: 22 additions & 15 deletions GooseHDF5/cli/G5compare.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'''G5compare
Compare two HDF5 files. If the function does not output anything all datasets are present in both
files, and all the content of the datasets is equals
files, and all the content of the datasets is equal.
Each output line corresponds to a mismatch between the files.
Usage:
G5compare [options] [--renamed ARG]... <source> <other>
Expand All @@ -10,6 +11,7 @@
<other> HDF5-file.
Options:
-t, --dtype Verify that the type of the datasets match.
-r, --renamed=ARG Renamed paths, separated by a separator (see below).
-s, --ifs=ARG Separator used to separate renamed fields. [default: :]
-h, --help Show help.
Expand All @@ -35,20 +37,25 @@ def check_isfile(fname):
raise IOError('"{0:s}" does not exist'.format(fname))


def check_dataset(source, dest, source_dataset, dest_dataset=None):
def check_dataset(source, dest, source_dataset, dest_dataset, check_dtype):
r'''
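Check if the dataset "source_dataset" in "source" and the dataset "dest_dataset" in "dest" are equal and,
if "check_dtype" is set, that their dtypes match. If not, print a message with the path "source_dataset" to the screen and return "False".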
'''

if not equal(source, dest, source_dataset, dest_dataset):
print('!= {0:s}'.format(source_dataset))
print(' != {0:s}'.format(source_dataset))
return False

if check_dtype:
if source[source_dataset].dtype != dest[dest_dataset].dtype:
print('type {0:s}'.format(source_dataset))
return False

return True


def _check_plain(source, other):
def _check_plain(source, other, check_dtype):
r'''
Support function for "check_plain".
'''
Expand All @@ -63,19 +70,19 @@ def _check_plain(source, other):

for path in getpaths(source):
if path in other:
check_dataset(source, other, path)
check_dataset(source, other, path, path, check_dtype)


def check_plain(source_name, other_name):
def check_plain(source_name, other_name, check_dtype):
r'''
Check all datasets (without allowing for renamed datasets).
'''
with h5py.File(source_name, 'r') as source:
with h5py.File(other_name, 'r') as other:
_check_plain(source, other)
_check_plain(source, other, check_dtype)


def _check_renamed(source, other, renamed):
def _check_renamed(source, other, renamed, check_dtype):
r'''
Support function for "check_renamed".
'''
Expand All @@ -89,26 +96,26 @@ def _check_renamed(source, other, renamed):

for _, path in s2o.items():
if path not in o2s:
print('-> {0:s}'.format(path))
print(' -> {0:s}'.format(path))

for _, path in o2s.items():
if path not in s2o:
print('<- {0:s}'.format(path))
print(' <- {0:s}'.format(path))

for new_path, path in s2o.items():
if new_path in o2s:
check_dataset(source, other, path, new_path)
check_dataset(source, other, path, new_path, check_dtype)


def check_renamed(source_name, other_name, renamed):
def check_renamed(source_name, other_name, renamed, check_dtype):
r'''
Check all datasets while allowing for renamed datasets.
renamed = [['source_name1', 'other_name1'], ['source_name2', 'other_name2'], ...]
'''

with h5py.File(source_name, 'r') as source:
with h5py.File(other_name, 'r') as other:
_check_renamed(source, other, renamed)
_check_renamed(source, other, renamed, check_dtype)


def main():
Expand All @@ -122,9 +129,9 @@ def main():
check_isfile(args['<other>'])

if len(args['--renamed']) == 0:
check_plain(args['<source>'], args['<other>'])
check_plain(args['<source>'], args['<other>'], args['--dtype'])
sys.exit(0)

renamed = [i.split(args['--ifs']) for i in args['--renamed']]

check_renamed(args['<source>'], args['<other>'], renamed)
check_renamed(args['<source>'], args['<other>'], renamed, args['--dtype'])
2 changes: 1 addition & 1 deletion test/cli/G5compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def run(cmd):

output = sorted(run("G5compare a.hdf5 b.hdf5 -r /d/equal:/e/equal"))

expected_output = sorted(['!= /a/not_equal', '!= /b/not_equal', '!= /c/not_equal', '!= /f/not_equal'])
expected_output = sorted([' != /a/not_equal', ' != /b/not_equal', ' != /c/not_equal', ' != /f/not_equal'])

os.remove('a.hdf5')
os.remove('b.hdf5')
Expand Down

0 comments on commit 6db11fd

Please sign in to comment.