Simplifying implementation of "G5compare". Updating docs. (#16)
tdegeus committed Dec 7, 2019
1 parent 181790a commit b722c8a
Showing 2 changed files with 102 additions and 32 deletions.
96 changes: 95 additions & 1 deletion GooseHDF5/__init__.py
@@ -3,7 +3,7 @@

import h5py

__version__ = '0.3.0'
__version__ = '0.3.1'

# ==================================================================================================

@@ -338,6 +338,14 @@ def filter_datasets(data, paths):
datasets = GooseHDF5.filter_datasets(data,
GooseHDF5.getpaths(data, max_depth=2, fold='/data'))
:arguments:
**data** (``<h5py.File>``)
An HDF5 archive.
**paths** (``<list<str>>``)
A list of paths to datasets.
'''

import re
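
A minimal usage sketch of the documented pattern (the file name 'myfile.h5' and the fold group '/data' are hypothetical):

import h5py
import GooseHDF5

with h5py.File('myfile.h5', 'r') as data:
    # keep only those paths that point to datasets
    paths = GooseHDF5.getpaths(data, max_depth=2, fold='/data')
    datasets = GooseHDF5.filter_datasets(data, paths)
    print(list(datasets))
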
@@ -353,6 +361,20 @@ def verify(data, datasets, error=False):
r'''
Try reading each dataset of a list of datasets. Return a list with only those datasets that can be
successfully opened.
:arguments:
**data** (``<h5py.File>``)
An HDF5 archive.
**datasets** (``<list<str>>``)
A list of paths to datasets.
:options:
**error** ([``False``] | ``True``)
If True, the function raises an error when reading fails. If False, the function
simply skips the failing dataset and continues.
'''

out = []
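
A usage sketch of the documented behaviour (file name hypothetical): filter the archive's paths down to datasets that can actually be read.

import h5py
import GooseHDF5

with h5py.File('myfile.h5', 'r') as data:
    datasets = GooseHDF5.filter_datasets(data, GooseHDF5.getpaths(data))
    # unreadable datasets are dropped; pass error=True to raise instead
    readable = GooseHDF5.verify(data, datasets)
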
@@ -376,6 +398,14 @@ def verify(data, datasets, error=False):
def exists(data, path):
r'''
Check if a path exists in the HDF5 archive.
:arguments:
**data** (``<h5py.File>``)
An HDF5 archive.
**path** (``<str>``)
The path to a dataset.
'''

if path in data:
@@ -388,6 +418,14 @@ def exists(data, path):
def exists_any(data, paths):
r'''
Check if any of the input paths exists in the HDF5 archive.
:arguments:
**data** (``<h5py.File>``)
An HDF5 archive.
**paths** (``<list<str>>``)
A list of paths to datasets.
'''

if type(paths) == str:
@@ -404,6 +442,14 @@ def exists_any(data, paths):
def exists_all(data, paths):
r'''
Check if all of the input paths exist in the HDF5 archive.
:arguments:
**data** (``<h5py.File>``)
An HDF5 archive.
**paths** (``<list<str>>``)
A list of paths to datasets.
'''

if type(paths) == str:
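
A brief sketch of the three existence checks (file name and paths hypothetical):

import h5py
import GooseHDF5

with h5py.File('myfile.h5', 'r') as data:
    print(GooseHDF5.exists(data, '/a'))              # True if '/a' is present
    print(GooseHDF5.exists_any(data, ['/a', '/b']))  # True if at least one path is present
    print(GooseHDF5.exists_all(data, ['/a', '/b']))  # True only if every path is present
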
@@ -424,6 +470,23 @@ def copydatasets(source, dest, source_datasets, dest_datasets=None, root=None):
'source_datasets').
In addition, a 'root' (path prefix) for the names of the destination datasets can be specified.
:arguments:
**source, dest** (``<h5py.File>``)
HDF5 archives (source and destination).
**source_datasets** (``<list<str>>``)
A list of paths to datasets in "source".
:options:
**dest_datasets** (``<list<str>>``)
A list of paths to datasets in "dest".
If not specified, it is taken equal to "source_datasets".
**root** (``<str>``)
Path prefix for all 'dest_datasets'.
'''

import posixpath
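
A usage sketch of copying every dataset under a prefix (file names and the '/backup' prefix are hypothetical):

import h5py
import GooseHDF5

with h5py.File('source.h5', 'r') as source, h5py.File('dest.h5', 'w') as dest:
    paths = GooseHDF5.filter_datasets(source, GooseHDF5.getpaths(source))
    # copy all datasets of "source", storing them in "dest" under "/backup/..."
    GooseHDF5.copydatasets(source, dest, paths, root='/backup')
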
@@ -492,6 +555,23 @@ def _equal(a, b):
# --------------------------------------------------------------------------------------------------

def equal(source, dest, source_dataset, dest_dataset=None):
r'''
Check that a dataset is equal in both files.
:arguments:
**source, dest** (``<h5py.File>``)
HDF5 archives (source and destination).
**source_dataset** (``<str>``)
The path to a dataset in ``source``.
:options:
**dest_dataset** (``<str>``)
The path to a dataset in ``dest``.
If not specified, it is taken equal to ``source_dataset``.
'''

if not dest_dataset:
dest_dataset = source_dataset
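
A sketch of comparing a single dataset between two archives (file names and path hypothetical):

import h5py
import GooseHDF5

with h5py.File('a.h5', 'r') as source, h5py.File('b.h5', 'r') as dest:
    if not GooseHDF5.equal(source, dest, '/path/to/dataset'):
        print('datasets differ')
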
@@ -509,6 +589,20 @@ def equal(source, dest, source_dataset, dest_dataset=None):
def allequal(source, dest, source_datasets, dest_datasets=None):
r'''
Check that all listed datasets are equal in both files.
:arguments:
**source, dest** (``<h5py.File>``)
HDF5 archives (source and destination).
**source_datasets** (``<list<str>>``)
A list of paths to datasets in "source".
:options:
**dest_datasets** (``<list<str>>``)
A list of paths to datasets in "dest".
If not specified, it is taken equal to "source_datasets".
'''

if not dest_datasets:
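
A sketch of checking all listed datasets at once (file names hypothetical):

import h5py
import GooseHDF5

with h5py.File('a.h5', 'r') as source, h5py.File('b.h5', 'r') as dest:
    paths = GooseHDF5.filter_datasets(source, GooseHDF5.getpaths(source))
    if GooseHDF5.allequal(source, dest, paths):
        print('all listed datasets are equal')
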
38 changes: 7 additions & 31 deletions GooseHDF5/cli/G5compare.py
@@ -31,6 +31,7 @@

from .. import __version__
from .. import getpaths
from .. import equal

# --------------------------------------------------------------------------------------------------
# Check if a file exists, quit otherwise.
@@ -41,41 +42,16 @@ def check_isfile(fname):
if not os.path.isfile(fname):
raise IOError('"{0:s}" does not exist'.format(fname))

# --------------------------------------------------------------------------------------------------
# Support function for "check_dataset".
# --------------------------------------------------------------------------------------------------

def not_equal(path):

print('!= {0:s}'.format(path))
return False

# --------------------------------------------------------------------------------------------------
# Check if the datasets (read outside) "a" and "b" are equal. If not print a message with the "path"
# to the screen and return "False".
# --------------------------------------------------------------------------------------------------

def check_dataset(path, a, b):

if np.issubdtype(a.dtype, np.number) and np.issubdtype(b.dtype, np.number):
if np.allclose(a, b):
return True
else:
return not_equal(path)

if a.size != b.size:
return not_equal(path)

if a.size == 1:
if a[...] == b[...]:
return True
else:
return not_equal(path)
def check_dataset(source, dest, source_dataset, dest_dataset=None):

if list(a) == list(b):
return True
else:
return not_equal(path)
if not equal(source, dest, source_dataset, dest_dataset):
print('!= {0:s}'.format(source_dataset))
return False

return True
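
With this change, check_dataset is a thin wrapper around GooseHDF5.equal: it only prints the path when the comparison fails. A sketch of how the helper is driven, mirroring _check_plain below (file names hypothetical, and assuming the cli subpackage is importable as shown):

import h5py
from GooseHDF5 import getpaths
from GooseHDF5.cli.G5compare import check_dataset

with h5py.File('a.h5', 'r') as source, h5py.File('b.h5', 'r') as other:
    for path in getpaths(source):
        if path in other:
            check_dataset(source, other, path)  # prints "!= <path>" when the datasets differ
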

@@ -95,7 +71,7 @@ def _check_plain(source, other):

for path in getpaths(source):
if path in other:
check_dataset(path, source[path][...], other[path][...])
check_dataset(source, other, path)

# --------------------------------------------------------------------------------------------------

@@ -128,7 +104,7 @@ def _check_renamed(source, other, renamed):

for new_path, path in s2o.items():
if new_path in o2s:
check_dataset(path, source[path][...], other[new_path][...])
check_dataset(source, other, path, new_path)

# --------------------------------------------------------------------------------------------------

