Skip to content

Commit

Permalink
[CLI] Adding compression & copy of attributes (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed May 18, 2020
1 parent 19b7135 commit 1fcb9f7
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 17 deletions.
39 changes: 27 additions & 12 deletions GooseHDF5/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
warnings.filterwarnings("ignore")


__version__ = '0.6.0'
__version__ = '0.7.0'


def abspath(path):
Expand Down Expand Up @@ -504,16 +504,15 @@ def copydatasets(source, dest, source_datasets, dest_datasets=None, root=None):
group = posixpath.split(dest_path)[0]
source.copy(source_path, dest[group], posixpath.split(dest_path)[1])


def _equal(a, b):
def _equal_value(a, b):

import numpy as np

if isinstance(a, h5py.Group) and isinstance(b, h5py.Group):
return True

if not isinstance(a, h5py.Dataset) or not isinstance(b, h5py.Dataset):
raise IOError('Not a Dataset')
if type(a) == str:
if type(b) == str:
return a == b
else:
return False

if np.issubdtype(a.dtype, np.number) and np.issubdtype(b.dtype, np.number):
if np.allclose(a, b):
Expand All @@ -530,12 +529,28 @@ def _equal(a, b):
else:
return False

if list(a) == list(b):
return list(a) == list(b)


def _equal(a, b):

if isinstance(a, h5py.Group) and isinstance(b, h5py.Group):
return True
else:
return False

return True
if not isinstance(a, h5py.Dataset) or not isinstance(b, h5py.Dataset):
raise IOError('Not a Dataset')

for key in a.attrs:
if key not in b.attrs:
return False
if not _equal_value(a.attrs[key], b.attrs[key]):
return False

for key in b.attrs:
if key not in a.attrs:
return False

return _equal_value(a, b)


def equal(source, dest, source_dataset, dest_dataset=None):
Expand Down
16 changes: 15 additions & 1 deletion GooseHDF5/cli/G5repack.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<source> HDF5-file.
Options:
-c, --compress Apply compression (using the loss-less GZip algorithm).
-h, --help Show help.
--version Show version.
Expand All @@ -28,6 +29,19 @@ def check_isfile(fname):
if not os.path.isfile(fname):
raise IOError('"{0:s}" does not exist'.format(fname))

def copy_dataset(old, new, path, compress):
    """
    Copy the dataset at ``path`` from ``old`` to ``new``, including its attributes.

    :param old: Opened source HDF5-file (read from).
    :param new: Opened destination HDF5-file (written to).
    :param path: Path of the dataset; the same path is used in both files.
    :param compress: If true, store the copy using GZip compression.
        Datasets holding a single value are always stored uncompressed.
    """

    source = old[path]
    data = source[...]

    if data.size == 1 or not compress:
        new[path] = data
    else:
        # Create the dataset from the data itself and pass the source dtype explicitly:
        # creating it from the shape alone makes h5py fall back to its default dtype
        # (single-precision float), which silently loses precision for doubles and
        # breaks non-float data.
        new.create_dataset(path, data=data, dtype=source.dtype, compression="gzip")

    target = new[path]

    for key in source.attrs:
        target.attrs[key] = source.attrs[key]

def main():

args = docopt.docopt(__doc__, version=__version__)
Expand All @@ -42,6 +56,6 @@ def main():
with h5py.File(filename, 'r') as source:
with h5py.File(tempname, 'w') as tmp:
for path in getpaths(source):
tmp[path] = source[path][...]
copy_dataset(source, tmp, path, args['--compress'])

os.replace(tempname, filename)
16 changes: 15 additions & 1 deletion test/cli/G5compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,24 @@ def run(cmd):
source['/d/equal'] = d
other['/e/equal'] = d

# attribute

f = np.random.random(25)

source['/f/equal'] = f
source['/f/equal'].attrs['key'] = f
source['/f/not_equal'] = f
source['/f/not_equal'].attrs['key'] = f

other['/f/equal'] = f
other['/f/equal'].attrs['key'] = f
other['/f/not_equal'] = f
other['/f/not_equal'].attrs['key'] = np.random.random(25)


output = sorted(run("G5compare a.hdf5 b.hdf5 -r /d/equal:/e/equal"))

expected_output = sorted(['!= /a/not_equal', '!= /b/not_equal', '!= /c/not_equal'])
expected_output = sorted(['!= /a/not_equal', '!= /b/not_equal', '!= /c/not_equal', '!= /f/not_equal'])

os.remove('a.hdf5')
os.remove('b.hdf5')
Expand Down
6 changes: 3 additions & 3 deletions test/cli/G5repack.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@ def run(cmd):
out = list(filter(None, subprocess.check_output(cmd, shell=True).decode('utf-8').split('\n')))
return out

a = np.random.random(5)
a = np.random.random(1000)

with h5py.File('a.hdf5', 'w') as source:
source['/a'] = a

output = run('G5repack a.hdf5')
output = run('G5repack -c a.hdf5')

with h5py.File('a.hdf5', 'r') as source:
b = source['/a'][...]

os.remove('a.hdf5')

if not np.all(a == b):
if not np.allclose(a, b):
raise IOError('Test failed')

0 comments on commit 1fcb9f7

Please sign in to comment.