Skip to content

Commit

Permalink
Bugfix: don't compress non-numeric arrays (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed May 25, 2020
1 parent 454bf2c commit fecfebf
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
19 changes: 18 additions & 1 deletion GooseHDF5/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
warnings.filterwarnings("ignore")


__version__ = '0.7.1'
__version__ = '0.7.2'


def abspath(path):
Expand Down Expand Up @@ -504,6 +504,23 @@ def copydatasets(source, dest, source_datasets, dest_datasets=None, root=None):
group = posixpath.split(dest_path)[0]
source.copy(source_path, dest[group], posixpath.split(dest_path)[1])


def isnumeric(a):
    r'''
    Returns ``True`` if an array contains numeric values.

    :param a:
        The object to check: typically a NumPy array (e.g. the data read from a dataset),
        but strings, bytes, Python scalars, and sequences are accepted too.

    :return:
        ``True`` if the dtype of ``a`` is a sub-type of ``numpy.number``
        (integer, floating point, or complex), ``False`` otherwise
        (e.g. strings, bytes, booleans, and object arrays).
    '''

    import numpy as np

    # plain (byte-)strings carry no "dtype" attribute and are never numeric
    if isinstance(a, (str, bytes)):
        return False

    # objects without a "dtype" (lists, tuples, Python scalars) are converted first,
    # so that they are classified instead of raising an AttributeError
    dtype = getattr(a, 'dtype', None)

    if dtype is None:
        try:
            dtype = np.asarray(a).dtype
        except (TypeError, ValueError):
            # not convertible to an array: cannot be a numeric array
            return False

    return bool(np.issubdtype(dtype, np.number))


def _equal_value(a, b):

import numpy as np
Expand Down
3 changes: 2 additions & 1 deletion GooseHDF5/cli/G5repack.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
(c - MIT) T.W.J. de Geus | tom@geus.me | www.geus.me | github.com/tdegeus/GooseHDF5
'''

from .. import isnumeric
from .. import getpaths
from .. import __version__
import docopt
Expand All @@ -33,7 +34,7 @@ def copy_dataset(old, new, path, compress):

data = old[path][...]

if data.size == 1 or not compress:
if data.size == 1 or not compress or not isnumeric(data):
new[path] = old[path][...]
else:
dset = new.create_dataset(path, data.shape, compression="gzip")
Expand Down
3 changes: 2 additions & 1 deletion docs/tools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ G5print
Print datasets in a HDF5-file.
Usage:
G5print [options] <source> <dataset>...
G5print [options] <source> [<dataset>...]
Arguments:
<source> HDF5-file.
Expand Down Expand Up @@ -139,5 +139,6 @@ G5repack
<source> HDF5-file.
Options:
-c, --compress Apply compression (using the loss-less GZip algorithm).
-h, --help Show help.
--version Show version.
10 changes: 8 additions & 2 deletions test/cli/G5repack.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@ def run(cmd):
return out

a = np.random.random(1000)
b = 'foo'

with h5py.File('a.hdf5', 'w') as source:
source['/a'] = a
source['/b'] = b

output = run('G5repack -c a.hdf5')

with h5py.File('a.hdf5', 'r') as source:
b = source['/a'][...]
a_r = source['/a'][...]
b_r = source['/b'][...]

os.remove('a.hdf5')

if not np.allclose(a, b):
if not np.allclose(a, a_r):
raise IOError('Test failed')

if b != b_r:
raise IOError('Test failed')

0 comments on commit fecfebf

Please sign in to comment.