Skip to content

Commit

Permalink
copy: adding source_root
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed Sep 14, 2022
1 parent 61105b7 commit 98745ef
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 8 deletions.
25 changes: 17 additions & 8 deletions GooseHDF5/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ def copy(
source_datasets: list[str],
dest_datasets: list[str] = None,
root: str = None,
source_root: str = None,
recursive: bool = True,
skip: bool = False,
expand_soft: bool = True,
Expand All @@ -542,13 +543,15 @@ def copy(
Copy groups/datasets from one HDF5-archive ``source`` to another HDF5-archive ``dest``.
The datasets can be renamed by specifying a list of ``dest_datasets``
(whose entries should correspond to the ``source_datasets``).
In addition, a ``root`` (path prefix) for the destination datasets name can be specified.
In addition, a ``root`` path prefix can be specified for the destination datasets.
Likewise, a ``source_root`` path prefix can be specified for the source datasets.
:param source: The source HDF5-archive.
:param dest: The destination HDF5-archive.
:param source_datasets: List of dataset-paths in ``source``.
:param dest_datasets: List of dataset-paths in ``dest``, defaults to ``source_datasets``.
:param root: Path prefix for all ``dest_datasets``.
:param source_root: Path prefix for all ``source_datasets``.
:param recursive: If the source is a group, copy all objects within that group recursively.
:param skip: Skip datasets that are not present in source.
:param expand_soft: Copy the underlying data of a link, or copy as link with the same path.
Expand Down Expand Up @@ -577,9 +580,16 @@ def copy(
if root:
dest_datasets = np.array([join(root, path, root=True) for path in dest_datasets])

if source_root:
source_datasets = np.array([join(source_root, path, root=True) for path in source_datasets])

for path in source_datasets:
if not exists(source, path):
raise OSError(f'Dataset "{path}" does not exists in source.')

for path in dest_datasets:
if exists(dest, path):
raise OSError(f'Dataset "{path}" already exists')
raise OSError(f'Dataset "{path}" already exists in dest.')

isgroup = np.array([isinstance(source[path], h5py.Group) for path in source_datasets])

Expand Down Expand Up @@ -1462,12 +1472,11 @@ def _G5print_catch():


def _G5list_catch():
    """
    Command-line entry point wrapper for ``G5list``.

    Catches any exception raised by ``G5list`` and prints a clean message
    instead of a traceback, as is conventional for CLI tools.

    :return: ``1`` on failure (non-zero exit status), ``None`` on success.
    """
    # The rendered diff left an unguarded duplicate call and commented-out
    # code here; keep only the guarded (post-commit) version.
    try:
        G5list(sys.argv[1:])
    except Exception as e:
        print(e)
        return 1


def _G5compare_catch():
Expand Down
19 changes: 19 additions & 0 deletions tests/copying.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,25 @@ def test_copy_groupattrs(self):
for path in datasets:
self.assertTrue(g5.equal(source, dest, path))

def test_copy_root(self):

datasets = ["/a", "/b/foo", "/c/d/foo"]
source_pre = "/my/source"
dest_pre = "/your/dest"

with h5py.File(basedir / "foo_1.h5", "w") as source:
with h5py.File(basedir / "bar_1.h5", "w") as dest:

for d in datasets:
source[g5.join(source_pre, d)] = np.random.rand(10)

g5.copy(source, dest, datasets, source_root=source_pre, root=dest_pre)

for path in datasets:
s = g5.join(source_pre, path)
d = g5.join(dest_pre, path)
self.assertTrue(g5.equal(source, dest, s, d))


if __name__ == "__main__":

Expand Down

0 comments on commit 98745ef

Please sign in to comment.