Skip to content

Commit

Permalink
Adding expand_soft option to copy
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed Mar 8, 2022
1 parent d944ee0 commit 9ab4fbc
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 4 deletions.
28 changes: 24 additions & 4 deletions GooseHDF5/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ def _create_groups(file, paths):
file.create_group(group)


def _copy(source, dest, source_paths, dest_paths):
def _copy(source, dest, source_paths, dest_paths, expand_soft):
"""
Copy paths recursively.
"""
Expand All @@ -533,11 +533,18 @@ def _copy(source, dest, source_paths, dest_paths):
# (main function checks existence before, so this can be the only reason)
if dest_path in dest:
continue

if not expand_soft:
link = source.get(source_path, getlink=True)
if isinstance(link, h5py.SoftLink):
dest[dest_path] = h5py.SoftLink(link.path)
continue

group = posixpath.split(dest_path)[0]
source.copy(source_path, dest[group], posixpath.split(dest_path)[1])


def _copy_attrs(source, dest, source_paths, dest_paths):
def _copy_attrs(source, dest, source_paths, dest_paths, expand_soft):

if len(source_paths) == 0:
return 0
Expand All @@ -546,6 +553,12 @@ def _copy_attrs(source, dest, source_paths, dest_paths):

for source_path, dest_path in zip(source_paths, dest_paths):

if not expand_soft:
link = source.get(source_path, getlink=True)
if isinstance(link, h5py.SoftLink):
dest[dest_path] = h5py.SoftLink(link.path)
continue

source_group = source[source_path]

if dest_path not in dest:
Expand All @@ -565,6 +578,7 @@ def copy(
root: str = None,
recursive: bool = True,
skip: bool = False,
expand_soft: bool = True,
):
"""
Copy groups/datasets from one HDF5-archive ``source`` to another HDF5-archive ``dest``.
Expand All @@ -579,6 +593,7 @@ def copy(
:param root: Path prefix for all ``dest_datasets``.
:param recursive: If the source is a group, copy all objects within that group recursively.
:param skip: Skip datasets that are not present in source.
:param expand_soft: Copy the underlying data of a link, or copy as link with the same path.
"""

if len(source_datasets) == 0:
Expand Down Expand Up @@ -606,17 +621,22 @@ def copy(
isgroup = np.array([isinstance(source[path], h5py.Group) for path in source_datasets])

if recursive:
_copy(source, dest, source_datasets[isgroup], dest_datasets[isgroup])
_copy(
source, dest, source_datasets[isgroup], dest_datasets[isgroup], expand_soft=expand_soft
)

_copy(
source,
dest,
source_datasets[np.logical_not(isgroup)],
dest_datasets[np.logical_not(isgroup)],
expand_soft=expand_soft,
)

if not recursive:
_copy_attrs(source, dest, source_datasets[isgroup], dest_datasets[isgroup])
_copy_attrs(
source, dest, source_datasets[isgroup], dest_datasets[isgroup], expand_soft=expand_soft
)


def copydatasets(
Expand Down
44 changes: 44 additions & 0 deletions tests/copying.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,50 @@ def test_copy_plain(self):
for path in datasets:
self.assertTrue(g5.equal(source, dest, path))

def test_copy_softlinks(self):
    """
    Copy datasets and soft links with ``expand_soft=False``.

    Every copied path must compare equal between source and destination,
    the plain datasets must not be soft links in the destination,
    and the soft links must still be soft links in the destination.
    """

    sourcepath = os.path.join(dirname, "foo_1.h5")
    destpath = os.path.join(dirname, "bar_1.h5")
    datasets = ["/a", "/b/foo", "/c/d/foo"]
    links = ["/mylink/a", "/mylink/b/foo", "/mylink/c/d/foo"]

    with h5py.File(sourcepath, "w") as source, h5py.File(destpath, "w") as dest:

        # one dataset per path, and one soft link pointing at each dataset
        for link, d in zip(links, datasets):
            source[d] = np.random.rand(10)
            source[link] = h5py.SoftLink(d)

        g5.copy(source, dest, datasets + links, expand_soft=False)

        for path in datasets + links:
            self.assertTrue(g5.equal(source, dest, path))

        # ``getlink=True`` returns the link object rather than the object it resolves to
        for path in datasets:
            self.assertNotIsInstance(dest.get(path, getlink=True), h5py.SoftLink)

        for path in links:
            self.assertIsInstance(dest.get(path, getlink=True), h5py.SoftLink)

def test_copy_expand_softlinks(self):
    """
    Copy datasets and soft links without passing ``expand_soft``.

    Every copied path must compare equal between source and destination,
    and none of the destination paths (including those that are soft links
    in the source) may be a soft link.
    """

    sourcepath = os.path.join(dirname, "foo_1.h5")
    destpath = os.path.join(dirname, "bar_1.h5")
    datasets = ["/a", "/b/foo", "/c/d/foo"]
    links = ["/mylink/a", "/mylink/b/foo", "/mylink/c/d/foo"]

    with h5py.File(sourcepath, "w") as source, h5py.File(destpath, "w") as dest:

        # one dataset per path, and one soft link pointing at each dataset
        for link, d in zip(links, datasets):
            source[d] = np.random.rand(10)
            source[link] = h5py.SoftLink(d)

        g5.copy(source, dest, datasets + links)

        for path in datasets + links:
            self.assertTrue(g5.equal(source, dest, path))

        # ``getlink=True`` returns the link object rather than the object it resolves to
        for path in datasets + links:
            self.assertNotIsInstance(dest.get(path, getlink=True), h5py.SoftLink)

def test_copy_skip(self):

sourcepath = os.path.join(dirname, "foo_1.h5")
Expand Down

0 comments on commit 9ab4fbc

Please sign in to comment.