Merge remote-tracking branch 'refs/remotes/origin/main' into h5io_update
# Conflicts:
#	pyiron_base/storage/helper_functions.py
jan-janssen committed Jun 6, 2024
2 parents 1647d60 + 111dc7d commit 30af1f1
Showing 7 changed files with 110 additions and 42 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.4.5
+  rev: v0.4.7
   hooks:
   - id: ruff
     name: ruff lint
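Routine dependency bump; pins like this are usually refreshed with `pre-commit autoupdate`, which rewrites each `rev` to the hook repository's latest tag.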
6 changes: 5 additions & 1 deletion pyiron_base/interfaces/lockable.py
@@ -15,6 +15,7 @@
 sibling classes may bring in.
 """
 
+from contextlib import nullcontext
 from typing import Optional, Literal
 from functools import wraps
 import warnings
@@ -348,4 +349,7 @@ def unlocked(self) -> _UnlockContext:
         There is a small asymmetry between these two methods. :meth:`.lock` can only be done once (meaningfully), while :meth:`.unlocked` is a context manager and can be called multiple times.
         """
-        return _UnlockContext(self)
+        if self.read_only:
+            return _UnlockContext(self)
+        else:
+            return nullcontext(self)
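The practical effect: `unlocked()` is now a no-op on objects that are not read-only, so the "Unlock previously locked object!" warning (see `_on_unlock` below) only fires when something was actually locked. A minimal sketch of the intended behavior, assuming a `Lockable` object such as a `DataContainer`:

    from pyiron_base import DataContainer

    dc = DataContainer({"a": 1})
    with dc.unlocked():      # not read-only -> nullcontext, nothing to undo
        dc.a = 2

    dc.lock()                # sets read_only = True
    with dc.unlocked():      # read-only -> _UnlockContext, temporarily writable
        dc.a = 3             # warns once about unlocking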
2 changes: 1 addition & 1 deletion pyiron_base/jobs/job/core.py
@@ -151,7 +151,7 @@ def successive_path_splits(name_lst):
             # where we are looking for the data container
             container_path = "/".join(name_lst[:-i])
             # where we are looking for data in the container
-            data_path = "/".join(name_lst[-1:])
+            data_path = "/".join(name_lst[-i:])
             yield container_path, data_path
 
         try:
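A one-character bug with real consequences: with `[-1:]` the data path was always just the last path element, so only the deepest split was ever correct; with `[-i:]` the generator walks matching pairs of shrinking container paths and growing data paths. A sketch of the fixed body, assuming the surrounding loop (outside this hunk) runs `i` from 1 upward:

    def successive_path_splits(name_lst):
        for i in range(1, len(name_lst)):
            # where we are looking for the data container
            container_path = "/".join(name_lst[:-i])
            # where we are looking for data in the container
            data_path = "/".join(name_lst[-i:])
            yield container_path, data_path

    list(successive_path_splits(["user", "test", "my", "data"]))
    # [('user/test/my', 'data'), ('user/test', 'my/data'), ('user', 'test/my/data')]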
95 changes: 58 additions & 37 deletions pyiron_base/storage/datacontainer.py
@@ -7,8 +7,8 @@
 
 import copy
 import json
-import warnings
 from collections.abc import Sequence, Set, Mapping, MutableMapping
+import warnings
 
 import numpy as np
 import pandas
@@ -814,7 +814,24 @@ def write(self, file_name):
 
     # Lockable overload
     def _on_unlock(self):
-        warnings.warn("Unlock previously locked object!")
+        from sys import version_info as python_version
+
+        # a little dance to ensure that warnings appear at the correct call
+        # site, i.e. where someone either calls unlocked() or sets read_only
+        if python_version[0] == 3 and python_version[1] >= 12:
+            from pyiron_base.interfaces.lockable import __file__ as lock_file
+
+            warnings.warn(
+                "Unlock previously locked object!",
+                skip_file_prefixes=(__file__, lock_file),
+            )
+        else:
+            # stacklevel is so high because _on_unlock is called after several
+            # layers of Lockable and DataContainer.__setattr__ when used to set
+            # read_only; when used with unlocked() a fixed stack level still
+            # emits it at the wrong place, but we cannot do better without
+            # Python 3.12
+            warnings.warn("Unlock previously locked object!", stacklevel=5)
         super()._on_unlock()
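`skip_file_prefixes` is a keyword-only argument added to `warnings.warn` in Python 3.12: instead of counting frames with `stacklevel`, the warning is attributed to the first frame whose source file does not start with one of the given prefixes. A standalone sketch (hypothetical `helper.py` module, Python 3.12+ only):

    # helper.py
    import warnings

    def do_unlock():
        # reported at the caller's line, however deep the call chain
        # inside helper.py happens to be
        warnings.warn(
            "Unlock previously locked object!",
            skip_file_prefixes=(__file__,),
        )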


@@ -904,46 +921,50 @@ def _to_hdf(self, hdf):
                 del hdf[n]
 
     def _from_hdf(self, hdf, version=None):
-        self.clear()
+        with self.unlocked():
+            self.clear()
 
-        if version == "0.1.0":
-            self.update(hdf["data"], wrap=True)
-            self.read_only = bool(hdf.get("read_only", False))
-        else:
+            if version == "0.1.0":
+                self.update(hdf["data"], wrap=True)
+                self.read_only = bool(hdf.get("read_only", False))
+            else:
 
-            def normalize_key(name):
-                # split a dataset/group name into the position in the list and
-                # the key
-                if "__index_" in name:
-                    k, i = name.split("__index_", maxsplit=1)
-                else:
-                    k = name
-                    i = -1
-                i = int(i)
-                if k == "":
-                    return i, i
-                else:
-                    return i, k
-
-            items = []
-            for n in hdf.list_nodes():
-                if n in _internal_hdf_nodes:
-                    continue
-                items.append(
-                    (*normalize_key(n), hdf[n] if not self._lazy else HDFStub(hdf, n))
-                )
-            for g in hdf.list_groups():
-                items.append(
-                    (
-                        *normalize_key(g),
-                        to_object(hdf[g]) if not self._lazy else HDFStub(hdf, g),
-                    )
-                )
-
-            for _, k, v in sorted(items, key=lambda x: x[0]):
-                self[k] = v
-
-            self.read_only = bool(hdf.get("READ_ONLY", False))
+                def normalize_key(name):
+                    # split a dataset/group name into the position in the list and
+                    # the key
+                    if "__index_" in name:
+                        k, i = name.split("__index_", maxsplit=1)
+                    else:
+                        k = name
+                        i = -1
+                    i = int(i)
+                    if k == "":
+                        return i, i
+                    else:
+                        return i, k
+
+                items = []
+                for n in hdf.list_nodes():
+                    if n in _internal_hdf_nodes:
+                        continue
+                    items.append(
+                        (
+                            *normalize_key(n),
+                            hdf[n] if not self._lazy else HDFStub(hdf, n),
+                        )
+                    )
+                for g in hdf.list_groups():
+                    items.append(
+                        (
+                            *normalize_key(g),
+                            to_object(hdf[g]) if not self._lazy else HDFStub(hdf, g),
+                        )
+                    )
+
+                for _, k, v in sorted(items, key=lambda x: x[0]):
+                    self[k] = v
+
+                self.read_only = bool(hdf.get("READ_ONLY", False))
 
     # HDFStub compat
     def __getitem__(self, key):
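For orientation, `normalize_key` (copied from the hunk above) undoes the naming scheme used for list elements, which are stored in HDF under names containing `__index_<n>`; it returns a `(sort_position, key)` pair, where the position restores list order and the key decides whether the value lands under an integer index or a string key:

    def normalize_key(name):
        # split a dataset/group name into (position, key)
        if "__index_" in name:
            k, i = name.split("__index_", maxsplit=1)
        else:
            k, i = name, -1
        i = int(i)
        return (i, i) if k == "" else (i, k)

    assert normalize_key("__index_2") == (2, 2)                # pure list element
    assert normalize_key("energy__index_0") == (0, "energy")   # named, fixed order
    assert normalize_key("structure") == (-1, "structure")     # plain mapping key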
Empty file.
6 changes: 4 additions & 2 deletions tests/generic/test_datacontainer.py
@@ -688,8 +688,10 @@ def test_read_only(self):
 
         with warnings.catch_warnings(record=True) as w:
             pl.read_only = False
-            self.assertEqual(
-                len(w), 1, "Trying to change read-only flag back didn't raise warning."
+            # since read_only is propagated recursively through sub data
+            # containers, this can raise more than one warning
+            self.assertGreater(
+                len(w), 0, "Trying to change read-only flag back didn't raise warning."
             )
 
     def test_recursive_append(self):
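A hypothetical illustration of why a single flag flip can now emit several warnings, assuming nested dicts are wrapped into sub-containers (as `DataContainer` does):

    import warnings
    from pyiron_base import DataContainer

    pl = DataContainer({"sub": {"x": 1}})
    pl.read_only = True                      # propagates into pl.sub
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        pl.read_only = False                 # pl and pl.sub may each warn
    assert len(w) > 0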
41 changes: 41 additions & 0 deletions tests/job/test_hdf5content.py
@@ -4,16 +4,30 @@
 
 import unittest
 import os
+from pyiron_base import DataContainer
 from pyiron_base.project.generic import Project
 from pyiron_base._tests import PyironTestCase
 
+test_keys = ["my", "recursive", "test", "data"]
+
+
+def _wrap(k, *vs):
+    if vs == ():
+        return k
+    else:
+        return {k: _wrap(*vs)}
+
+
+test_data = _wrap(*test_keys)
+
 
 class InspectTest(PyironTestCase):
     @classmethod
     def setUpClass(cls):
         cls.file_location = os.path.dirname(os.path.abspath(__file__))
         cls.project = Project(os.path.join(cls.file_location, "hdf5_content"))
         cls.ham = cls.project.create_job("ScriptJob", "job_test_run")
+        cls.ham["user/test"] = DataContainer(test_data)
         cls.ham.save()
 
     @classmethod
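`_wrap` folds the key list into a nested dictionary, the last key becoming the innermost value:

    _wrap("my", "recursive", "test", "data")
    # -> {"my": {"recursive": {"test": "data"}}}
    _wrap("data")
    # -> "data"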
@@ -38,6 +52,33 @@ def test_inspect_job(self):
             ),
         )
 
+    def test_recursive_load(self):
+        """DataContainer values should be accessible at any (recursive) level
+        without explicit to_object() from job.content."""
+        for i in range(len(test_keys)):
+            container_path = "/".join(test_keys[:i])
+            with self.subTest(container_path=container_path):
+                try:
+                    val = self.ham.content["user/test/" + container_path]
+                    self.assertEqual(
+                        _wrap(*test_keys[i:]),
+                        val,
+                        "HDF5Content did not return correct value from "
+                        "recursive DataContainer!",
+                    )
+                    # the last val we get will be a str
+                    if i + 1 != len(test_keys):
+                        self.assertIsInstance(
+                            val,
+                            DataContainer,
+                            "HDF5Content did not return a DataContainer!",
+                        )
+                except KeyError:
+                    self.fail(
+                        "HDF5Content should not raise errors on partial "
+                        "access to recursive DataContainers"
+                    )
+
     def test_setitem(self):
         self.ham["user/output/some_value"] = 0.3
         self.assertEqual(self.ham["user/output/some_value"], 0.3)
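In interactive terms, the feature under test looks like this (hypothetical session; `project` stands in for the project handle from setUpClass):

    job = project.load("job_test_run")
    job.content["user/test/my"]                 # DataContainer, no to_object() needed
    job.content["user/test/my/recursive/test"]  # "data"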