From 506458d53d4a20808c51d294d5ef26f9b27e3418 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 6 Aug 2024 20:07:04 -0700 Subject: [PATCH 01/19] Refactor storage test --- tests/unit/nodes/test_macro.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/unit/nodes/test_macro.py b/tests/unit/nodes/test_macro.py index eb2145d23..9e4c8e7c2 100644 --- a/tests/unit/nodes/test_macro.py +++ b/tests/unit/nodes/test_macro.py @@ -496,7 +496,7 @@ def test_storage_for_modified_macros(self): TypeError, msg="h5io can't handle custom reconstructors" ): macro.save() - else: + elif backend == "tinybase": macro.save() reloaded = Macro.create.demo.AddThree( label="m", storage_backend=backend ) @@ -524,15 +524,16 @@ def test_storage_for_modified_macros(self): ) rerun = reloaded() - if backend == "tinybase": - self.assertDictEqual( - original_result, - rerun, - msg="Rerunning should re-execute the _original_ " - "functionality" - ) - else: - raise ValueError(f"Unexpected backend {backend}?") + self.assertDictEqual( + original_result, + rerun, + msg="Rerunning should re-execute the _original_ " + "functionality" + ) + else: + raise ValueError( + f"Backend {backend} not recognized -- write a test for it" + ) finally: macro.storage.delete() From 54fe84f20d0b0c1a8dfe44c821591f7903bd141a Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 6 Aug 2024 20:10:11 -0700 Subject: [PATCH 02/19] Remove empty line --- tests/unit/nodes/test_macro.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/nodes/test_macro.py b/tests/unit/nodes/test_macro.py index 9e4c8e7c2..94c160d4e 100644 --- a/tests/unit/nodes/test_macro.py +++ b/tests/unit/nodes/test_macro.py @@ -488,7 +488,6 @@ def test_storage_for_modified_macros(self): Macro.create.demo.AddPlusOne() ) - modified_result = macro() if backend == "h5io": From 4176906636eeee65b37642500f9f3af0b794fc88 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 6 Aug 2024 20:41:16 -0700 Subject: [PATCH 03/19] Rename workflow in tests With the introduction of a pickle backend, this test wound up failing. I haven't been able to work out exactly why, but since pickle is much faster, my guess is that something in the test parallelization winds up in a race condition with the deletion of a saved workflow in another test. Simply renaming the workflow here solved the issue, and is an innocuous change.
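A minimal sketch of the suspected collision, assuming parallel test workers that share a working directory (the labels and calls below mirror the storage tests in this series; the interleaving itself is hypothetical):

    # worker A: this test, saving under the label "wf"
    wf_a = Workflow("wf", storage_backend="h5io")
    wf_a.save()  # save-file location is derived from the label

    # worker B: another storage test, fast enough under pickle to reach
    # its cleanup while worker A is still mid-test
    wf_b = Workflow("wf", storage_backend="pickle")
    wf_b.save()
    wf_b.storage.delete()  # removes the shared "wf" save location

    # worker A now reloads and finds a missing or half-deleted save-file
    reloaded = Workflow("wf", storage_backend="h5io")

If that hypothesis is right, the collision disappears as soon as the two tests stop sharing a label.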
--- tests/unit/test_workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_workflow.py b/tests/unit/test_workflow.py index 649ae46cd..caf5f37d0 100644 --- a/tests/unit/test_workflow.py +++ b/tests/unit/test_workflow.py @@ -446,7 +446,7 @@ def test_storage_values(self): with self.subTest(backend): try: print("Trying", backend) - wf = Workflow("wf", storage_backend=backend) + wf = Workflow("wf_values", storage_backend=backend) wf.register("static.demo_nodes", domain="demo") wf.inp = wf.create.demo.AddThree(x=0) wf.out = wf.inp.outputs.add_three + 1 @@ -461,7 +461,7 @@ def test_storage_values(self): wf.save() else: wf.save() - reloaded = Workflow("wf", storage_backend=backend) + reloaded = Workflow("wf_values", storage_backend=backend) self.assertEqual( wf_out.out__add, reloaded.outputs.out__add.value, From 6a1b49c7fe73de10b8de14322ad7f9baf1fd14bc Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 6 Aug 2024 20:47:23 -0700 Subject: [PATCH 04/19] Introduce `pickle` as a backend for saving --- notebooks/deepdive.ipynb | 9 +++-- pyiron_workflow/mixin/storage.py | 54 ++++++++++++++++++++++++++++++ pyiron_workflow/node.py | 28 ++++++++++------ pyiron_workflow/nodes/composite.py | 2 +- pyiron_workflow/nodes/for_loop.py | 2 +- pyiron_workflow/nodes/macro.py | 2 +- pyiron_workflow/nodes/static_io.py | 2 +- pyiron_workflow/workflow.py | 2 +- tests/unit/nodes/test_macro.py | 35 +++++++++++++++---- 9 files changed, 110 insertions(+), 26 deletions(-) diff --git a/notebooks/deepdive.ipynb b/notebooks/deepdive.ipynb index dfb381d83..856d74b99 100644 --- a/notebooks/deepdive.ipynb +++ b/notebooks/deepdive.ipynb @@ -5521,15 +5521,18 @@ "- Also related, there is currently zero filtering of which data, or to which depth the graph gets stored -- it's all or nothing\n", "- If the source code for nodes gets modified between saving and loading, weird stuff is likely to happen, and some of it may happen silently.\n", "\n", - "Lastly, we currently use two backends: `tinybase.storage.H5ioStorage` and `h5io` directly. They have slightly different strengths:\n", - "- `\"h5io\"` (the default) \n", + "Lastly, we currently use three backends: `pickle`, `tinybase.storage.H5ioStorage`, and `h5io` directly. 
They have slightly different strengths:\n", "- `\"h5io\"` \n", " - Will let you save and load any nodes that are defined by subclassing (this includes all nodes defined using the decorators)\n", " - Will preserve changes to a macro (replace/add/remove/rewire)\n", " - Has trouble with some data\n", "- `\"tinybase\"`\n", " - Requires all nodes to have been instantiated with the creator (`wf.create...`; this means moving node definitions to a `.py` file in your pythonpath and registering it as a node package -- not particularly difficult!)\n", " - _Ignores_ changes to a macro (will crash nicely if the macro IO changed)\n", " - Falls back to `pickle` for data failures, so can handle a wider variety of data IO objects\n", + "- `\"pickle\"`\n", + " - Can't handle unpickleable IO items, if that's a problem for your data\n", + " - Stored in byte format; not browsable and needs to be loaded to inspect it at all" ] }, { diff --git a/pyiron_workflow/mixin/storage.py b/pyiron_workflow/mixin/storage.py index 19ac24b57..b44229b29 100644 --- a/pyiron_workflow/mixin/storage.py +++ b/pyiron_workflow/mixin/storage.py @@ -8,6 +8,7 @@ from abc import ABC, abstractmethod from importlib import import_module import os +import pickle import sys from typing import Optional @@ -84,6 +85,51 @@ def _delete(self): """Remove an existing save-file for this backend""" +class PickleStorage(StorageInterface): + + _PICKLE_STORAGE_FILE_NAME = "pickle.pckl" + + def __init__(self, owner: HasPickleStorage): + super().__init__(owner=owner) + + @property + def owner(self) -> HasPickleStorage: + return self._owner + + def _save(self): + with open(self._pickle_storage_file_path, "wb") as file: + pickle.dump(self.owner, file) + + def _load(self): + with open(self._pickle_storage_file_path, "rb") as file: + inst = pickle.load(file) + if inst.__class__ != self.owner.__class__: + raise TypeError( + f"{self.owner.label} cannot load, as it has type " + f"{self.owner.__class__.__name__}, but the saved node has type " + f"{inst.__class__.__name__}" + ) + self.owner.__setstate__(inst.__getstate__()) + + def _delete(self): + if self.has_contents: + FileObject( + self._PICKLE_STORAGE_FILE_NAME, self.owner.storage_directory + ).delete() + + @property + def _pickle_storage_file_path(self) -> str: + return str( + ( + self.owner.storage_directory.path / self._PICKLE_STORAGE_FILE_NAME + ).resolve() + ) + + @property + def _has_contents(self) -> bool: + return os.path.isfile(self._pickle_storage_file_path) + + class H5ioStorage(StorageInterface): _H5IO_STORAGE_FILE_NAME = "h5io.h5" @@ -365,6 +411,14 @@ def report_import_readiness(self, tabs=0, report_so_far=""): ) +class HasPickleStorage(HasStorage, ABC): + @classmethod + def _storage_interfaces(cls): + interfaces = super(HasPickleStorage, cls)._storage_interfaces() + interfaces["pickle"] = PickleStorage + return interfaces + + class HasH5ioStorage(HasStorage, ABC): @classmethod def _storage_interfaces(cls): diff --git a/pyiron_workflow/node.py b/pyiron_workflow/node.py index e6a0f441a..678609d46 100644 --- a/pyiron_workflow/node.py +++ b/pyiron_workflow/node.py @@ -22,7 +22,11 @@ from pyiron_workflow.mixin.run import Runnable, ReadinessError from pyiron_workflow.mixin.semantics import Semantic from pyiron_workflow.mixin.single_output import ExploitsSingleOutput -from pyiron_workflow.mixin.storage import HasH5ioStorage, HasTinybaseStorage +from pyiron_workflow.mixin.storage import ( + HasH5ioStorage, + 
HasTinybaseStorage, + HasPickleStorage, +) from pyiron_workflow.topology import ( get_nodes_in_data_tree, set_run_connections_according_to_linear_dag, @@ -48,6 +52,7 @@ class Node( HasWorkingDirectory, HasH5ioStorage, HasTinybaseStorage, + HasPickleStorage, ABC, ): """ @@ -160,11 +165,11 @@ class Node( your graph this could be expensive in terms of storage space and/or time. - [ALPHA ISSUE] Similarly, there is no way to save only part of a graph; only the entire graph may be saved at once. - - [ALPHA ISSUE] There are two possible back-ends for saving: one leaning on + - [ALPHA ISSUE] There are three possible back-ends for saving: one leaning on `tinybase.storage.GenericStorage` (in practice, - `H5ioStorage(GenericStorage)`), that is the default, and the other that - uses the `h5io` module directly. The backend used is always the one on the - graph root. + `H5ioStorage(GenericStorage)`), and the other that uses the `h5io` module + directly. The third (default) option is to use `pickle`. The backend used + is always the one on the graph root. - [ALPHA ISSUE] The `h5io` backend is deprecated -- it can't handle custom reconstructors (i.e. when `__reduce__` returns a tuple with some non-standard callable as its first entry), and basically all our nodes do @@ -191,8 +196,9 @@ class Node( requirement is as simple as moving all the desired nodes off to a `.py` file, registering it, and building the composite from there. - [ALPHA ISSUE] Restrictions to macros: - - For the `h5io` backend: there are none; if a macro is modified, saved, - and reloaded, the modifications will be reflected in the loaded state. + - For the `h5io` and `pickle` backends: there are none; if a macro is + modified, saved, and reloaded, the modifications will be reflected in + the loaded state. Note there is a little bit of danger here, as the macro class still corresponds to the un-modified macro class. - For the `tinybase` backend: the macro will re-instantiate its original @@ -251,9 +257,9 @@ class Node( received output from this call. (Default is False.) save_after_run (bool): Whether to trigger a save after each run of the node (currently causes the entire graph to save). (Default is False.) - storage_backend (Literal["h5io" | "tinybase"] | None): The flag for the the - backend to use for saving and loading; for nodes in a graph the value on - the root node is always used. + storage_backend (Literal["h5io", "tinybase", "pickle"] | None): The flag for + the backend to use for saving and loading; for nodes in a graph the value + on the root node is always used. signals (pyiron_workflow.io.Signals): A container for input and output signals, which are channels for controlling execution flow. 
By default, has a :attr:`signals.inputs.run` channel which has a callback to the :meth:`run` method @@ -324,7 +330,7 @@ def __init__( parent: Optional[Composite] = None, overwrite_save: bool = False, run_after_init: bool = False, - storage_backend: Literal["h5io", "tinybase"] | None = "h5io", + storage_backend: Literal["h5io", "tinybase", "pickle"] | None = "h5io", save_after_run: bool = False, **kwargs, ): diff --git a/pyiron_workflow/nodes/composite.py b/pyiron_workflow/nodes/composite.py index d82d39fd0..5271486de 100644 --- a/pyiron_workflow/nodes/composite.py +++ b/pyiron_workflow/nodes/composite.py @@ -102,7 +102,7 @@ def __init__( parent: Optional[Composite] = None, overwrite_save: bool = False, run_after_init: bool = False, - storage_backend: Optional[Literal["h5io", "tinybase"]] = None, + storage_backend: Optional[Literal["h5io", "tinybase", "pickle"]] = None, save_after_run: bool = False, strict_naming: bool = True, **kwargs, diff --git a/pyiron_workflow/nodes/for_loop.py b/pyiron_workflow/nodes/for_loop.py index 3b4f3c552..fab04f578 100644 --- a/pyiron_workflow/nodes/for_loop.py +++ b/pyiron_workflow/nodes/for_loop.py @@ -201,7 +201,7 @@ def __init__( parent: Optional[Composite] = None, overwrite_save: bool = False, run_after_init: bool = False, - storage_backend: Optional[Literal["h5io", "tinybase"]] = None, + storage_backend: Optional[Literal["h5io", "tinybase", "pickle"]] = None, save_after_run: bool = False, strict_naming: bool = True, body_node_executor: Optional[Executor] = None, diff --git a/pyiron_workflow/nodes/macro.py b/pyiron_workflow/nodes/macro.py index 977071928..406cf6976 100644 --- a/pyiron_workflow/nodes/macro.py +++ b/pyiron_workflow/nodes/macro.py @@ -302,7 +302,7 @@ def _scrape_output_labels(cls): return scraped_labels def _prepopulate_ui_nodes_from_graph_creator_signature( - self, storage_backend: Literal["h5io", "tinybase"] + self, storage_backend: Literal["h5io", "tinybase", "pickle"] ): ui_nodes = [] for label, (type_hint, default) in self.preview_inputs().items(): diff --git a/pyiron_workflow/nodes/static_io.py b/pyiron_workflow/nodes/static_io.py index 113739603..66cbe7e01 100644 --- a/pyiron_workflow/nodes/static_io.py +++ b/pyiron_workflow/nodes/static_io.py @@ -32,7 +32,7 @@ def __init__( parent: Optional[Composite] = None, overwrite_save: bool = False, run_after_init: bool = False, - storage_backend: Optional[Literal["h5io", "tinybase"]] = None, + storage_backend: Optional[Literal["h5io", "tinybase", "pickle"]] = None, save_after_run: bool = False, **kwargs, ): diff --git a/pyiron_workflow/workflow.py b/pyiron_workflow/workflow.py index 5f80d851f..95d831bfd 100644 --- a/pyiron_workflow/workflow.py +++ b/pyiron_workflow/workflow.py @@ -204,7 +204,7 @@ def __init__( *nodes: Node, overwrite_save: bool = False, run_after_init: bool = False, - storage_backend: Optional[Literal["h5io", "tinybase"]] = None, + storage_backend: Optional[Literal["h5io", "tinybase", "pickle"]] = None, save_after_run: bool = False, strict_naming: bool = True, inputs_map: Optional[dict | bidict] = None, diff --git a/tests/unit/nodes/test_macro.py b/tests/unit/nodes/test_macro.py index 94c160d4e..8b9fee52f 100644 --- a/tests/unit/nodes/test_macro.py +++ b/tests/unit/nodes/test_macro.py @@ -495,7 +495,7 @@ def test_storage_for_modified_macros(self): TypeError, msg="h5io can't handle custom reconstructors" ): macro.save() - elif backend == "tinybase": + elif backend in ["tinybase", "pickle"]: macro.save() reloaded = Macro.create.demo.AddThree( label="m", storage_backend=backend @@ 
-523,12 +523,33 @@ def test_storage_for_modified_macros(self): ) rerun = reloaded() - self.assertDictEqual( - original_result, - rerun, - msg="Rerunning should re-execute the _original_ " - "functionality" - ) + if backend == "tinybase": + self.assertIsInstance( + reloaded.two, + Macro.create.standard.Add, + msg="tinybase is re-instantiating the original macro " + "class and then carefully loading particular " + "pieces of data; that means each child is of its original class" + ) + self.assertDictEqual( + original_result, + rerun, + msg="Rerunning re-executes the _original_ " + "functionality" + ) + elif backend == "pickle": + self.assertIsInstance( + reloaded.two, + Macro.create.demo.AddPlusOne, + msg="pickle instantiates the macro node class, but " + "then uses its serialized state, so we retain " + "the replaced node." + ) + self.assertDictEqual( + modified_result, + rerun, + msg="Rerunning re-executes the _replaced_ functionality" + ) else: raise ValueError( f"Backend {backend} not recognized -- write a test for it" ) From b7135e54d83da44894caf36f1bc4bac824de2293 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 6 Aug 2024 21:04:31 -0700 Subject: [PATCH 05/19] Fix root cause of storage conflict Ok, the reason workflow storage tests were failing after introducing pickle was that the original version of the test nested the try/finally scope and the for-loop scope in the wrong order. I got away with it earlier because of interplay between the outer-most member of the loop and the default storage backend. Actually fixing the problem is as simple as ensuring the "finally delete" clause happens after _each_ loop step. This does that and reverts the renaming of the workflow. --- tests/unit/test_workflow.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_workflow.py b/tests/unit/test_workflow.py index caf5f37d0..f9f6a1f16 100644 --- a/tests/unit/test_workflow.py +++ b/tests/unit/test_workflow.py @@ -446,7 +446,7 @@ def test_storage_values(self): with self.subTest(backend): try: print("Trying", backend) - wf = Workflow("wf_values", storage_backend=backend) + wf = Workflow("wf", storage_backend=backend) wf.register("static.demo_nodes", domain="demo") wf.inp = wf.create.demo.AddThree(x=0) wf.out = wf.inp.outputs.add_three + 1 @@ -461,7 +461,7 @@ def test_storage_values(self): wf.save() else: wf.save() - reloaded = Workflow("wf_values", storage_backend=backend) + reloaded = Workflow("wf", storage_backend=backend) self.assertEqual( wf_out.out__add, reloaded.outputs.out__add.value, @@ -488,8 +488,8 @@ def test_storage_scopes(self): for backend in Workflow.allowed_backends(): with self.subTest(backend): - try: - for backend in Workflow.allowed_backends(): + for backend in Workflow.allowed_backends(): + try: if backend == "h5io": with self.subTest(backend): with self.assertRaises( @@ -503,8 +503,8 @@ def test_storage_scopes(self): wf.storage_backend = backend wf.save() Workflow(wf.label, storage_backend=backend) - finally: - wf.storage.delete() + finally: + wf.storage.delete() with self.subTest("No unimportable nodes for either back-end"): try: From 5e9279aa086fea617a025e8c425cc6e30a813c11 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 6 Aug 2024 21:10:47 -0700 Subject: [PATCH 06/19] Again, correctly order try/finally and for-loops --- tests/unit/test_workflow.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/unit/test_workflow.py b/tests/unit/test_workflow.py index f9f6a1f16..3d2a6ebf4 100644 --- 
a/tests/unit/test_workflow.py +++ b/tests/unit/test_workflow.py @@ -507,20 +507,20 @@ def test_storage_scopes(self): wf.storage.delete() with self.subTest("No unimportable nodes for either back-end"): - try: - wf.import_type_mismatch = wf.create.demo.dynamic() - for backend in Workflow.allowed_backends(): + for backend in Workflow.allowed_backends(): + try: + wf.import_type_mismatch = wf.create.demo.dynamic() with self.subTest(backend): - with self.assertRaises( - TypeNotFoundError, - msg="Imported object is function but node type is node -- " - "should fail early on save" - ): - wf.storage_backend = backend - wf.save() - finally: - wf.remove_child(wf.import_type_mismatch) - wf.storage.delete() + with self.assertRaises( + TypeNotFoundError, + msg="Imported object is function but node type is node " + "-- should fail early on save" + ): + wf.storage_backend = backend + wf.save() + finally: + wf.remove_child(wf.import_type_mismatch) + wf.storage.delete() if "h5io" in Workflow.allowed_backends(): wf.add_child(PlusOne(label="local_but_importable")) From 6ccd24cb059650077e9d069dd32071c6d1778bf0 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 16:30:06 -0700 Subject: [PATCH 07/19] Remove keyword argument from pure-decorator You're only supposed to use it as a decorator to start with, so the kwarg was senseless --- pyiron_workflow/nodes/transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_workflow/nodes/transform.py b/pyiron_workflow/nodes/transform.py index 7f8482bc7..2e15e217c 100644 --- a/pyiron_workflow/nodes/transform.py +++ b/pyiron_workflow/nodes/transform.py @@ -400,7 +400,7 @@ def dataclass_node_factory( ) -def as_dataclass_node(dataclass: type, use_cache: bool = True): +def as_dataclass_node(dataclass: type): """ Decorates a dataclass as a dataclass node -- i.e. a node whose inputs correspond to dataclass fields and whose output is an instance of the dataclass. @@ -452,7 +452,7 @@ def as_dataclass_node(dataclass: type, use_cache: bool = True): >>> f(necessary="input as a node kwarg") Foo(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) """ - cls = dataclass_node_factory(dataclass, use_cache) + cls = dataclass_node_factory(dataclass) cls.preview_io() return cls From 3d1c9eaa527f349af7d67976cbbb600a65653b23 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 16:31:15 -0700 Subject: [PATCH 08/19] Add factory import source This is necessary (but not sufficient) to get `as_dataclass_node` decorated classes to pickle. The field name is a bit off compared to `Function` and `Macro`, as now we decorate a class definition instead of a function definition, but it's close. 
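For background on why the import source matters: `pickle` serializes a class by reference, recording its `__module__` and `__qualname__` and re-importing it on load, so a factory-built class whose stored name doesn't resolve to an importable attribute can't round-trip. A minimal sketch, independent of pyiron_workflow (all names here are illustrative):

    import pickle

    def node_class_factory(name: str) -> type:
        # stands in for dataclass_node_factory: builds a class at runtime
        return type(name, (), {})

    MyNode = node_class_factory("DataclassNodeFoo")

    try:
        pickle.dumps(MyNode())
    except pickle.PicklingError as err:
        print(err)  # attribute lookup of "DataclassNodeFoo" on this module fails

    # recording an import source that actually resolves fixes the lookup
    MyNode.__qualname__ = "MyNode"
    pickle.dumps(MyNode())  # now succeeds

Recording the decorated object on the factory-made class plays the same role for the node classes here.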
--- pyiron_workflow/nodes/transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiron_workflow/nodes/transform.py b/pyiron_workflow/nodes/transform.py index 2e15e217c..d3e7a722e 100644 --- a/pyiron_workflow/nodes/transform.py +++ b/pyiron_workflow/nodes/transform.py @@ -453,6 +453,7 @@ def as_dataclass_node(dataclass: type): Foo(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) """ cls = dataclass_node_factory(dataclass) + cls._class_returns_from_decorated_function = dataclass cls.preview_io() return cls From 6a0787961686dfb062e9faa0079d3f3f1569ca69 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 16:54:19 -0700 Subject: [PATCH 09/19] Bring the dataclass node in line with function and macro --- pyiron_workflow/nodes/transform.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyiron_workflow/nodes/transform.py b/pyiron_workflow/nodes/transform.py index d3e7a722e..62e0230a0 100644 --- a/pyiron_workflow/nodes/transform.py +++ b/pyiron_workflow/nodes/transform.py @@ -388,10 +388,12 @@ def dataclass_node_factory( if not is_dataclass(dataclass): dataclass = as_dataclass(dataclass) return ( - f"{DataclassNode.__name__}{dataclass.__name__}", + dataclass.__name__, (DataclassNode,), { "dataclass": dataclass, + "__module__": dataclass.__module__, + "__qualname__": dataclass.__qualname__, "_output_type_hint": dataclass, "__doc__": dataclass.__doc__, "use_cache": use_cache, @@ -452,6 +454,7 @@ def as_dataclass_node(dataclass: type): >>> f(necessary="input as a node kwarg") Foo(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) """ + dataclass_node_factory.clear(dataclass.__name__) # Force a fresh class cls = dataclass_node_factory(dataclass) cls._class_returns_from_decorated_function = dataclass cls.preview_io() From 7f163851537b76e2b19b4d83e089e08f4e6697c4 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 17:07:24 -0700 Subject: [PATCH 10/19] Leverage new pyiron_snippets.factory stuff to find the class --- pyiron_workflow/nodes/transform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyiron_workflow/nodes/transform.py b/pyiron_workflow/nodes/transform.py index 62e0230a0..5d49c01b1 100644 --- a/pyiron_workflow/nodes/transform.py +++ b/pyiron_workflow/nodes/transform.py @@ -455,8 +455,9 @@ def as_dataclass_node(dataclass: type): Foo(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) """ dataclass_node_factory.clear(dataclass.__name__) # Force a fresh class + module, qualname = dataclass.__module__, dataclass.__qualname__ cls = dataclass_node_factory(dataclass) - cls._class_returns_from_decorated_function = dataclass + cls._reduce_imports_as = (module, qualname) cls.preview_io() return cls From de8518ec8b1925c13595c25ed56f44f94062a133 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 17:07:41 -0700 Subject: [PATCH 11/19] Mangle the stored dataclass qualname so it can be found later --- pyiron_workflow/nodes/transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyiron_workflow/nodes/transform.py b/pyiron_workflow/nodes/transform.py index 5d49c01b1..ffa9342a5 100644 --- a/pyiron_workflow/nodes/transform.py +++ b/pyiron_workflow/nodes/transform.py @@ -387,13 +387,15 @@ def dataclass_node_factory( ) if not is_dataclass(dataclass): dataclass = as_dataclass(dataclass) + module, qualname = dataclass.__module__, dataclass.__qualname__ + dataclass.__qualname__ += ".dataclass" # So output type hints know where 
to find it return ( dataclass.__name__, (DataclassNode,), { "dataclass": dataclass, - "__module__": dataclass.__module__, - "__qualname__": dataclass.__qualname__, + "__module__": module, + "__qualname__": qualname, "_output_type_hint": dataclass, "__doc__": dataclass.__doc__, "use_cache": use_cache, From 2c0ca0eec6edff56f1bd6f672ac6111f8fffa250 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 17:20:42 -0700 Subject: [PATCH 12/19] Add tests --- tests/unit/nodes/test_transform.py | 39 ++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/unit/nodes/test_transform.py b/tests/unit/nodes/test_transform.py index b2d9718b1..04d448989 100644 --- a/tests/unit/nodes/test_transform.py +++ b/tests/unit/nodes/test_transform.py @@ -6,6 +6,7 @@ from pandas import DataFrame from pyiron_workflow.channels import NOT_DATA +from pyiron_workflow.nodes.function import as_function_node from pyiron_workflow.nodes.transform import ( Transformer, as_dataclass_node, @@ -16,6 +17,15 @@ list_to_outputs, ) +@as_dataclass_node +class MyData: + stuff: bool = False + +@as_function_node +def Downstream(x: MyData.dataclass): + x.stuff = True + return x + class TestTransformer(unittest.TestCase): def test_pickle(self): @@ -249,6 +259,35 @@ class DecoratedDCLike: "instantiation" ) + def test_dataclass_typing_and_storage(self): + md = MyData() + + with self.assertRaises( + TypeError, + msg="Wrongly typed input should not connect" + ): + Downstream(5) + + ds = Downstream(md) + out = ds.pull() + self.assertTrue( + out.stuff, + msg="Sanity check" + ) + + rmd = pickle.loads(pickle.dumps(md)) + self.assertIs( + rmd.outputs.dataclass.type_hint, + MyData.dataclass, + msg="Type hint should be findable on the scope of the node decorating it" + ) + ds2 = Downstream(rmd) + out = ds2.pull() + self.assertTrue( + out.stuff, + msg="Flow should be able to survive (de)serialization" + ) + if __name__ == '__main__': unittest.main() From 71f021d3e0651e8c7827f094aae71d2acb788901 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 17:20:55 -0700 Subject: [PATCH 13/19] Update docs examples to reflect new naming --- pyiron_workflow/nodes/transform.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyiron_workflow/nodes/transform.py b/pyiron_workflow/nodes/transform.py index ffa9342a5..4c3a7ac39 100644 --- a/pyiron_workflow/nodes/transform.py +++ b/pyiron_workflow/nodes/transform.py @@ -443,7 +443,7 @@ def as_dataclass_node(dataclass: type): >>> >>> f = Foo() >>> print(f.readiness_report) - DataclassNodeFoo readiness: False + Foo readiness: False STATE: running: False failed: False @@ -454,7 +454,7 @@ def as_dataclass_node(dataclass: type): complex_ ready: True >>> f(necessary="input as a node kwarg") - Foo(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) + Foo.dataclass(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) """ dataclass_node_factory.clear(dataclass.__name__) # Force a fresh class module, qualname = dataclass.__module__, dataclass.__qualname__ @@ -516,7 +516,7 @@ def dataclass_node(dataclass: type, use_cache: bool = True, *node_args, **node_k complex_ ready: True >>> f(necessary="input as a node kwarg") - Foo(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) + Foo.dataclass(necessary='input as a node kwarg', bar='bar', answer=42, complex_=[1, 2, 3]) """ cls = dataclass_node_factory(dataclass) cls.preview_io() From d05fea84663bbc0f3ae2d5e7b5823626e2873a7f Mon Sep 17 00:00:00 2001 
From: liamhuber Date: Wed, 7 Aug 2024 20:47:29 -0700 Subject: [PATCH 14/19] Update snippets dependency --- .ci_support/environment.yml | 2 +- .ci_support/lower_bound.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml index c5f0618b0..97d09192b 100644 --- a/.ci_support/environment.yml +++ b/.ci_support/environment.yml @@ -12,7 +12,7 @@ dependencies: - pandas =2.2.2 - pyiron_base =0.9.10 - pyiron_contrib =0.1.17 -- pyiron_snippets =0.1.3 +- pyiron_snippets =0.1.4 - python-graphviz =0.20.3 - toposort =1.10 - typeguard =4.3.0 diff --git a/.ci_support/lower_bound.yml b/.ci_support/lower_bound.yml index b6b04d89d..850c94427 100644 --- a/.ci_support/lower_bound.yml +++ b/.ci_support/lower_bound.yml @@ -12,7 +12,7 @@ dependencies: - pandas =2.2.0 - pyiron_base =0.8.3 - pyiron_contrib =0.1.16 -- pyiron_snippets =0.1.0 +- pyiron_snippets =0.1.4 - python-graphviz =0.20.0 - toposort =1.10 - typeguard =4.2.0 diff --git a/pyproject.toml b/pyproject.toml index 40ec2a0e9..6dcadf281 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "pandas==2.2.2", "pyiron_base==0.9.10", "pyiron_contrib==0.1.17", - "pyiron_snippets==0.1.3", + "pyiron_snippets==0.1.4", "toposort==1.10", "typeguard==4.3.0", ] From d91c015baefa791169c9881489905fbab21032f3 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Thu, 8 Aug 2024 03:50:46 +0000 Subject: [PATCH 15/19] [dependabot skip] Update env file --- .binder/environment.yml | 2 +- docs/environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.binder/environment.yml b/.binder/environment.yml index c5f0618b0..97d09192b 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -12,7 +12,7 @@ dependencies: - pandas =2.2.2 - pyiron_base =0.9.10 - pyiron_contrib =0.1.17 -- pyiron_snippets =0.1.3 +- pyiron_snippets =0.1.4 - python-graphviz =0.20.3 - toposort =1.10 - typeguard =4.3.0 diff --git a/docs/environment.yml b/docs/environment.yml index fdd9c94f5..ca1a3ec57 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -18,7 +18,7 @@ dependencies: - pandas =2.2.2 - pyiron_base =0.9.10 - pyiron_contrib =0.1.17 -- pyiron_snippets =0.1.3 +- pyiron_snippets =0.1.4 - python-graphviz =0.20.3 - toposort =1.10 - typeguard =4.3.0 From a1b1836e655343500430660d852b15ff93ae4f16 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 20:57:27 -0700 Subject: [PATCH 16/19] Use new pyiron_snippets syntax consistently --- pyiron_workflow/nodes/function.py | 4 +++- pyiron_workflow/nodes/macro.py | 4 +++- tests/unit/mixin/test_preview.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pyiron_workflow/nodes/function.py b/pyiron_workflow/nodes/function.py index e1302a78a..15362df37 100644 --- a/pyiron_workflow/nodes/function.py +++ b/pyiron_workflow/nodes/function.py @@ -421,7 +421,9 @@ def decorator(node_function): factory_made = function_node_factory( node_function, validate_output_labels, use_cache, *output_labels ) - factory_made._class_returns_from_decorated_function = node_function + factory_made._reduce_imports_as = ( + node_function.__module__, node_function.__qualname__ + ) factory_made.preview_io() return factory_made diff --git a/pyiron_workflow/nodes/macro.py b/pyiron_workflow/nodes/macro.py index 406cf6976..27152dcc7 100644 --- a/pyiron_workflow/nodes/macro.py +++ b/pyiron_workflow/nodes/macro.py @@ -534,7 +534,9 @@ def decorator(graph_creator): factory_made = macro_node_factory( 
graph_creator, validate_output_labels, use_cache, *output_labels ) - factory_made._class_returns_from_decorated_function = graph_creator + factory_made._reduce_imports_as = ( + graph_creator.__module__, graph_creator.__qualname__ + ) factory_made.preview_io() return factory_made diff --git a/tests/unit/mixin/test_preview.py b/tests/unit/mixin/test_preview.py index 0c8ac5f2f..f070749a5 100644 --- a/tests/unit/mixin/test_preview.py +++ b/tests/unit/mixin/test_preview.py @@ -46,7 +46,7 @@ def scraper_decorator(fnc): factory_made = scraper_factory( fnc, validate_output_labels, io_defining_function_uses_self, *output_labels ) - factory_made._class_returns_from_decorated_function = fnc + factory_made._reduce_imports_as = (fnc.__module__, fnc.__qualname__) factory_made.preview_io() return factory_made return scraper_decorator From 91348e0efce97e184d4a30a3d710cc3529a164b6 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 20:58:51 -0700 Subject: [PATCH 17/19] Expose `as_dataclass_node` in the API Now that it's pickling as well as anything else --- pyiron_workflow/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_workflow/__init__.py b/pyiron_workflow/__init__.py index 43b6030fa..ab992d40c 100644 --- a/pyiron_workflow/__init__.py +++ b/pyiron_workflow/__init__.py @@ -46,7 +46,7 @@ from pyiron_workflow.logging import logger from pyiron_workflow.nodes.macro import Macro, as_macro_node, macro_node from pyiron_workflow.nodes.transform import ( - # as_dataclass_node, # Not pickling nicely yet + as_dataclass_node, dataclass_node, inputs_to_dataframe, inputs_to_dict, From a8d3bd9923a2b553ec015bfa47cad4a4b21b2439 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Wed, 7 Aug 2024 21:48:45 -0700 Subject: [PATCH 18/19] [patch] Fall back on cloudpickle When the pickle backend fails --- pyiron_workflow/mixin/storage.py | 52 ++++++++++++++++++++++++-------- pyiron_workflow/node.py | 7 +++-- tests/unit/test_node.py | 24 ++++++++++++++- 3 files changed, 66 insertions(+), 17 deletions(-) diff --git a/pyiron_workflow/mixin/storage.py b/pyiron_workflow/mixin/storage.py index b44229b29..24d0abcaf 100644 --- a/pyiron_workflow/mixin/storage.py +++ b/pyiron_workflow/mixin/storage.py @@ -12,6 +12,7 @@ import sys from typing import Optional +import cloudpickle import h5io from pyiron_snippets.files import FileObject, DirectoryObject @@ -88,6 +89,7 @@ def _delete(self): class PickleStorage(StorageInterface): _PICKLE_STORAGE_FILE_NAME = "pickle.pckl" + _CLOUDPICKLE_STORAGE_FILE_NAME = "cloudpickle.cpckl" def __init__(self, owner: HasPickleStorage): super().__init__(owner=owner) @@ -97,12 +99,22 @@ def owner(self) -> HasPickleStorage: return self._owner def _save(self): - with open(self._pickle_storage_file_path, "wb") as file: - pickle.dump(self.owner, file) + try: + with open(self._pickle_storage_file_path, "wb") as file: + pickle.dump(self.owner, file) + except Exception: + self._delete() + with open(self._cloudpickle_storage_file_path, "wb") as file: + cloudpickle.dump(self.owner, file) def _load(self): - with open(self._pickle_storage_file_path, "rb") as file: - inst = pickle.load(file) + if self._has_pickle_contents: + with open(self._pickle_storage_file_path, "rb") as file: + inst = pickle.load(file) + elif self._has_cloudpickle_contents: + with open(self._cloudpickle_storage_file_path, "rb") as file: + inst = cloudpickle.load(file) + if inst.__class__ != self.owner.__class__: raise TypeError( f"{self.owner.label} cannot load, as it has type " @@ -111,24 +123,38 @@ def 
_load(self): + def _delete_file(self, file: str): + FileObject(file, self.owner.storage_directory).delete() + def _delete(self): - if self.has_contents: - FileObject( - self._PICKLE_STORAGE_FILE_NAME, self.owner.storage_directory - ).delete() + if self._has_pickle_contents: + self._delete_file(self._PICKLE_STORAGE_FILE_NAME) + elif self._has_cloudpickle_contents: + self._delete_file(self._CLOUDPICKLE_STORAGE_FILE_NAME) + + def _storage_path(self, file: str): + return str((self.owner.storage_directory.path / file).resolve()) @property def _pickle_storage_file_path(self) -> str: - return str( - ( - self.owner.storage_directory.path / self._PICKLE_STORAGE_FILE_NAME - ).resolve() - ) + return self._storage_path(self._PICKLE_STORAGE_FILE_NAME) + + @property + def _cloudpickle_storage_file_path(self) -> str: + return self._storage_path(self._CLOUDPICKLE_STORAGE_FILE_NAME) @property def _has_contents(self) -> bool: + return self._has_pickle_contents or self._has_cloudpickle_contents + + @property + def _has_pickle_contents(self) -> bool: return os.path.isfile(self._pickle_storage_file_path) + @property + def _has_cloudpickle_contents(self) -> bool: + return os.path.isfile(self._cloudpickle_storage_file_path) + class H5ioStorage(StorageInterface): diff --git a/pyiron_workflow/node.py b/pyiron_workflow/node.py index 678609d46..66762d158 100644 --- a/pyiron_workflow/node.py +++ b/pyiron_workflow/node.py @@ -143,7 +143,8 @@ class Node( - As long as you haven't put anything unpickleable on them, or defined them in an unpicklable place (e.g. in the `<locals>` of another function), you can simply (un)pickle nodes. There is no save/load interface for this right - now, just import pickle and do it. + now, just import pickle and do it. The "pickle" backend to the `Node.save` + method will fall back on `cloudpickle` as needed to overcome this. - Saving is triggered manually, or by setting a flag to save after the node runs. - At the end of instantiation, nodes will load automatically if they find saved @@ -168,8 +169,8 @@ class Node( - [ALPHA ISSUE] There are three possible back-ends for saving: one leaning on `tinybase.storage.GenericStorage` (in practice, `H5ioStorage(GenericStorage)`), and the other that uses the `h5io` module - directly. The third (default) option is to use `pickle`. The backend used - is always the one on the graph root. + directly. The third (default) option is to use `(cloud)pickle`. The backend + used is always the one on the graph root. - [ALPHA ISSUE] The `h5io` backend is deprecated -- it can't handle custom reconstructors (i.e. 
when `__reduce__` returns a tuple with some non-standard callable as its first entry), and basically all our nodes do diff --git a/tests/unit/test_node.py b/tests/unit/test_node.py index 42ce5fab4..a93a98e51 100644 --- a/tests/unit/test_node.py +++ b/tests/unit/test_node.py @@ -20,7 +20,7 @@ class ANode(Node): """To de-abstract the class""" def _setup_node(self) -> None: - self._inputs = Inputs(InputData("x", self, type_hint=int)) + self._inputs = Inputs(InputData("x", self, type_hint=int),) self._outputs = OutputsWithInjection( OutputDataWithInjection("y", self, type_hint=int), ) @@ -472,7 +472,29 @@ def test_storage(self): force_run.outputs.y.value, msg="Destroying the save should allow immediate re-running" ) + + hard_input = ANode(label="hard", storage_backend=backend) + hard_input.inputs.x.type_hint = callable + hard_input.inputs.x = lambda x: x * 2 + if backend == "pickle": + hard_input.save() + reloaded = ANode( + label=hard_input.label, + storage_backend=backend + ) + self.assertEqual( + reloaded.inputs.x.value(4), + hard_input.inputs.x.value(4), + msg="Cloud pickle should be strong enough to recover this" + ) + else: + with self.assertRaises( + (TypeError, AttributeError), + msg="Other backends are not powerful enough for some values" + ): + hard_input.save() finally: + hard_input.delete_storage() self.n1.delete_storage() @unittest.skipIf(sys.version_info < (3, 11), "Storage will only work in 3.11+") From 4a1c879d3ce0527eb598ce9ca4b6a3aaabbe2223 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Thu, 8 Aug 2024 16:03:22 +0000 Subject: [PATCH 19/19] Format black --- pyiron_workflow/nodes/function.py | 3 ++- pyiron_workflow/nodes/macro.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyiron_workflow/nodes/function.py b/pyiron_workflow/nodes/function.py index 15362df37..96d9ac57e 100644 --- a/pyiron_workflow/nodes/function.py +++ b/pyiron_workflow/nodes/function.py @@ -422,7 +422,8 @@ def decorator(node_function): node_function, validate_output_labels, use_cache, *output_labels ) factory_made._reduce_imports_as = ( - node_function.__module__, node_function.__qualname__ + node_function.__module__, + node_function.__qualname__, ) factory_made.preview_io() return factory_made diff --git a/pyiron_workflow/nodes/macro.py b/pyiron_workflow/nodes/macro.py index 27152dcc7..36886f78a 100644 --- a/pyiron_workflow/nodes/macro.py +++ b/pyiron_workflow/nodes/macro.py @@ -535,7 +535,8 @@ def decorator(graph_creator): graph_creator, validate_output_labels, use_cache, *output_labels ) factory_made._reduce_imports_as = ( - graph_creator.__module__, graph_creator.__qualname__ + graph_creator.__module__, + graph_creator.__qualname__, ) factory_made.preview_io() return factory_made
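Taken together, the series enables the following round-trip; a sketch mirroring `test_storage_values` from the diffs above with the new backend, assuming the test suite's demo node package is available to register:

    from pyiron_workflow import Workflow

    wf = Workflow("wf", storage_backend="pickle")
    wf.register("static.demo_nodes", domain="demo")
    wf.inp = wf.create.demo.AddThree(x=0)
    wf.out = wf.inp.outputs.add_three + 1
    wf_out = wf()
    wf.save()  # writes pickle.pckl, or cloudpickle.cpckl if plain pickle fails

    # instantiating with the same label finds the save-file and loads it
    reloaded = Workflow("wf", storage_backend="pickle")
    print(wf_out.out__add == reloaded.outputs.out__add.value)  # expected: True

    reloaded.storage.delete()  # clean up the save-file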