From 326e589e692f1ef3f18aa33bd3d158b80c7b41a7 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 9 Oct 2023 00:32:38 +0200 Subject: [PATCH] Fix setting the name of `PythonNode`. (#443) --- .readthedocs.yaml | 13 ++- docs/rtd_environment.yml | 36 -------- docs/source/changes.md | 5 + docs/source/conf.py | 1 + .../how_to_guides/writing_custom_nodes.md | 23 ++--- docs/source/reference_guides/api.md | 27 +++--- .../defining_dependencies_products.md | 4 +- environment.yml | 1 + setup.cfg | 13 +++ src/_pytask/collect.py | 40 ++++---- src/_pytask/node_protocols.py | 2 +- src/_pytask/nodes.py | 91 ++++++++++++++----- tests/test_collect_command.py | 8 +- 13 files changed, 146 insertions(+), 118 deletions(-) delete mode 100644 docs/rtd_environment.yml diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4eb1200d..1478786a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,12 +1,17 @@ version: 2 build: - os: "ubuntu-20.04" + os: ubuntu-22.04 tools: - python: "mambaforge-4.10" + python: "3.10" sphinx: + configuration: docs/source/conf.py fail_on_warning: true -conda: - environment: docs/rtd_environment.yml +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml deleted file mode 100644 index 5ac57126..00000000 --- a/docs/rtd_environment.yml +++ /dev/null @@ -1,36 +0,0 @@ -channels: - - conda-forge - - nodefaults - -dependencies: - - python >=3.8 - - pip - - setuptools_scm - - toml - - # Documentation - - furo - - ipython - - nbsphinx - - myst-parser - - sphinx - - sphinx-click - - sphinx-copybutton - - sphinx-design >=0.3.0 - - sphinxext-opengraph - - # Package dependencies necessary for sphinx-click - - attrs >=21.3.0 - - click - - click-default-group - - networkx >=2.4 - - pluggy - - optree >=0.9 - - pexpect - - rich - - sqlalchemy >=1.4.36 - - tomli >=1.0.0 - - typing_extensions - - - pip: - - ../ diff --git a/docs/source/changes.md b/docs/source/changes.md index 9b92ba2c..b3cb26b7 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -5,6 +5,11 @@ chronological order. Releases follow [semantic versioning](https://semver.org/) releases are available on [PyPI](https://pypi.org/project/pytask) and [Anaconda.org](https://anaconda.org/conda-forge/pytask). +## 0.4.1 - 2023-10-08 + +- {pull}`443` ensures that `PythonNode.name` is always unique by only handling it + internally. + ## 0.4.0 - 2023-10-07 - {pull}`323` remove Python 3.7 support and use a new Github action to provide mamba. diff --git a/docs/source/conf.py b/docs/source/conf.py index 981b8d39..ee6e4b72 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -41,6 +41,7 @@ "sphinx.ext.viewcode", "sphinx_copybutton", "sphinx_click", + "sphinx_toolbox.more_autodoc.autoprotocol", "nbsphinx", "myst_parser", "sphinx_design", diff --git a/docs/source/how_to_guides/writing_custom_nodes.md b/docs/source/how_to_guides/writing_custom_nodes.md index bbaadc7c..fbd92867 100644 --- a/docs/source/how_to_guides/writing_custom_nodes.md +++ b/docs/source/how_to_guides/writing_custom_nodes.md @@ -1,8 +1,8 @@ # Writing custom nodes In the previous tutorials and how-to guides, you learned that dependencies and products -can be represented as plain Python objects with {class}`pytask.PythonNode` or as paths -where every {class}`pathlib.Path` is converted to a {class}`pytask.PathNode`. +can be represented as plain Python objects with {class}`~pytask.PythonNode` or as paths +where every {class}`pathlib.Path` is converted to a {class}`~pytask.PathNode`. In this how-to guide, you will learn about the general concept of nodes and how to write your own to improve your workflows. @@ -54,13 +54,13 @@ A custom node needs to follow an interface so that pytask can perform several ac - Load and save values when tasks are executed. This interface is defined by protocols [^structural-subtyping]. A custom node must -follow at least the protocol {class}`pytask.PNode` or, even better, -{class}`pytask.PPathNode` if it is based on a path. The common node for paths, -{class}`pytask.PathNode`, follows the protocol {class}`pytask.PPathNode`. +follow at least the protocol {class}`~pytask.PNode` or, even better, +{class}`~pytask.PPathNode` if it is based on a path. The common node for paths, +{class}`~pytask.PathNode`, follows the protocol {class}`~pytask.PPathNode`. ## `PickleNode` -Since our {class}`PickleNode` will only vary slightly from {class}`pytask.PathNode`, we +Since our {class}`PickleNode` will only vary slightly from {class}`~pytask.PathNode`, we use it as a template, and with some minor modifications, we arrive at the following class. @@ -85,8 +85,8 @@ class. Here are some explanations. -- The node does not need to inherit from the protocol {class}`pytask.PPathNode`, but you - can do it to be more explicit. +- The node does not need to inherit from the protocol {class}`~pytask.PPathNode`, but + you can do it to be more explicit. - The node has two attributes - `name` identifies the node in the DAG, so the name must be unique. - `path` holds the path to the file and identifies the node as a path node that is @@ -107,9 +107,10 @@ Nodes are an important in concept pytask. They allow to pytask to build a DAG an generate a workflow, and they also allow users to extract IO operations from the task function into the nodes. -pytask only implements two node types, {class}`PathNode` and {class}`PythonNode`, but -many more are possible. In the future, there should probably be a plugin that implements -nodes for many other data sources like AWS S3 or databases. [^kedro] +pytask only implements two node types, {class}`~pytask.PathNode` and +{class}`~pytask.PythonNode`, but many more are possible. In the future, there should +probably be a plugin that implements nodes for many other data sources like AWS S3 or +databases. [^kedro] ## References diff --git a/docs/source/reference_guides/api.md b/docs/source/reference_guides/api.md index bad81a53..1bcc16fa 100644 --- a/docs/source/reference_guides/api.md +++ b/docs/source/reference_guides/api.md @@ -239,25 +239,30 @@ The remaining exceptions convey specific errors. ``` -## Nodes +## Protocols -Nodes are the interface for different kinds of dependencies or products. They inherit -from {class}`pytask.MetaNode`. +Protocols define how tasks and nodes for dependencies and products have to be set up. ```{eval-rst} -.. autoclass:: pytask.MetaNode +.. autoprotocol:: pytask.MetaNode + :show-inheritance: +.. autoprotocol:: pytask.PNode + :show-inheritance: +.. autoprotocol:: pytask.PPathNode + :show-inheritance: +.. autoprotocol:: pytask.PTask + :show-inheritance: +.. autoprotocol:: pytask.PTaskWithPath + :show-inheritance: ``` -Then, different kinds of nodes can be implemented. +## Nodes -```{eval-rst} -.. autoclass:: pytask.PathNode - :members: -``` +Nodes are the interface for different kinds of dependencies or products. ```{eval-rst} +.. autoclass:: pytask.PathNode .. autoclass:: pytask.PythonNode - :members: ``` To parse dependencies and products from nodes, use the following functions. @@ -357,8 +362,6 @@ There are some classes to handle different kinds of reports. An indicator to mark arguments of tasks as products. - Examples - -------- >>> def task_example(path: Annotated[Path, Product]) -> None: ... path.write_text("Hello, World!") diff --git a/docs/source/tutorials/defining_dependencies_products.md b/docs/source/tutorials/defining_dependencies_products.md index fa75bad6..97927b64 100644 --- a/docs/source/tutorials/defining_dependencies_products.md +++ b/docs/source/tutorials/defining_dependencies_products.md @@ -320,7 +320,7 @@ Secondly, dictionaries use keys instead of positions that are more verbose and descriptive and do not assume a fixed ordering. Both attributes are especially desirable in complex projects. -## Multiple decorators +**Multiple decorators** pytask merges multiple decorators of one kind into a single dictionary. This might help you to group dependencies and apply them to multiple tasks. @@ -344,7 +344,7 @@ Inside the task, `depends_on` will be {"first_text": ... / "text_1.txt", "second_text": "text_2.txt", 0: "text_3.txt"} ``` -## Nested dependencies and products +**Nested dependencies and products** Dependencies and products can be nested containers consisting of tuples, lists, and dictionaries. It is beneficial if you want more structure and nesting. diff --git a/environment.yml b/environment.yml index d0637049..b5fcff33 100644 --- a/environment.yml +++ b/environment.yml @@ -47,4 +47,5 @@ dependencies: - sphinxext-opengraph - pip: + - sphinx-toolbox - -e . diff --git a/setup.cfg b/setup.cfg index 79b22790..ffb6055e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,6 +54,19 @@ where = src console_scripts = pytask=_pytask.cli:cli +[options.extras_require] +docs = + furo + ipython + myst-parser + nbsphinx + sphinx + sphinx-click + sphinx-copybutton + sphinx-design>=0.3.0 + sphinx-toolbox + sphinxext-opengraph + [check-manifest] ignore = src/_pytask/_version.py diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index 16f7d3a8..c699dce1 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -325,20 +325,8 @@ def pytask_collect_node(session: Session, path: Path, node_info: NodeInfo) -> PN node = node_info.value if isinstance(node, PythonNode): - prefix = ( - node_info.task_path.as_posix() + "::" + node_info.task_name - if node_info.task_path - else node_info.task_name - ) - if node.name: - node.name = prefix + "::" + node.name - else: - node.name = prefix + "::" + node_info.arg_name - - suffix = "-".join(map(str, node_info.path)) if node_info.path else "" - if suffix: - node.name += "::" + suffix - + node_name = _create_name_of_python_node(node_info) + node.name = node_name return node if isinstance(node, PPathNode) and not node.path.is_absolute(): @@ -366,15 +354,7 @@ def pytask_collect_node(session: Session, path: Path, node_info: NodeInfo) -> PN ) return PathNode.from_path(node) - prefix = ( - node_info.task_path.as_posix() + "::" + node_info.task_name - if node_info.task_path - else node_info.task_name - ) - node_name = prefix + "::" + node_info.arg_name - suffix = "-".join(map(str, node_info.path)) if node_info.path else "" - if suffix: - node_name += "::" + suffix + node_name = _create_name_of_python_node(node_info) return PythonNode(value=node, name=node_name) @@ -514,3 +494,17 @@ def pytask_collect_log( ) raise CollectionError + + +def _create_name_of_python_node(node_info: NodeInfo) -> str: + """Create name of PythonNode.""" + prefix = ( + node_info.task_path.as_posix() + "::" + node_info.task_name + if node_info.task_path + else node_info.task_name + ) + node_name = prefix + "::" + node_info.arg_name + if node_info.path: + suffix = "-".join(map(str, node_info.path)) + node_name += "::" + suffix + return node_name diff --git a/src/_pytask/node_protocols.py b/src/_pytask/node_protocols.py index 78c9deb3..3ab328ea 100644 --- a/src/_pytask/node_protocols.py +++ b/src/_pytask/node_protocols.py @@ -22,7 +22,7 @@ class MetaNode(Protocol): """Protocol for an intersection between nodes and tasks.""" name: str - """The name of node that must be unique.""" + """Name of the node that must be unique.""" @abstractmethod def state(self) -> str | None: diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index c051f3f1..c7d4fd29 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -33,22 +33,33 @@ class TaskWithoutPath(PTask): - they are dynamically created in a REPL. - they are created in a Jupyter notebook. + Attributes + ---------- + name + The name of the task. + function + The task function. + depends_on + A list of dependencies of task. + produces + A list of products of task. + markers + A list of markers attached to the task function. + report_sections + Reports with entries for when, what, and content. + + Attributes + ---------- + A dictionary to store additional information of the task. """ name: str - """The base name of the task.""" function: Callable[..., Any] - """The task function.""" depends_on: dict[str, PyTree[PNode]] = field(factory=dict) - """A list of dependencies of task.""" produces: dict[str, PyTree[PNode]] = field(factory=dict) - """A list of products of task.""" markers: list[Mark] = field(factory=list) - """A list of markers attached to the task function.""" report_sections: list[tuple[str, str, str]] = field(factory=list) - """Reports with entries for when, what, and content.""" attributes: dict[Any, Any] = field(factory=dict) - """A dictionary to store additional information of the task.""" def state(self) -> str | None: """Return the state of the node.""" @@ -66,28 +77,43 @@ def execute(self, **kwargs: Any) -> None: @define(kw_only=True) class Task(PTaskWithPath): - """The class for tasks which are Python functions.""" + """The class for tasks which are Python functions. + + base_name + The base name of the task. + path + Path to the file where the task was defined. + function + The task function. + name + The name of the task. + display_name + The shortest uniquely identifiable name for task for display. + depends_on + A list of dependencies of task. + produces + A list of products of task. + markers + A list of markers attached to the task function. + report_sections + Reports with entries for when, what, and content. + + Attributes + ---------- + A dictionary to store additional information of the task. + + """ base_name: str - """The base name of the task.""" path: Path - """Path to the file where the task was defined.""" function: Callable[..., Any] - """The task function.""" name: str = field(default="", init=False) - """The name of the task.""" display_name: str = field(default="", init=False) - """The shortest uniquely identifiable name for task for display.""" depends_on: dict[str, PyTree[PNode]] = field(factory=dict) - """A list of dependencies of task.""" produces: dict[str, PyTree[PNode]] = field(factory=dict) - """A list of products of task.""" markers: list[Mark] = field(factory=list) - """A list of markers attached to the task function.""" report_sections: list[tuple[str, str, str]] = field(factory=list) - """Reports with entries for when, what, and content.""" attributes: dict[Any, Any] = field(factory=dict) - """A dictionary to store additional information of the task.""" def __attrs_post_init__(self: Task) -> None: """Change class after initialization.""" @@ -110,12 +136,19 @@ def execute(self, **kwargs: Any) -> None: @define(kw_only=True) class PathNode(PPathNode): - """The class for a node which is a path.""" + """The class for a node which is a path. + + Attributes + ---------- + name + Name of the node which makes it identifiable in the DAG. + path + The path to the file. + + """ name: str - """Name of the node which makes it identifiable in the DAG.""" path: Path - """The path to the file.""" @classmethod @functools.lru_cache @@ -157,14 +190,22 @@ def save(self, value: bytes | str) -> None: @define(kw_only=True) class PythonNode(PNode): - """The class for a node which is a Python object.""" + """The class for a node which is a Python object. + + Attributes + ---------- + name + Name of the node that is set internally. + value + Value of the node. + hash + Whether the value should be hashed to determine the state. + + """ name: str = "" - """Name of the node.""" value: Any = None - """Value of the node.""" hash: bool | Callable[[Any], bool] = False # noqa: A003 - """Whether the value should be hashed to determine the state.""" def load(self) -> Any: """Load the value.""" diff --git a/tests/test_collect_command.py b/tests/test_collect_command.py index 553e9201..0f7e017d 100644 --- a/tests/test_collect_command.py +++ b/tests/test_collect_command.py @@ -626,7 +626,7 @@ def task_example() -> Annotated[Dict[str, str], nodes]: tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, ["collect", "--nodes", tmp_path.as_posix()]) assert result.exit_code == ExitCode.OK - assert "dict" in result.output - assert "tuple1" in result.output - assert "tuple2" in result.output - assert "int" in result.output + assert "return::0" in result.output + assert "return::1-0" in result.output + assert "return::1-1" in result.output + assert "return::2" in result.output