
Commit

Merge 8d7256c into b65d3a6
tobiasraabe committed Sep 30, 2023
2 parents b65d3a6 + 8d7256c commit 2e5f425
Showing 50 changed files with 658 additions and 1,483 deletions.
1 change: 1 addition & 0 deletions docs/source/changes.md
@@ -48,6 +48,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
- {pull}`428` updates the example in the readme.
- {pull}`429` implements a more informative error message when `node.state()` throws an
exception. Now, it is easy to see which tasks are affected.
- {pull}`430` updates some parts of the documentation.

## 0.3.2 - 2023-06-07

1 change: 1 addition & 0 deletions docs/source/explanations/index.md
@@ -8,6 +8,7 @@ systems in general as well as its design.
maxdepth: 1
---
why_pytask
interfaces_for_dependencies_products
comparison_to_other_tools
pluggy
```
30 changes: 30 additions & 0 deletions docs/source/explanations/interfaces_for_dependencies_products.md
@@ -0,0 +1,30 @@
# Interfaces for dependencies and products

There are different interfaces for dependencies and products, and it can be confusing
when to use which. The tables below give you an overview to help you decide which
interface is most suitable for you.

## Legend

- ✅ = True
- ❌ = False
- ➖ = Does not apply

## Products

| | `Annotated[..., PNode, Product]` | `@task(produces=...)` | `produces` | `def task() -> Annotated[..., PNode]` | `@pytask.mark.produces(...)` |
| --------------------------------------------------------- | :------------------------------: | :-------------------: | :--------: | :------------------------------------: | :--------------------------: |
| Not deprecated ||||||
| No type annotations required ||||||
| Flexible choice of argument name ||||||
| Supports third-party functions as tasks ||||||
| Allows passing a custom node while preserving the value's type ||||||

## Dependencies

| | `Annotated[..., PNode]` | `@task(kwargs=...)` | `@pytask.mark.depends_on(...)` |
| --------------------------------------- | :---------------------: | :-----------------: | :----------------------------: |
| Not deprecated ||||
| No type annotations required ||||
| Flexible choice of argument name ||||
| Supports third-party functions as tasks ||||
101 changes: 4 additions & 97 deletions docs/source/how_to_guides/bp_scalable_repetitions_of_tasks.md
@@ -62,54 +62,12 @@ What is new are the local configuration files in each subfolder of `my_project`,
contain objects shared across tasks. For example, `config.py` holds the paths to the
processed data and the names of the data sets.

```python
# Content of config.py

from my_project.config import BLD
from my_project.config import SRC


DATA = ["data_0", "data_1", "data_2", "data_3"]


def path_to_input_data(name):
    return SRC / "data" / f"{name}.csv"


def path_to_processed_data(name):
    return BLD / "data" / f"processed_{name}.pkl"
```

```{literalinclude} ../../../docs_src/how_to_guides/bp_scalable_repetitions_of_tasks_1.py
```

The task file `task_prepare_data.py` uses these objects to build the parametrization.

```python
# Content of task_prepare_data.py

from pytask import task

from my_project.data_preparation.config import DATA
from my_project.data_preparation.config import path_to_input_data
from my_project.data_preparation.config import path_to_processed_data


def _create_parametrization(data):
    id_to_kwargs = {}
    for data_name in data:
        depends_on = path_to_input_data(data_name)
        produces = path_to_processed_data(data_name)

        id_to_kwargs[data_name] = {"depends_on": depends_on, "produces": produces}

    return id_to_kwargs


_ID_TO_KWARGS = _create_parametrization(DATA)

for id_, kwargs in _ID_TO_KWARGS.items():

    @task(id=id_, kwargs=kwargs)
    def task_prepare_data(depends_on, produces):
        ...
```

```{literalinclude} ../../../docs_src/how_to_guides/bp_scalable_repetitions_of_tasks_2.py
```

All arguments for the loop and the {func}`@task <pytask.task>` decorator are built within
@@ -127,25 +85,7 @@ an explicit id.
Next, we move to the estimation to see how we can build another parametrization upon the
previous one.

```python
# Content of config.py

from my_project.config import BLD
from my_project.data_preparation.config import DATA


_MODELS = ["linear_probability", "logistic_model", "decision_tree"]


ESTIMATIONS = {
    f"{data_name}_{model_name}": {"model": model_name, "data": data_name}
    for model_name in _MODELS
    for data_name in DATA
}


def path_to_estimation_result(name):
    return BLD / "estimation" / f"estimation_{name}.pkl"
```

```{literalinclude} ../../../docs_src/how_to_guides/bp_scalable_repetitions_of_tasks_3.py
```

In the local configuration, we define `ESTIMATIONS` which combines the information on
@@ -159,40 +99,7 @@ pytask -k linear_probability_data_0

And here is the task file.

```python
# Content of task_estimate_models.py

from pytask import task

from my_project.data_preparation.config import path_to_processed_data
from my_project.estimations.config import ESTIMATIONS
from my_project.estimations.config import path_to_estimation_result


def _create_parametrization(estimations):
    id_to_kwargs = {}
    for name, config in estimations.items():
        depends_on = path_to_processed_data(config["data"])
        produces = path_to_estimation_result(name)

        id_to_kwargs[name] = {
            "depends_on": depends_on,
            "model": config["model"],
            "produces": produces,
        }

    return id_to_kwargs


_ID_TO_KWARGS = _create_parametrization(ESTIMATIONS)


for id_, kwargs in _ID_TO_KWARGS.items():

    @task(id=id_, kwargs=kwargs)
    def task_estimate_models(depends_on, model, produces):
        if model == "linear_probability":
            ...
```

```{literalinclude} ../../../docs_src/how_to_guides/bp_scalable_repetitions_of_tasks_4.py
```

Replicating this pattern across a project allows a clean way to define parametrizations.
39 changes: 1 addition & 38 deletions docs/source/how_to_guides/bp_structure_of_task_files.md
@@ -72,44 +72,7 @@ leading underscore which are used to accomplish this and only this task.

Here is an example of a task module which follows all of this advice.

```python
# Content of task_census_data.py

import pandas as pd
import pytask

from checks import perform_general_checks_on_data


@pytask.mark.depends_on("raw_census.csv")
@pytask.mark.produces("census.pkl")
def task_prepare_census_data(depends_on, produces):
    """Prepare the census data.

    This task prepares the data in three steps.

    1. Clean the data.
    2. Create new variables.
    3. Perform some checks on the new data.

    """
    df = pd.read_csv(depends_on)

    df = _clean_data(df)

    df = _create_new_variables(df)

    perform_general_checks_on_data(df)

    df.to_pickle(produces)


def _clean_data(df):
    ...


def _create_new_variables(df):
    ...
```

```{literalinclude} ../../../docs_src/how_to_guides/bp_structure_of_task_files.py
```

:::{seealso}
33 changes: 2 additions & 31 deletions docs/source/how_to_guides/capture_warnings.md
@@ -4,21 +4,7 @@ pytask captures warnings during the execution.

Here is an example with the most infamous warning in the world of scientific Python.

```python
import pandas as pd
import pytask


def _create_df():
    df = pd.DataFrame({"a": range(10), "b": range(10, 20)})
    df[df["a"] < 5]["b"] = 1
    return df


@pytask.mark.produces("df.pkl")
def task_warning(produces):
    df = _create_df()
    df.to_pickle(produces)
```

```{literalinclude} ../../../docs_src/how_to_guides/capturing_warnings_1.py
```

Running pytask produces
@@ -66,22 +52,7 @@ You can use `@pytask.mark.filterwarnings` to add warning filters to specific
items, allowing you to have finer control of which warnings should be captured at test,
class or even module level:

```python
import pandas as pd
import pytask


def _create_df():
    df = pd.DataFrame({"a": range(10), "b": range(10, 20)})
    df[df["a"] < 5]["b"] = 1
    return df


@pytask.mark.filterwarnings("ignore:.*:SettingWithCopyWarning")
@pytask.mark.produces("df.pkl")
def task_warning(produces):
    df = _create_df()
    df.to_pickle(produces)
```

```{literalinclude} ../../../docs_src/how_to_guides/capturing_warnings_2.py
```
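Filters can also be set project-wide. Assuming pytask reads a `filterwarnings` option from the `[tool.pytask.ini_options]` table in `pyproject.toml`, mirroring pytest's ini option of the same name, a configuration could look like this sketch:

```toml
[tool.pytask.ini_options]
filterwarnings = [
    # Turn every warning into an error ...
    "error",
    # ... except DeprecationWarning, which is only ignored.
    "ignore::DeprecationWarning",
]
```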

Filters applied using a mark take precedence over filters passed on the command line or
