From 7a47924627b30dc774efef0faae08e13e44c350a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Fri, 23 Feb 2024 16:47:22 +0100 Subject: [PATCH] feat: add ability to return input functions from input functions. Such nesting is evaluated 10 times at most. Beyond that, an error is thrown. (#2717) ### Description Snakemake fails building the DAG when a function containing an inner-function is called within an expand-command. ### QC * [x] The PR contains a test case for the changes or the changes are already covered by an existing test case. * [x] The documentation (`docs/`) is updated to reflect the changes or this is not necessary (e.g. if the change does neither modify the language nor the behavior or functionalities of Snakemake). --------- Co-authored-by: Johannes Koester --- docs/snakefiles/rules.rst | 5 ++ snakemake/rules.py | 67 +++++++++++++------- tests/test_inner_call/Snakefile | 22 +++++++ tests/test_inner_call/a.in | 0 tests/test_inner_call/expected-results/a.txt | 0 tests/tests.py | 5 ++ 6 files changed, 75 insertions(+), 24 deletions(-) create mode 100644 tests/test_inner_call/Snakefile create mode 100644 tests/test_inner_call/a.in create mode 100644 tests/test_inner_call/expected-results/a.txt diff --git a/docs/snakefiles/rules.rst b/docs/snakefiles/rules.rst index 5d79928ad..4aa4876a8 100644 --- a/docs/snakefiles/rules.rst +++ b/docs/snakefiles/rules.rst @@ -186,6 +186,11 @@ The function has to accept a single argument that will be the wildcards object g Note that you can also use `lambda expressions `_ instead of full function definitions. By this, rules can have entirely different input files (both in form and number) depending on the inferred wildcards. E.g. you can assign input files that appear in entirely different parts of your filesystem based on some wildcard value and a dictionary that maps the wildcard value to file paths. +.. 
sidebar:: Note + + Input functions can themselves return input functions again (this also holds for functions given to params and resources). + Such nested evaluation is allowed for a depth up to 10. Afterwards, an exception will be thrown. + In addition to a single wildcards argument, input functions can optionally take a ``groupid`` (with exactly that name) as second argument, see :ref:`snakefiles_group-local` for details. Finally, when implementing the input function, it is best practice to make sure that it can properly handle all possible wildcard values your rule can have. diff --git a/snakemake/rules.py b/snakemake/rules.py index b3ac2a4e4..e61477db8 100644 --- a/snakemake/rules.py +++ b/snakemake/rules.py @@ -598,7 +598,6 @@ def apply_input_function( groupid=None, **aux_params, ): - incomplete = False if isinstance(func, _IOFile): func = func._file.callable elif isinstance(func, AnnotatedString): @@ -621,29 +620,49 @@ def apply_input_function( if callable(value): _aux_params[name] = value() - try: - value = func(Wildcards(fromdict=wildcards), **_aux_params) - if isinstance(value, types.GeneratorType): - # generators should be immediately collected here, - # otherwise we would miss any exceptions and - # would have to capture them again later. - value = list(value) - except IncompleteCheckpointException as e: - value = incomplete_checkpoint_func(e) - incomplete = True - except Exception as e: - if "input" in aux_params and is_file_not_found_error( - e, aux_params["input"] - ): - # Function evaluation can depend on input files. Since expansion can happen during dryrun, - # where input files are not yet present, we need to skip such cases and - # mark them as . 
- value = TBDString() - elif raw_exceptions: - raise e - else: - raise InputFunctionException(e, rule=self, wildcards=wildcards) - return value, incomplete + wildcards_arg = Wildcards(fromdict=wildcards) + + def apply_func(func): + incomplete = False + try: + value = func(wildcards_arg, **_aux_params) + if isinstance(value, types.GeneratorType): + # generators should be immediately collected here, + # otherwise we would miss any exceptions and + # would have to capture them again later. + value = list(value) + except IncompleteCheckpointException as e: + value = incomplete_checkpoint_func(e) + incomplete = True + except Exception as e: + if "input" in aux_params and is_file_not_found_error( + e, aux_params["input"] + ): + # Function evaluation can depend on input files. Since expansion can happen during dryrun, + # where input files are not yet present, we need to skip such cases and + # mark them as . + value = TBDString() + elif raw_exceptions: + raise e + else: + raise InputFunctionException(e, rule=self, wildcards=wildcards) + return value, incomplete + + res = func + tries = 0 + while (callable(res) or tries == 0) and tries < 10: + res, incomplete = apply_func(res) + tries += 1 + if tries == 10: + raise WorkflowError( + "Evaluated 10 nested input functions (i.e. input functions that " + "themselves return an input function.). More than 10 such nested " + "evaluations are not allowed. 
Does the workflow accidentally return a " + "function instead of calling it in the input function?", + rule=self, + ) + + return res, incomplete def _apply_wildcards( self, diff --git a/tests/test_inner_call/Snakefile b/tests/test_inner_call/Snakefile new file mode 100644 index 000000000..df5e56c9a --- /dev/null +++ b/tests/test_inner_call/Snakefile @@ -0,0 +1,22 @@ +def some_b(): + def inner(wildcards): + return {wildcards.x} + return inner + +def some_a(): + def inner(wildcards): + return expand("{x}.in", x=some_b()) + return inner + +rule all: + input: + "a.txt" + +rule b: + input: + some_a() + output: + "{x}.txt" + shell: + "touch {output}" + diff --git a/tests/test_inner_call/a.in b/tests/test_inner_call/a.in new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_inner_call/expected-results/a.txt b/tests/test_inner_call/expected-results/a.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tests.py b/tests/tests.py index 3dc86c336..2717ba87f 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -2028,3 +2028,8 @@ def test_set_resources_human_readable(): dpath("test05"), shellcmd="snakemake -c1 --set-resources \"compute1:runtime='50h'\"", ) + + +@skip_on_windows +def test_call_inner(): + run(dpath("test_inner_call"))