From 326bf7c4742061efa5d841ec27e9ba80b6cec647 Mon Sep 17 00:00:00 2001
From: Christophe Clienti <christophe.clienti@cfm.fr>
Date: Mon, 7 Mar 2022 14:17:16 +0100
Subject: [PATCH] fix: added missing input files in reason.updated_input in
 dag.py

Consider a rule 'A' that takes 'N' inputs produced by a rule 'B' with
different wildcards. The input function of 'A' requests output files
from 'B'.

If a first run has already generated the outputs of 'A' and 'B', and
the input function of 'A' then requests new input files from 'B' that
have not been generated yet, snakemake neither generates the missing
'B' output files nor regenerates the 'A' output, although the
`--list-input-changes` option correctly lists the missing files.

This commit makes snakemake generate the missing 'B' outputs and
regenerate the 'A' output.
---
 snakemake/dag.py                                   |  4 +++-
 tests/test_update_input/Snakefile                  | 14 ++++++++++++++
 tests/test_update_input/expected-results/A.txt     |  1 +
 tests/test_update_input/expected-results/B-doe.txt |  0
 .../test_update_input/expected-results/B-john.txt  |  0
 tests/tests.py                                     |  5 +++++
 6 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_update_input/Snakefile
 create mode 100644 tests/test_update_input/expected-results/A.txt
 create mode 100644 tests/test_update_input/expected-results/B-doe.txt
 create mode 100644 tests/test_update_input/expected-results/B-john.txt

diff --git a/snakemake/dag.py b/snakemake/dag.py
index d768728bbf..b0fbae1bbc 100755
--- a/snakemake/dag.py
+++ b/snakemake/dag.py
@@ -996,7 +996,9 @@ def update_needrun(job):
                 output_mintime_ = output_mintime.get(job)
                 if output_mintime_:
                     updated_input = [
-                        f for f in job.input if f.exists and f.is_newer(output_mintime_)
+                        f
+                        for f in job.input
+                        if (f.exists and f.is_newer(output_mintime_)) or (not f.exists)
                     ]
                     reason.updated_input.update(updated_input)
             if noinitreason and reason:
diff --git a/tests/test_update_input/Snakefile b/tests/test_update_input/Snakefile
new file mode 100644
index 0000000000..6204ab1037
--- /dev/null
+++ b/tests/test_update_input/Snakefile
@@ -0,0 +1,14 @@
+rule all:
+    input:
+        lambda wildcards: [rules.B.output[0].format(name=name)
+                           for name in config.get("names", "john").split(",")]
+    output:
+        "A.txt"
+
+    run:
+        with open(output[0], "w") as f:  # closes (and flushes) A.txt on exit
+            f.write(' '.join(input) + "\n")
+
+rule B:
+    output:
+        touch("B-{name}.txt")
diff --git a/tests/test_update_input/expected-results/A.txt b/tests/test_update_input/expected-results/A.txt
new file mode 100644
index 0000000000..55da7d815e
--- /dev/null
+++ b/tests/test_update_input/expected-results/A.txt
@@ -0,0 +1 @@
+B-john.txt B-doe.txt
diff --git a/tests/test_update_input/expected-results/B-doe.txt b/tests/test_update_input/expected-results/B-doe.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/test_update_input/expected-results/B-john.txt b/tests/test_update_input/expected-results/B-john.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/tests.py b/tests/tests.py
index 63abd0edc4..0a9b47d388 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -1523,3 +1523,8 @@ def test_groupid_expand_cluster():
 @skip_on_windows
 def test_service_jobs():
     run(dpath("test_service_jobs"), check_md5=False)
+
+
+def test_update_input():  # two passes over the same workdir; 2nd adds an input
+    run(dpath("test_update_input"), cleanup=False, check_results=False)  # john only
+    run(dpath("test_update_input"), config={"names": "john,doe"})  # doe is new