lsst · andy-slac · Aug 12, 2021 · Jul 27, 2021 · Aug 2, 2021 · Aug 9, 2021
diff --git a/doc/changes/DM-27492.feature.md b/doc/changes/DM-27492.feature.md
@@ -0,0 +1,8 @@
+Several improvements in `pipetask` execution options:
+- New option `--skip-existing-in` which takes collection name(s); if output
+  datasets already exist in those collections, corresponding quanta are skipped.
+- The `--skip-existing` option is now equivalent to appending the output run
+  collection to the `--skip-existing-in` list.
+- The `--extend-run` option implicitly enables the `--skip-existing` option.
+- A `--prune-replaced=unstore` option only removes regular output datasets;
+  InitOutputs, task configs, and package versions are not removed.
diff --git a/python/lsst/ctrl/mpexec/cli/opt/optionGroups.py b/python/lsst/ctrl/mpexec/cli/opt/optionGroups.py
@@ -67,6 +67,7 @@ def __init__(self):
             ctrlMpExecOpts.qgraph_option(),
             ctrlMpExecOpts.qgraph_id_option(),
             ctrlMpExecOpts.qgraph_node_id_option(),
+            ctrlMpExecOpts.skip_existing_in_option(),
             ctrlMpExecOpts.skip_existing_option(),
             ctrlMpExecOpts.clobber_outputs_option(),
             ctrlMpExecOpts.save_qgraph_option(),

diff --git a/python/lsst/ctrl/mpexec/cli/opt/options.py b/python/lsst/ctrl/mpexec/cli/opt/options.py
@@ -84,8 +84,8 @@ def _to_int(value):
 extend_run_option = MWOptionDecorator("--extend-run",
                                       help=unwrap("""Instead of creating a new RUN collection, insert datasets
                                                   into either the one given by --output-run (if provided) or
-                                                  the first child collection of - -output(which must be of
-                                                  type RUN)."""),
+                                                  the first child collection of --output (which must be of
+                                                  type RUN). This also enables --skip-existing option."""),
                                       is_flag=True)
 
 
@@ -244,19 +244,37 @@ def _to_int(value):
                                 multiple=True)
 
 
-skip_existing_option = MWOptionDecorator("--skip-existing",
-                                         help=unwrap("""If all Quantum outputs already exist in the output RUN
-                                                     collection then that Quantum will be excluded from the
-                                                     QuantumGraph. Requires the 'run` command's `--extend-run`
-                                                     flag to be set."""),
-                                         is_flag=True)
+skip_existing_in_option = MWOptionDecorator(
+    "--skip-existing-in",
+    callback=split_commas,
+    default=None,
+    metavar="COLLECTION",
+    multiple=True,
+    help=unwrap(
+        """If all Quantum outputs already exist in the specified list of
+        collections then that Quantum will be excluded from the QuantumGraph.
+        """
+    )
+)
+
+
+skip_existing_option = MWOptionDecorator(
+    "--skip-existing",
+    is_flag=True,
+    help=unwrap(
+        """This option is equivalent to --skip-existing-in with the name of
+        the output RUN collection. If both --skip-existing-in and
+        --skip-existing are given then output RUN collection is appended to
+        the list of collections."""
+    )
+)
 
 
 clobber_outputs_option = MWOptionDecorator("--clobber-outputs",
                                            help=unwrap("""Remove outputs from previous execution of the same
-                                                       quantum before new execution.  If `--skip-existing`
+                                                       quantum before new execution.  If --skip-existing
                                                        is also passed, then only failed quanta will be
-                                                       clobbered. Requires the 'run` command's `--extend-run`
+                                                       clobbered. Requires the 'run' command's --extend-run
                                                        flag to be set."""),
                                            is_flag=True)
 

diff --git a/python/lsst/ctrl/mpexec/cli/script/qgraph.py b/python/lsst/ctrl/mpexec/cli/script/qgraph.py
@@ -27,9 +27,10 @@
 _log = logging.getLogger(__name__.partition(".")[2])
 
 
-def qgraph(pipelineObj, qgraph, qgraph_id, qgraph_node_id, skip_existing, save_qgraph, save_single_quanta,
-           qgraph_dot, butler_config, input, output, output_run, extend_run, replace_run, prune_replaced,
-           data_query, show, save_execution_butler, clobber_execution_butler, clobber_outputs, **kwargs):
+def qgraph(pipelineObj, qgraph, qgraph_id, qgraph_node_id, skip_existing_in, skip_existing, save_qgraph,
+           save_single_quanta, qgraph_dot, butler_config, input, output, output_run, extend_run,
+           replace_run, prune_replaced, data_query, show, save_execution_butler, clobber_execution_butler,
+           clobber_outputs, **kwargs):
     """Implements the command line interface `pipetask qgraph` subcommand,
     should only be called by command line tools and unit test code that test
     this function.
@@ -48,10 +49,12 @@ def qgraph(pipelineObj, qgraph, qgraph_id, qgraph_node_id, skip_existing, save_q
     qgraph_node_id : `list` of `int`, optional
         Only load a specified set of nodes if graph is loaded from a file,
         nodes are identified by integer IDs.
+    skip_existing_in : `list` [ `str` ]
+        List of collections; if all Quantum outputs already exist in the
+        specified list of collections then that Quantum will be excluded
+        from the QuantumGraph.
     skip_existing : `bool`
-        If all Quantum outputs already exist in the output RUN collection then
-        that Quantum will be excluded from the QuantumGraph. Will only be used
-        if `extend_run` flag is set.
+        Appends output RUN collection to the ``skip_existing_in`` list.
     save_qgraph : `str` or `None`
         URI location for storing a serialized quantum graph definition as a
         pickle file.
@@ -67,10 +70,8 @@ def qgraph(pipelineObj, qgraph, qgraph_id, qgraph_node_id, skip_existing, save_q
         butler/registry config file. If `dict`, `butler_config` is key value
         pairs used to init or update the `lsst.daf.butler.Config` instance. If
         `Config`, it is the object used to configure a Butler.
-    input : `str`
-        Comma-separated names of the input collection(s). Entries may include a
-        colon (:), the first string is a dataset type name that restricts the
-        search in that collection.
+    input : `list` [ `str` ]
+        List of names of the input collection(s).
     output : `str`
         Name of the output CHAINED collection. This may either be an existing
         CHAINED collection to use as both input and output (if `input` is
@@ -136,6 +137,7 @@ def qgraph(pipelineObj, qgraph, qgraph_id, qgraph_node_id, skip_existing, save_q
                            prune_replaced=prune_replaced,
                            data_query=data_query,
                            show=show,
+                           skip_existing_in=skip_existing_in,
                            skip_existing=skip_existing,
                            execution_butler_location=save_execution_butler,
                            clobber_execution_butler=clobber_execution_butler,

diff --git a/python/lsst/ctrl/mpexec/cli/script/run.py b/python/lsst/ctrl/mpexec/cli/script/run.py
@@ -46,6 +46,7 @@ def run(do_raise,
         replace_run,
         prune_replaced,
         data_query,
+        skip_existing_in,
         skip_existing,
         debug,
         fail_fast,
@@ -89,10 +90,8 @@ def run(do_raise,
         butler/registry config file. If `dict`, `butler_config` is key value
         pairs used to init or update the `lsst.daf.butler.Config` instance. If
         `Config`, it is the object used to configure a Butler.
-    input : `str`
-        Comma-separated names of the input collection(s). Entries may include a
-        colon (:), the first string is a dataset type name that restricts the
-        search in that collection.
+    input : `list` [ `str` ]
+        List of names of the input collection(s).
     output : `str`
         Name of the output CHAINED collection. This may either be an existing
         CHAINED collection to use as both input and output (if `input` is
@@ -122,10 +121,12 @@ def run(do_raise,
         removing them and the RUN completely ("purge"). Requires `replace_run`.
     data_query : `str`
         User query selection expression.
+    skip_existing_in : `list` [ `str` ]
+        List of collections; if all Quantum outputs already exist in the
+        specified list of collections then that Quantum will be excluded
+        from the QuantumGraph.
     skip_existing : `bool`
-        If all Quantum outputs already exist in the output RUN collection then
-        that Quantum will be excluded from the QuantumGraph. Requires the 'run`
-        command's `--extend-run` flag to be set.
+        Appends output RUN collection to the ``skip_existing_in`` list.
     debug : `bool`
         If true, enable debugging output using lsstDebug facility (imports
         debug.py).
@@ -159,6 +160,7 @@ def run(do_raise,
                            replace_run=replace_run,
                            prune_replaced=prune_replaced,
                            data_query=data_query,
+                           skip_existing_in=skip_existing_in,
                            skip_existing=skip_existing,
                            enableLsstDebug=debug,
                            fail_fast=fail_fast,