Add --clean-run option to force Gen 3 to use a new run.

lsst · Mar 3, 2021 · af2a017 · af2a017
1 parent 96aecbd
commit af2a017
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 3 deletions.
diff --git a/doc/lsst.ap.verify/command-line-reference.rst b/doc/lsst.ap.verify/command-line-reference.rst
@@ -41,6 +41,21 @@ Named arguments
 
 Required arguments are :option:`--dataset` and :option:`--output`.
 
+.. option:: --clean-run
+
+   **Rerun ap_verify in a clean Gen 3 run even if the workspace already exists. (Gen 3 only)**
+
+   By default, when ``ap_verify`` is run multiple times with the same :option:`--output` workspace, the previous run collection is reused to avoid repeating processing.
+   If this is undesirable (e.g., when testing experimental config changes), this flag creates a new run, and the pipeline is run from the beginning.
+   This flag has no effect if :option:`--output` is a fresh directory.
+
+   .. note::
+
+      The ``--clean-run`` flag does *not* reset the alert production database,
+      because doing so requires knowledge of the specific database system
+      being used. If the database has been written to by a previous run,
+      clear it by hand before running with ``--clean-run``.
+
 .. option:: --id <dataId>
 
    **Butler data ID.**

diff --git a/python/lsst/ap/verify/pipeline_driver.py b/python/lsst/ap/verify/pipeline_driver.py
@@ -68,6 +68,9 @@ def __init__(self):
         self.add_argument("--skip-pipeline", action="store_true",
                           help="Do not run the AP pipeline itself. This argument is useful "
                                "for testing metrics on a fixed data set.")
+        self.add_argument("--clean-run", action="store_true",
+                          help="Run the pipeline with a new run collection, "
+                               "even if one already exists.")
 
     class AppendOptional(argparse.Action):
         """A variant of the built-in "append" action that ignores None values
@@ -160,7 +163,7 @@ def runApPipeGen3(workspace, parsedCmdLine, processes=1):
     # TODO: collections should be determined exclusively by Workspace.workButler,
     # but I can't find a way to hook that up to the graph builder. So use the CLI
     # for now and revisit once DM-26239 is done.
-    pipelineArgs.extend(_getCollectionArguments(workspace))
+    pipelineArgs.extend(_getCollectionArguments(workspace, reuse=(not parsedCmdLine.clean_run)))
     pipelineArgs.extend(_getConfigArgumentsGen3(workspace, parsedCmdLine))
     if parsedCmdLine.dataIds:
         for singleId in parsedCmdLine.dataIds:
@@ -281,14 +284,17 @@ def _getConfigArgumentsGen3(workspace, parsed):
     return args
 
 
-def _getCollectionArguments(workspace):
+def _getCollectionArguments(workspace, reuse):
     """Return the collections for running the Gen 3 AP Pipeline on this
     workspace, as command-line arguments.
 
     Parameters
     ----------
     workspace : `lsst.ap.verify.workspace.WorkspaceGen3`
         A Workspace with a Gen 3 repository.
+    reuse : `bool`
+        If true, use the previous run collection if one exists. Otherwise,
+        create a new run.
 
     Returns
     -------
@@ -303,6 +309,6 @@ def _getCollectionArguments(workspace):
 
     registry = workspace.workButler.registry
     oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
-    if oldRuns:
+    if reuse and oldRuns:
         args.extend(["--extend-run", "--skip-existing"])
     return args