Merge branch 'tickets/DM-27154' into master

lsst · Nov 17, 2020 · 6353d3e · 6353d3e
2 parents dcdd93c + 5e9389d
commit 6353d3e
Show file tree

Hide file tree

Showing 13 changed files with 647 additions and 312 deletions.
diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py
@@ -24,12 +24,20 @@
 """
 from __future__ import annotations
 
-__all__ = ("Butler", "ButlerValidationError")
+__all__ = (
+    "Butler",
+    "ButlerValidationError",
+    "PruneCollectionsArgsError",
+    "PurgeWithoutUnstorePruneCollectionsError",
+    "RunWithoutPurgePruneCollectionsError",
+    "PurgeUnsupportedPruneCollectionsError",
+)
+
 
-import os
 from collections import defaultdict
 import contextlib
 import logging
+import os
 from typing import (
     Any,
     ClassVar,
@@ -84,6 +92,40 @@ class ButlerValidationError(ValidationError):
     pass
 
 
+class PruneCollectionsArgsError(TypeError):
+    """Base class for errors relating to Butler.pruneCollections input
+    arguments.
+    """
+    pass
+
+
+class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError):
+    """Raised when purge and unstore are both required to be True, and
+    purge is True but unstore is False.
+    """
+
+    def __init__(self):
+        super().__init__("Cannot pass purge=True without unstore=True.")
+
+
+class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError):
+    """Raised when pruning a RUN collection but purge is False."""
+
+    def __init__(self, collectionType):
+        self.collectionType = collectionType
+        super().__init__(f"Cannot prune RUN collection {self.collectionType.name} without purge=True.")
+
+
+class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError):
+    """Raised when purge is True but is not supported for the given
+    collection."""
+
+    def __init__(self, collectionType):
+        self.collectionType = collectionType
+        super().__init__(
+            f"Cannot prune {self.collectionType} collection {self.collectionType.name} with purge=True.")
+
+
 class Butler:
     """Main entry point for the data access system.
 
@@ -1068,19 +1110,21 @@ def pruneCollection(self, name: str, purge: bool = False, unstore: bool = False)
             Raised if the butler is read-only or arguments are mutually
             inconsistent.
         """
+
         # See pruneDatasets comments for more information about the logic here;
         # the cases are almost the same, but here we can rely on Registry to
         # take care of everything but Datastore deletion when we remove the
         # collection.
         if not self.isWriteable():
             raise TypeError("Butler is read-only.")
-        if purge and not unstore:
-            raise TypeError("Cannot pass purge=True without unstore=True.")
         collectionType = self.registry.getCollectionType(name)
+        if purge and not unstore:
+            raise PurgeWithoutUnstorePruneCollectionsError()
         if collectionType is CollectionType.RUN and not purge:
-            raise TypeError(f"Cannot prune RUN collection {name} without purge=True.")
+            raise RunWithoutPurgePruneCollectionsError(collectionType)
         if collectionType is not CollectionType.RUN and purge:
-            raise TypeError(f"Cannot prune {collectionType.name} collection {name} with purge=True.")
+            raise PurgeUnsupportedPruneCollectionsError(collectionType)
+
         with self.registry.transaction():
             if unstore:
                 for ref in self.registry.queryDatasets(..., collections=name, findFirst=True):

diff --git a/python/lsst/daf/butler/cli/cmd/commands.py b/python/lsst/daf/butler/cli/cmd/commands.py
@@ -24,6 +24,7 @@
 
 from ..opt import (
     collection_type_option,
+    collection_argument,
     collections_option,
     dataset_type_option,
     datasets_option,
@@ -37,7 +38,7 @@
     where_option,
 )
 
-from ..utils import cli_handle_exception, split_commas, typeStrAcceptsMultiple, unwrap
+from ..utils import cli_handle_exception, split_commas, to_upper, typeStrAcceptsMultiple, unwrap
 from ... import script
 
 
@@ -122,11 +123,10 @@ def config_validate(*args, **kwargs):
 
 @click.command()
 @repo_argument(required=True)
-@click.option("--collection",
-              help=unwrap("""Name of the collection to remove. If this is a TAGGED or CHAINED collection,
-                          datasets within the collection are not modified unless --unstore is passed. If this
-                          is a RUN collection, --purge and --unstore must be passed, and all datasets in it
-                          are fully removed from the data repository. """))
+@collection_argument(help=unwrap("""COLLECTION is the Name of the collection to remove. If this is a tagged or
+                          chained collection, datasets within the collection are not modified unless --unstore
+                          is passed. If this is a run collection, --purge and --unstore must be passed, and
+                          all datasets in it are fully removed from the data repository."""))
 @click.option("--purge",
               help=unwrap("""Permit RUN collections to be removed, fully removing datasets within them.
                           Requires --unstore as an added precaution against accidental deletion. Must not be
@@ -146,19 +146,25 @@ def prune_collection(**kwargs):
 @glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
                     "collections to return.")
 @collection_type_option()
-@click.option("--flatten-chains/--no-flatten-chains",
-              help="Recursively get the child collections of matching CHAINED collections. Default is "
-                   "--no-flatten-chains.")
-@click.option("--include-chains/--no-include-chains",
-              default=None,
-              help="For --include-chains, return records for matching CHAINED collections. For "
-                   "--no-include-chains do not return records for CHAINED collections. Default is the "
-                   "opposite of --flatten-chains: include either CHAINED collections or their children, but "
-                   "not both.")
+@click.option("--chains",
+              default="table",
+              help=unwrap("""Affects how results are presented. TABLE lists each dataset in a row with
+                          chained datasets' children listed in a Definition column. TREE lists children below
+                          their parent in tree form. FLATTEN lists all datasets, including child datasets in
+                          one list.Defaults to TABLE. """),
+              callback=to_upper,
+              type=click.Choice(("TABLE", "TREE", "FLATTEN"), case_sensitive=False))
 @options_file_option()
 def query_collections(*args, **kwargs):
     """Get the collections whose names match an expression."""
-    print(yaml.dump(cli_handle_exception(script.queryCollections, *args, **kwargs)))
+    table = cli_handle_exception(script.queryCollections, *args, **kwargs)
+    # The unit test that mocks script.queryCollections does not return a table
+    # so we need the following `if`.
+    if table:
+        # When chains==TREE, the children of chained collections are indented
+        # relative to their parents. For this to work properly the table must
+        # be left-aligned.
+        table.pprint_all(align="<")
 
 
 @click.command()

diff --git a/python/lsst/daf/butler/cli/opt/arguments.py b/python/lsst/daf/butler/cli/opt/arguments.py
@@ -22,6 +22,12 @@
 from ..utils import MWArgumentDecorator, split_commas
 
 
+collection_argument = MWArgumentDecorator("collection")
+
+dimensions_argument = MWArgumentDecorator("dimensions",
+                                          callback=split_commas,
+                                          nargs=-1)
+
 directory_argument = MWArgumentDecorator("directory",
                                          help="DIRECTORY is the folder containing dataset files.")
 
@@ -30,12 +36,8 @@
                                     help="GLOB is one or more strings to apply to the search.",
                                     nargs=-1)
 
-repo_argument = MWArgumentDecorator("repo")
-
 locations_argument = MWArgumentDecorator("locations",
                                          callback=split_commas,
                                          nargs=-1)
 
-dimensions_argument = MWArgumentDecorator("dimensions",
-                                          callback=split_commas,
-                                          nargs=-1)
+repo_argument = MWArgumentDecorator("repo")
diff --git a/python/lsst/daf/butler/cli/utils.py b/python/lsst/daf/butler/cli/utils.py
@@ -449,13 +449,14 @@ def cli_handle_exception(func, *args, **kwargs):
     if mockEnvVarKey in os.environ:
         Mocker(*args, **kwargs)
         return
+
     try:
         return func(*args, **kwargs)
-    except Exception:
+    except Exception as e:
         msg = io.StringIO()
-        msg.write("An error occurred during command execution:\n")
         traceback.print_exc(file=msg)
-        raise click.ClickException(msg.getvalue())
+        log.debug(msg.getvalue())
+        raise click.ClickException(e) from e
 
 
 class option_section:  # noqa: N801

diff --git a/python/lsst/daf/butler/script/pruneCollection.py b/python/lsst/daf/butler/script/pruneCollection.py
@@ -20,6 +20,11 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from .. import Butler
+from .. import (
+    PurgeWithoutUnstorePruneCollectionsError,
+    RunWithoutPurgePruneCollectionsError,
+    PurgeUnsupportedPruneCollectionsError,
+)
 
 
 def pruneCollection(repo, collection, purge, unstore):
@@ -37,4 +42,12 @@ def pruneCollection(repo, collection, purge, unstore):
         Same as the ``unstore`` argument to ``Butler.pruneCollection``.
     """
     butler = Butler(repo, writeable=True)
-    butler.pruneCollection(collection, purge, unstore)
+    try:
+        butler.pruneCollection(collection, purge, unstore)
+    except PurgeWithoutUnstorePruneCollectionsError as e:
+        raise TypeError("Cannot pass --purge without --unstore.") from e
+    except RunWithoutPurgePruneCollectionsError as e:
+        raise TypeError(f"Cannot prune RUN collection {e.collectionType.name} without --purge.") from e
+    except PurgeUnsupportedPruneCollectionsError as e:
+        raise TypeError(
+            f"Cannot prune {e.collectionType} collection {e.collectionType.name} with --purge.") from e
diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py
@@ -19,11 +19,15 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+from astropy.table import Table
+import itertools
+from numpy import array
+
 from .. import Butler
 from ..core.utils import globToRegex
 
 
-def queryCollections(repo, glob, collection_type, flatten_chains, include_chains):
+def queryCollections(repo, glob, collection_type, chains):
     """Get the collections whose names match an expression.
 
     Parameters
@@ -36,19 +40,15 @@ def queryCollections(repo, glob, collection_type, flatten_chains, include_chains
         the dataset type names to search for.
     collection_type : `Iterable` [ `CollectionType` ], optional
         If provided, only return collections of these types.
-    flatten_chains : `bool`
-        If `True` (`False` is default), recursively yield the child collections
-        of matching `~CollectionType.CHAINED` collections.
-    include_chains : `bool` or `None`
-        If `True`, yield records for matching `~CollectionType.CHAINED`
-        collections.  Default is the opposite of ``flattenChains``: include
-        either CHAINED collections or their children, but not both.
+    chains : `str`
+        Must be one of "FLATTEN", "TABLE", or "TREE" (case sensitive).
+        Affects contents and formatting of results, see
+        ``cli.commands.query_collections``.
 
     Returns
     -------
-    collections : `dict` [`str`, [`str`]]
-        A dict whose key is "collections" and whose value is a list of
-        collection names.
+    collections : `astropy.table.Table`
+        A table containing information about collections.
     """
     butler = Butler(repo)
     expression = globToRegex(glob)
@@ -57,8 +57,71 @@ def queryCollections(repo, glob, collection_type, flatten_chains, include_chains
     kwargs = {}
     if expression:
         kwargs["expression"] = expression
-    collections = butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
-                                                   flattenChains=flatten_chains,
-                                                   includeChains=include_chains,
-                                                   **kwargs)
-    return {"collections": list(collections)}
+
+    if chains == "TABLE":
+        collectionNames = butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
+                                                           **kwargs)
+        collectionNames = list(collectionNames)  # Materialize list for multiple use.
+        collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames]
+        collectionDefinitions = [str(butler.registry.getCollectionChain(name)) if colType == "CHAINED" else ""
+                                 for name, colType in zip(collectionNames, collectionTypes)]
+
+        # Only add a definition column if at least one definition row is
+        # populated:
+        if any(collectionDefinitions):
+            return Table((collectionNames, collectionTypes, collectionDefinitions),
+                         names=("Name", "Type", "Definition"))
+        return Table((collectionNames, collectionTypes), names=("Name", "Type"))
+    elif chains == "TREE":
+        def getCollections(collectionName, nesting=0):
+            """Get a list of the name and type of the passed-in collection,
+            and its child collections, if it is a CHAINED collection. Child
+            collection names are indented from their parents by adding spaces
+            before the collection name.
+
+            Parameters
+            ----------
+            collectionName : `str`
+                The name of the collection to get.
+            nesting : `int`
+                The amount of indent to apply before each collection.
+
+            Returns
+            -------
+            collections : `list` [`tuple` [`str`, `str`]]
+                Tuples of the collection name and its type. Starts with the
+                passed-in collection, and if it is a CHAINED collection, each
+                of its children follows, and so on.
+            """
+            def nested(val):
+                stepDepth = 2
+                return " " * (stepDepth * nesting) + val
+
+            collectionType = butler.registry.getCollectionType(collectionName).name
+            if collectionType == "CHAINED":
+                # Get the child collections of the chained collection:
+                childCollections = list(butler.registry.getCollectionChain(collectionName))
+
+                # Fill in the child collections of the chained collection:
+                collections = itertools.chain(*[getCollections(child, nesting + 1)
+                                                for child in childCollections])
+
+                # Insert the chained (parent) collection at the beginning of
+                # the list, and return the list:
+                return [(nested(collectionName), "CHAINED")] + list(collections)
+            else:
+                return [(nested(collectionName), collectionType)]
+
+        collectionNames = butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
+                                                           **kwargs)
+        collections = itertools.chain(*[getCollections(name) for name in collectionNames])
+        return Table(array(list(collections)), names=("Name", "Type"))
+    elif chains == "FLATTEN":
+        collectionNames = list(butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
+                                                                flattenChains=True,
+                                                                **kwargs))
+        collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames]
+        return Table((collectionNames,
+                      collectionTypes),
+                     names=("Name", "Type"))
+    raise RuntimeError(f"Value for --chains not recognized: {chains}")