Skip to content

Commit

Permalink
Merge branch 'tickets/DM-27154' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
n8pease committed Nov 17, 2020
2 parents dcdd93c + 5e9389d commit 6353d3e
Show file tree
Hide file tree
Showing 13 changed files with 647 additions and 312 deletions.
56 changes: 50 additions & 6 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,20 @@
"""
from __future__ import annotations

__all__ = ("Butler", "ButlerValidationError")
__all__ = (
"Butler",
"ButlerValidationError",
"PruneCollectionsArgsError",
"PurgeWithoutUnstorePruneCollectionsError",
"RunWithoutPurgePruneCollectionsError",
"PurgeUnsupportedPruneCollectionsError",
)


import os
from collections import defaultdict
import contextlib
import logging
import os
from typing import (
Any,
ClassVar,
Expand Down Expand Up @@ -84,6 +92,40 @@ class ButlerValidationError(ValidationError):
pass


class PruneCollectionsArgsError(TypeError):
    """Common ancestor of the exceptions raised when the arguments passed
    to ``Butler.pruneCollection`` are mutually inconsistent.

    It derives from `TypeError`, so code that catches `TypeError` also
    catches every subclass of this error.
    """


class PurgeWithoutUnstorePruneCollectionsError(PruneCollectionsArgsError):
    """Signals that ``purge=True`` was requested while ``unstore`` was
    `False`; purging is only accepted together with unstoring.
    """

    def __init__(self):
        message = "Cannot pass purge=True without unstore=True."
        super().__init__(message)


class RunWithoutPurgePruneCollectionsError(PruneCollectionsArgsError):
    """Raised when pruning a RUN collection but purge is False.

    Parameters
    ----------
    collectionType : `CollectionType`
        Type of the collection that could not be pruned. It is stored on
        the ``collectionType`` attribute so that handlers can compose
        their own message.
    collectionName : `str`, optional
        Name of the collection that could not be pruned. When provided it
        is stored on the ``collectionName`` attribute and included in the
        error message.
    """

    def __init__(self, collectionType, collectionName=None):
        self.collectionType = collectionType
        self.collectionName = collectionName
        # ``collectionType.name`` is the name of the *type* ("RUN"), not of
        # the collection, so it must not be used where the collection's name
        # belongs; only mention a name when the caller actually supplied one.
        target = "" if collectionName is None else f" {collectionName}"
        super().__init__(f"Cannot prune RUN collection{target} without purge=True.")


class PurgeUnsupportedPruneCollectionsError(PruneCollectionsArgsError):
    """Raised when purge is True but is not supported for the given
    collection.

    Parameters
    ----------
    collectionType : `CollectionType`
        Type of the collection that could not be pruned. It is stored on
        the ``collectionType`` attribute so that handlers can compose
        their own message.
    collectionName : `str`, optional
        Name of the collection that could not be pruned. When provided it
        is stored on the ``collectionName`` attribute and included in the
        error message.
    """

    def __init__(self, collectionType, collectionName=None):
        self.collectionType = collectionType
        self.collectionName = collectionName
        # Report the type through its ``name`` (e.g. "TAGGED") rather than
        # the raw enum member, and never put the type where the collection's
        # own name belongs; only mention a name when one was supplied.
        target = "" if collectionName is None else f" {collectionName}"
        super().__init__(
            f"Cannot prune {collectionType.name} collection{target} with purge=True.")


class Butler:
"""Main entry point for the data access system.
Expand Down Expand Up @@ -1068,19 +1110,21 @@ def pruneCollection(self, name: str, purge: bool = False, unstore: bool = False)
Raised if the butler is read-only or arguments are mutually
inconsistent.
"""

# See pruneDatasets comments for more information about the logic here;
# the cases are almost the same, but here we can rely on Registry to
# take care of everything but Datastore deletion when we remove the
# collection.
if not self.isWriteable():
raise TypeError("Butler is read-only.")
if purge and not unstore:
raise TypeError("Cannot pass purge=True without unstore=True.")
collectionType = self.registry.getCollectionType(name)
if purge and not unstore:
raise PurgeWithoutUnstorePruneCollectionsError()
if collectionType is CollectionType.RUN and not purge:
raise TypeError(f"Cannot prune RUN collection {name} without purge=True.")
raise RunWithoutPurgePruneCollectionsError(collectionType)
if collectionType is not CollectionType.RUN and purge:
raise TypeError(f"Cannot prune {collectionType.name} collection {name} with purge=True.")
raise PurgeUnsupportedPruneCollectionsError(collectionType)

with self.registry.transaction():
if unstore:
for ref in self.registry.queryDatasets(..., collections=name, findFirst=True):
Expand Down
38 changes: 22 additions & 16 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from ..opt import (
collection_type_option,
collection_argument,
collections_option,
dataset_type_option,
datasets_option,
Expand All @@ -37,7 +38,7 @@
where_option,
)

from ..utils import cli_handle_exception, split_commas, typeStrAcceptsMultiple, unwrap
from ..utils import cli_handle_exception, split_commas, to_upper, typeStrAcceptsMultiple, unwrap
from ... import script


Expand Down Expand Up @@ -122,11 +123,10 @@ def config_validate(*args, **kwargs):

@click.command()
@repo_argument(required=True)
@click.option("--collection",
help=unwrap("""Name of the collection to remove. If this is a TAGGED or CHAINED collection,
datasets within the collection are not modified unless --unstore is passed. If this
is a RUN collection, --purge and --unstore must be passed, and all datasets in it
are fully removed from the data repository. """))
@collection_argument(help=unwrap("""COLLECTION is the Name of the collection to remove. If this is a tagged or
chained collection, datasets within the collection are not modified unless --unstore
is passed. If this is a run collection, --purge and --unstore must be passed, and
all datasets in it are fully removed from the data repository."""))
@click.option("--purge",
help=unwrap("""Permit RUN collections to be removed, fully removing datasets within them.
Requires --unstore as an added precaution against accidental deletion. Must not be
Expand All @@ -146,19 +146,25 @@ def prune_collection(**kwargs):
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
"collections to return.")
@collection_type_option()
@click.option("--flatten-chains/--no-flatten-chains",
help="Recursively get the child collections of matching CHAINED collections. Default is "
"--no-flatten-chains.")
@click.option("--include-chains/--no-include-chains",
default=None,
help="For --include-chains, return records for matching CHAINED collections. For "
"--no-include-chains do not return records for CHAINED collections. Default is the "
"opposite of --flatten-chains: include either CHAINED collections or their children, but "
"not both.")
@click.option("--chains",
default="table",
help=unwrap("""Affects how results are presented. TABLE lists each dataset in a row with
chained datasets' children listed in a Definition column. TREE lists children below
their parent in tree form. FLATTEN lists all datasets, including child datasets in
one list.Defaults to TABLE. """),
callback=to_upper,
type=click.Choice(("TABLE", "TREE", "FLATTEN"), case_sensitive=False))
@options_file_option()
def query_collections(*args, **kwargs):
"""Get the collections whose names match an expression."""
print(yaml.dump(cli_handle_exception(script.queryCollections, *args, **kwargs)))
table = cli_handle_exception(script.queryCollections, *args, **kwargs)
# The unit test that mocks script.queryCollections does not return a table
# so we need the following `if`.
if table:
# When chains==TREE, the children of chained datasets are indented
# relative to their parents. For this to work properly the table must
# be left-aligned.
table.pprint_all(align="<")


@click.command()
Expand Down
12 changes: 7 additions & 5 deletions python/lsst/daf/butler/cli/opt/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
from ..utils import MWArgumentDecorator, split_commas


collection_argument = MWArgumentDecorator("collection")

dimensions_argument = MWArgumentDecorator("dimensions",
callback=split_commas,
nargs=-1)

directory_argument = MWArgumentDecorator("directory",
help="DIRECTORY is the folder containing dataset files.")

Expand All @@ -30,12 +36,8 @@
help="GLOB is one or more strings to apply to the search.",
nargs=-1)

repo_argument = MWArgumentDecorator("repo")

locations_argument = MWArgumentDecorator("locations",
callback=split_commas,
nargs=-1)

dimensions_argument = MWArgumentDecorator("dimensions",
callback=split_commas,
nargs=-1)
repo_argument = MWArgumentDecorator("repo")
7 changes: 4 additions & 3 deletions python/lsst/daf/butler/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,13 +449,14 @@ def cli_handle_exception(func, *args, **kwargs):
if mockEnvVarKey in os.environ:
Mocker(*args, **kwargs)
return

try:
return func(*args, **kwargs)
except Exception:
except Exception as e:
msg = io.StringIO()
msg.write("An error occurred during command execution:\n")
traceback.print_exc(file=msg)
raise click.ClickException(msg.getvalue())
log.debug(msg.getvalue())
raise click.ClickException(e) from e


class option_section: # noqa: N801
Expand Down
15 changes: 14 additions & 1 deletion python/lsst/daf/butler/script/pruneCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from .. import Butler
from .. import (
PurgeWithoutUnstorePruneCollectionsError,
RunWithoutPurgePruneCollectionsError,
PurgeUnsupportedPruneCollectionsError,
)


def pruneCollection(repo, collection, purge, unstore):
Expand All @@ -37,4 +42,12 @@ def pruneCollection(repo, collection, purge, unstore):
Same as the ``unstore`` argument to ``Butler.pruneCollection``.
"""
butler = Butler(repo, writeable=True)
butler.pruneCollection(collection, purge, unstore)
try:
butler.pruneCollection(collection, purge, unstore)
except PurgeWithoutUnstorePruneCollectionsError as e:
raise TypeError("Cannot pass --purge without --unstore.") from e
except RunWithoutPurgePruneCollectionsError as e:
raise TypeError(f"Cannot prune RUN collection {e.collectionType.name} without --purge.") from e
except PurgeUnsupportedPruneCollectionsError as e:
raise TypeError(
f"Cannot prune {e.collectionType} collection {e.collectionType.name} with --purge.") from e
95 changes: 79 additions & 16 deletions python/lsst/daf/butler/script/queryCollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,15 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from astropy.table import Table
import itertools
from numpy import array

from .. import Butler
from ..core.utils import globToRegex


def queryCollections(repo, glob, collection_type, flatten_chains, include_chains):
def queryCollections(repo, glob, collection_type, chains):
"""Get the collections whose names match an expression.
Parameters
Expand All @@ -36,19 +40,15 @@ def queryCollections(repo, glob, collection_type, flatten_chains, include_chains
the dataset type names to search for.
collection_type : `Iterable` [ `CollectionType` ], optional
If provided, only return collections of these types.
flatten_chains : `bool`
If `True` (`False` is default), recursively yield the child collections
of matching `~CollectionType.CHAINED` collections.
include_chains : `bool` or `None`
If `True`, yield records for matching `~CollectionType.CHAINED`
collections. Default is the opposite of ``flattenChains``: include
either CHAINED collections or their children, but not both.
chains : `str`
Must be one of "FLATTEN", "TABLE", or "TREE" (case sensitive).
Affects contents and formatting of results, see
``cli.commands.query_collections``.
Returns
-------
collections : `dict` [`str`, [`str`]]
A dict whose key is "collections" and whose value is a list of
collection names.
collections : `astropy.table.Table`
A table containing information about collections.
"""
butler = Butler(repo)
expression = globToRegex(glob)
Expand All @@ -57,8 +57,71 @@ def queryCollections(repo, glob, collection_type, flatten_chains, include_chains
kwargs = {}
if expression:
kwargs["expression"] = expression
collections = butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
flattenChains=flatten_chains,
includeChains=include_chains,
**kwargs)
return {"collections": list(collections)}

if chains == "TABLE":
collectionNames = butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
**kwargs)
collectionNames = list(collectionNames) # Materialize list for multiple use.
collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames]
collectionDefinitions = [str(butler.registry.getCollectionChain(name)) if colType == "CHAINED" else ""
for name, colType in zip(collectionNames, collectionTypes)]

# Only add a definition column if at least one definition row is
# populated:
if any(collectionDefinitions):
return Table((collectionNames, collectionTypes, collectionDefinitions),
names=("Name", "Type", "Definition"))
return Table((collectionNames, collectionTypes), names=("Name", "Type"))
elif chains == "TREE":
def getCollections(collectionName, nesting=0):
    """Build the (name, type) rows for one collection and for everything
    chained beneath it.

    Every level of chaining indents the reported name by two more
    spaces, so children appear nested under their parent.

    Parameters
    ----------
    collectionName : `str`
        The collection to describe.
    nesting : `int`
        Depth of this collection below the top-level one; determines how
        far its name is indented.

    Returns
    -------
    collections : `list` [`tuple` [`str`, `str`]]
        A ``(indented name, type name)`` tuple for the requested
        collection, followed depth-first by the tuples of its children
        when it is a CHAINED collection.
    """
    indentPerLevel = 2
    label = " " * (indentPerLevel * nesting) + collectionName
    typeName = butler.registry.getCollectionType(collectionName).name

    # The requested collection always comes first in the result.
    rows = [(label, typeName)]
    if typeName == "CHAINED":
        # Materialize the chain before recursing, then append each child's
        # rows (one nesting level deeper) in chain order.
        children = list(butler.registry.getCollectionChain(collectionName))
        for child in children:
            rows.extend(getCollections(child, nesting + 1))
    return rows

collectionNames = butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
**kwargs)
collections = itertools.chain(*[getCollections(name) for name in collectionNames])
return Table(array(list(collections)), names=("Name", "Type"))
elif chains == "FLATTEN":
collectionNames = list(butler.registry.queryCollections(collectionTypes=frozenset(collection_type),
flattenChains=True,
**kwargs))
collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames]
return Table((collectionNames,
collectionTypes),
names=("Name", "Type"))
raise RuntimeError(f"Value for --chains not recognized: {chains}")

0 comments on commit 6353d3e

Please sign in to comment.