Skip to content

Commit

Permalink
add prune-datasets butler subcommand
Browse files Browse the repository at this point in the history
  • Loading branch information
n8pease committed Feb 2, 2021
1 parent 4404c8e commit 2fb787d
Show file tree
Hide file tree
Showing 10 changed files with 896 additions and 32 deletions.
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/cli/cmd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"config_dump",
"config_validate",
"prune_collection",
"prune_datasets",
"query_collections",
"query_data_ids",
"query_dataset_types",
Expand All @@ -40,6 +41,7 @@
config_dump,
config_validate,
prune_collection,
prune_datasets,
query_collections,
query_data_ids,
query_dataset_types,
Expand Down
155 changes: 152 additions & 3 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ..opt import (
collection_type_option,
collection_argument,
collections_argument,
collections_option,
components_option,
dataset_type_option,
Expand All @@ -41,6 +42,9 @@

from ..utils import (
ButlerCommand,
MWOptionDecorator,
option_section,
printAstropyTables,
split_commas,
to_upper,
typeStrAcceptsMultiple,
Expand Down Expand Up @@ -149,6 +153,153 @@ def prune_collection(**kwargs):
script.pruneCollection(**kwargs)


# User-facing messages emitted by the ``prune-datasets`` command. They are
# module-level names so that callers other than the command itself can compare
# against the exact text.

# Informational messages printed during normal operation.
pruneDatasets_wouldRemoveMsg = "The following datasets would be removed:"
pruneDatasets_willRemoveMsg = "The following datasets will be removed:"
pruneDatasets_askContinueMsg = "Continue?"
pruneDatasets_didRemoveAforementioned = "The datasets were removed."
pruneDatasets_didNotRemoveAforementioned = "Did not remove the datasets."
pruneDatasets_didRemoveMsg = "Removed the following datasets:"
pruneDatasets_noDatasetsFound = "Did not find any datasets."

# Error messages; each is raised to the user wrapped in a click.ClickException.
# NOTE: the opening delimiter is exactly three quotes; a fourth quote would
# become a literal '"' at the start of the message.
pruneDatasets_errPurgeAndDisassociate = unwrap(
    """--disassociate and --purge may not be used together: --disassociate purges from just the passed TAGged
    collections, but --purge forces disassociation from all of them. """
)
pruneDatasets_errQuietWithDryRun = "Can not use --quiet and --dry-run together."
pruneDatasets_errNoCollectionRestriction = unwrap(
    """Must indicate collections from which to prune datasets by passing COLLECTION arguments (select all
    collections by passing '*', or consider using 'butler prune-collection'), by using --purge to pass a run
    collection, or by using --disassociate to select a tagged collection.""")
# ``{collection}`` is filled in with str.format by the command.
pruneDatasets_errPruneOnNotRun = "Can not prune a collection that is not a RUN collection: {collection}"
pruneDatasets_errNoOp = "No operation: one of --purge, --unstore, or --disassociate must be provided."

# ``--disassociate TAG`` option for ``prune-datasets``.
# The value reaches the command under the ``disassociate_tags`` keyword
# (presumably the second positional name is forwarded to click as the
# parameter name -- confirm against MWOptionDecorator).
# ``multiple=True`` lets the option be repeated, and every value is passed
# through the ``split_commas`` callback.
# Combining this option with --purge is reported to the user via
# ``pruneDatasets_errPurgeAndDisassociate``.
disassociate_option = MWOptionDecorator(
"--disassociate", "disassociate_tags",
help=unwrap("""Disassociate pruned datasets from the given tagged collections. May not be used with
--purge."""),
multiple=True,
callback=split_commas,
metavar="TAG"
)


# ``--purge RUN`` option for ``prune-datasets``.
# Single-valued (no ``multiple``); the value reaches the command under the
# ``purge_run`` keyword (presumably the second positional name is forwarded to
# click as the parameter name -- confirm against MWOptionDecorator).
# Combining this option with --disassociate is reported to the user via
# ``pruneDatasets_errPurgeAndDisassociate``.
purge_option = MWOptionDecorator(
"--purge", "purge_run",
help=unwrap("""Completely remove the dataset from the given RUN in the Registry. May not be used with
--disassociate. Note, this may remove provenance information from datasets other than those
provided, and should be used with extreme care."""),
metavar="RUN"
)


# ``--find-all`` boolean flag for ``prune-datasets``.
# Listed under the "Query Datasets Options:" section of the command; the help
# text of the COLLECTIONS argument states that glob-style collection
# expressions are only allowed when this flag is passed.
find_all_option = MWOptionDecorator(
"--find-all", is_flag=True,
help=unwrap("""Purge the dataset results from all of the collections in which a dataset of that dataset
type + data id combination appear. (By default only the first found dataset type + data id is
purged, according to the order of COLLECTIONS passed in).""")
)


# ``--unstore`` boolean flag for ``prune-datasets``.
# NOTE(review): the help text says --unstore is used by default when neither
# --disassociate nor --purge is given, but ``pruneDatasets_errNoOp`` says one
# of --purge, --unstore, or --disassociate must be provided. One of the two
# looks out of date -- confirm against the script implementation.
unstore_option = MWOptionDecorator(
"--unstore",
is_flag=True,
help=unwrap("""Remove these datasets from all datastores configured with this data repository. If
--disassociate and --purge are not used then --unstore will be used by default. Note that
--unstore will make it impossible to retrieve these datasets even via other collections.
Datasets that are already not stored are ignored by this option.""")
)


# ``--dry-run`` boolean flag for ``prune-datasets``.
# The command rejects the combination of this flag with --quiet
# (``pruneDatasets_errQuietWithDryRun``).
dry_run_option = MWOptionDecorator(
"--dry-run",
is_flag=True,
help=unwrap("""Display the datasets that would be removed but do not remove them.
Note that a dataset can be in collections other than its RUN-type collection, and removing it
will remove it from all of them, even though the only one this will show is its RUN
collection.""")
)


# ``--confirm/--no-confirm`` on/off flag pair for ``prune-datasets``; defaults
# to True (confirm). The command forces the value to False when --quiet is
# passed.
confirm_option = MWOptionDecorator(
"--confirm/--no-confirm",
default=True,
help="Print expected action and a confirmation prompt before executing. Default is --confirm."
)


# ``--quiet`` boolean flag for ``prune-datasets``.
# The command consumes this flag itself: it sets ``confirm`` to False and
# raises an error if --dry-run was also passed.
quiet_option = MWOptionDecorator(
"--quiet",
is_flag=True,
help=unwrap("""Makes output quiet. Implies --no-confirm. Requires --dry-run not be passed.""")
)


@click.command(cls=ButlerCommand, short_help="Remove datasets.")
@repo_argument(required=True)
@collections_argument(help=unwrap("""COLLECTIONS is one or more expressions that identify the collections to
                                  search for datasets. Glob-style expressions may be used but only if the
                                  --find-all flag is also passed."""))
@option_section("Query Datasets Options:")
@datasets_option(help="One or more glob-style expressions that identify the dataset types to be pruned.",
                 multiple=True,
                 callback=split_commas)
@find_all_option()
@where_option(help=whereHelp)
@option_section("Prune Options:")
@disassociate_option()
@purge_option()
@unstore_option()
@option_section("Execution Options:")
@dry_run_option()
@confirm_option()
@quiet_option()
@option_section("Other Options:")
@options_file_option()
def prune_datasets(**kwargs):
    """Query for and remove one or more datasets from a collection and/or
    storage.
    """
    # NOTE: the docstring above is kept short and unchanged on purpose: click
    # uses a command function's docstring as the command's --help text.
    #
    # ``kwargs`` holds the parsed command-line values. Everything except
    # ``quiet`` is forwarded unchanged to ``script.pruneDatasets``; ``quiet``
    # only affects what this function prints.
    quiet = kwargs.pop("quiet", False)
    if quiet:
        if kwargs["dry_run"]:
            # A dry run exists only to show output, so it cannot be quiet.
            raise click.ClickException(pruneDatasets_errQuietWithDryRun)
        # --quiet implies --no-confirm.
        kwargs["confirm"] = False

    result = script.pruneDatasets(**kwargs)

    # Error states reported by the script become user-facing click errors.
    if result.errPurgeAndDisassociate:
        raise click.ClickException(pruneDatasets_errPurgeAndDisassociate)
    if result.errNoCollectionRestriction:
        raise click.ClickException(pruneDatasets_errNoCollectionRestriction)
    if result.errPruneOnNotRun:
        # ``errDict`` supplies the ``collection`` value for the message.
        raise click.ClickException(pruneDatasets_errPruneOnNotRun.format(**result.errDict))
    if result.errNoOp:
        raise click.ClickException(pruneDatasets_errNoOp)

    # Dry run: show what would be removed and stop.
    if result.dryRun:
        print(pruneDatasets_wouldRemoveMsg)
        printAstropyTables(result.tables)
        return

    # Confirmation requested: show the datasets, ask, and only invoke the
    # result's ``onConfirmation`` callback if the user agrees.
    if result.confirm:
        if not result.tables:
            print(pruneDatasets_noDatasetsFound)
            return
        print(pruneDatasets_willRemoveMsg)
        printAstropyTables(result.tables)
        doContinue = click.confirm(pruneDatasets_askContinueMsg, default=False)
        if doContinue:
            result.onConfirmation()
            print(pruneDatasets_didRemoveAforementioned)
        else:
            print(pruneDatasets_didNotRemoveAforementioned)
        return

    # The script reports it has finished: report what was removed unless
    # --quiet was passed.
    if result.finished:
        if not quiet:
            print(pruneDatasets_didRemoveMsg)
            printAstropyTables(result.tables)
        return


@click.command(short_help="Search for collections.", cls=ButlerCommand)
@repo_argument(required=True)
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
Expand Down Expand Up @@ -217,9 +368,7 @@ def remove_dataset_type(*args, **kwargs):
@options_file_option()
def query_datasets(**kwargs):
"""List the datasets in a repository."""
tables = script.queryDatasets(**kwargs)

for table in tables:
for table in script.QueryDatasets(**kwargs).getTables():
print("")
table.pprint_all()
print("")
Expand Down
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/cli/opt/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@

# Reusable positional-argument decorators for butler subcommands.

# A single positional argument named "collection".
collection_argument = MWArgumentDecorator("collection")

# Positional argument named "collections"; ``nargs=-1`` and the
# ``split_commas`` callback are passed through to the decorator (presumably
# forwarded to click.argument, where nargs=-1 means "any number of values" --
# confirm against MWArgumentDecorator).
collections_argument = MWArgumentDecorator("collections", callback=split_commas, nargs=-1)

# Positional argument named "dimensions", configured the same way.
dimensions_argument = MWArgumentDecorator("dimensions", callback=split_commas, nargs=-1)
Expand Down
24 changes: 24 additions & 0 deletions python/lsst/daf/butler/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,30 @@
split_kv_separator = "="


def astropyTablesToStr(tables):
    """Render astropy tables to string as they are displayed in the CLI.

    Output formatting matches ``printAstropyTables``.

    Parameters
    ----------
    tables : iterable
        Objects providing a ``pformat_all()`` method that returns the
        formatted table as a list of text lines (as astropy tables do).

    Returns
    -------
    text : `str`
        For each table: an empty line, every formatted line of the table
        followed by a newline, then another empty line. An empty string if
        ``tables`` is empty.
    """
    parts = []
    for table in tables:
        parts.append("\n")
        # ``pformat_all`` returns the lines; they must be added to the output
        # explicitly (dropping the return value leaves only blank lines).
        parts.extend(line + "\n" for line in table.pformat_all())
        parts.append("\n")
    return "".join(parts)


def printAstropyTables(tables):
    """Write each astropy table to standard output for CLI display.

    Every table is surrounded by one blank line above and one below. The
    layout is intended to match the string built by ``astropyTablesToStr``.

    Parameters
    ----------
    tables : iterable
        Objects providing a ``pprint_all()`` method that prints the table.
    """
    for tbl in tables:
        print()  # blank line before the table
        tbl.pprint_all()
        print()  # blank line after the table


def textTypeStr(multiple):
"""Get the text type string for CLI help documentation.
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/script/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from .pruneCollection import pruneCollection
from .queryCollections import queryCollections
from .queryDataIds import queryDataIds
from .queryDatasets import queryDatasets
from .queryDatasets import QueryDatasets
from ._pruneDatasets import pruneDatasets # depends on QueryDatasets
from .queryDatasetTypes import queryDatasetTypes
from .queryDimensionRecords import queryDimensionRecords
from .removeDatasetType import removeDatasetType

0 comments on commit 2fb787d

Please sign in to comment.