Skip to content

Commit

Permalink
add prune-datasets butler subcommand
Browse files Browse the repository at this point in the history
  • Loading branch information
n8pease committed Jan 12, 2021
1 parent 22f64db commit e55d966
Show file tree
Hide file tree
Showing 10 changed files with 832 additions and 42 deletions.
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/cli/cmd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"config_dump",
"config_validate",
"prune_collection",
"prune_datasets",
"query_collections",
"query_data_ids",
"query_dataset_types",
Expand All @@ -40,6 +41,7 @@
config_dump,
config_validate,
prune_collection,
prune_datasets,
query_collections,
query_data_ids,
query_dataset_types,
Expand Down
149 changes: 146 additions & 3 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ..opt import (
collection_type_option,
collection_argument,
collections_argument,
collections_option,
components_option,
dataset_type_option,
Expand All @@ -41,6 +42,9 @@

from ..utils import (
ButlerCommand,
MWOptionDecorator,
option_section,
printAstropyTables,
split_commas,
to_upper,
typeStrAcceptsMultiple,
Expand Down Expand Up @@ -149,6 +153,147 @@ def prune_collection(**kwargs):
script.pruneCollection(**kwargs)


# User-facing messages for the ``prune-datasets`` command. Each message is a
# named module-level constant so that its exact text is defined in one place.

# Headings printed above the table(s) of affected datasets.
pruneDatasets_wouldRemoveMsg = "The following datasets would be removed:"
pruneDatasets_willRemoveMsg = "The following datasets will be removed:"
pruneDatasets_didRemoveMsg = "Removed the following datasets:"

# Interactive confirmation prompt and the two possible outcomes.
pruneDatasets_askContinueMsg = "Continue?"
pruneDatasets_didRemoveAforementioned = "The datasets were removed."
pruneDatasets_didNotRemoveAforementioned = "Did not remove the datasets."

# Printed when the query matched nothing.
pruneDatasets_noDatasetsFound = "Did not find any datasets."

# Error messages raised as click.ClickException by the command.
# (The opening delimiter previously had four quote characters, which put a
# stray '"' at the start of the displayed message.)
pruneDatasets_errPurgeAndDisassociate = unwrap(
    """--disassociate and --purge may not be used together: --disassociate purges from just the passed TAGged
collections, but --purge forces disassociation from all of them. """
)
pruneDatasets_errQuietWithDryRun = "Can not use --quiet and --dry-run together."
pruneDatasets_errNoCollectionRestriction = unwrap(
    """Must indicate collections from which to prune datasets by passing COLLECTION arguments (select all
collections by passing '*', or consider using 'butler prune-collection'), by using --purge to pass a run
collection, or by using --disassociate to select a tagged collection.""")

# Option decorators used by the ``prune_datasets`` command. Each object is
# called to produce the decorator (e.g. ``@disassociate_option()``).

# --disassociate TAG: may be given multiple times; values are post-processed
# by ``split_commas``. The second positional string is presumably the keyword
# name under which the value reaches the command function — TODO confirm
# against MWOptionDecorator.
disassociate_option = MWOptionDecorator(
"--disassociate", "disassociate_tags",
help=unwrap("""Disassociate pruned datasets from the given tagged collections. May not be used with
--purge."""),
multiple=True,
callback=split_commas,
metavar="TAG"
)


# --purge RUN: takes a single value (no ``multiple``); presumably delivered to
# the command function as ``purge_run`` — TODO confirm.
purge_option = MWOptionDecorator(
"--purge", "purge_run",
help=unwrap("""Completely remove the dataset from the given RUN in the Registry. May not be used with
--disassociate. Note, this may remove provenance information from datasets other than those
provided, and should be used with extreme care."""),
metavar="RUN"
)


# --find-all: boolean flag controlling whether every matching dataset is
# selected rather than only the first one found.
find_all_option = MWOptionDecorator(
"--find-all", is_flag=True,
help=unwrap("""Purge the dataset results from all of the collections in which a dataset of that dataset
type + data id combination appear. (By default only the first found dataset type + data id is
purged, according to the order of COLLECTIONS passed in).""")
)


# --unstore: boolean flag requesting removal from the datastores.
unstore_option = MWOptionDecorator(
"--unstore",
is_flag=True,
# TODO "known to this butler", on the next line, reads funny on the CLI.
# Can we say "related to this repo?" or similar?
help=unwrap("""Remove these datasets from all datastores known to this butler. Note that this will make
it impossible to retrieve these datasets even via other collections. Datasets that are already
not stored are ignored by this option.""")
)


# --dry-run: boolean flag; the command lists what would be removed and stops.
dry_run_option = MWOptionDecorator(
"--dry-run",
is_flag=True,
help=unwrap("""Display the datasets that would be removed but do not remove them.
Note that a dataset can be in collections other than its RUN-type collection, and removing it
will remove it from all of them, even though the only one this will show is its RUN
collection.""")
)


# --confirm/--no-confirm: paired on/off switch, on by default.
confirm_option = MWOptionDecorator(
"--confirm/--no-confirm",
default=True,
help="Print expected action and a confirmation prompt before executing. Default is --confirm."
)


# --quiet: boolean flag. ``prune_datasets`` turns confirmation off when this is
# set and raises an error if it is combined with --dry-run.
quiet_option = MWOptionDecorator(
"--quiet",
is_flag=True,
help=unwrap("""Makes output quiet. Implies --no-confirm. Requires --dry-run not be passed.""")
)


@click.command(cls=ButlerCommand, short_help="Remove datasets.")
@repo_argument(required=True)
@collections_argument(help=unwrap("""COLLECTIONS is one or more expressions that identify the collections to
search for datasets. Glob-style expressions may be used but only if the
--find-all flag is also passed."""))
@option_section("Query Datasets Options:")
@datasets_option(help="One or more glob-style expressions that identify the dataset types to be pruned.",
                 multiple=True,
                 callback=split_commas)
@find_all_option()
@where_option(help=whereHelp)
@option_section("Prune Options:")
@disassociate_option()
@purge_option()
@unstore_option()
@option_section("Execution Options:")
@dry_run_option()
@confirm_option()
@quiet_option()
@option_section("Other Options:")
@options_file_option()
def prune_datasets(**kwargs):
    """Query for and remove one or more datasets from a collection and/or
    storage.
    """
    # NOTE: click shows the docstring above as the command's help text, so it
    # is kept short and user-facing; implementation notes live in comments.
    #
    # ``kwargs`` holds the parsed CLI values. ``quiet`` is consumed here and
    # everything else is forwarded unchanged to ``script.pruneDatasets``.
    # Raises ``click.ClickException`` for invalid option combinations.

    # --quiet is handled entirely at the CLI layer: it is incompatible with
    # --dry-run (whose only purpose is output) and it disables the prompt.
    quiet = kwargs.pop("quiet", False)
    if quiet:
        if kwargs["dry_run"]:
            raise click.ClickException(pruneDatasets_errQuietWithDryRun)
        kwargs["confirm"] = False

    result = script.pruneDatasets(**kwargs)

    # The script reports problems and state via attributes on ``result``;
    # translate them into CLI errors and output here.
    if result.errPurgeAndDisassociate:
        raise click.ClickException(pruneDatasets_errPurgeAndDisassociate)
    if result.errNoCollectionRestriction:
        raise click.ClickException(pruneDatasets_errNoCollectionRestriction)

    if result.dryRun:
        print(pruneDatasets_wouldRemoveMsg)
        printAstropyTables(result.tables)
        return

    if result.confirm:
        # Nothing matched: there is nothing to confirm.
        if not result.tables:
            print(pruneDatasets_noDatasetsFound)
            return
        print(pruneDatasets_willRemoveMsg)
        printAstropyTables(result.tables)
        # Default to "no" so that just pressing Enter never deletes data.
        if click.confirm(pruneDatasets_askContinueMsg, default=False):
            result.onConfirmation()
            print(pruneDatasets_didRemoveAforementioned)
        else:
            print(pruneDatasets_didNotRemoveAforementioned)
        return

    if result.finished:
        if not quiet:
            print(pruneDatasets_didRemoveMsg)
            printAstropyTables(result.tables)
        return


@click.command(short_help="Search for collections.", cls=ButlerCommand)
@repo_argument(required=True)
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
Expand Down Expand Up @@ -217,9 +362,7 @@ def remove_dataset_type(*args, **kwargs):
@options_file_option()
def query_datasets(**kwargs):
"""List the datasets in a repository."""
tables = script.queryDatasets(**kwargs)

for table in tables:
for table in script.QueryDatasets(**kwargs).getTables():
print("")
table.pprint_all()
print("")
Expand Down
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/cli/opt/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@

# Positional-argument decorators shared by butler subcommands.

# A single collection name.
collection_argument = MWArgumentDecorator("collection")

# Any number of collection expressions; each value is passed through
# ``split_commas``. (``nargs=-1`` is presumably forwarded to click, where it
# means "variadic" — confirm against MWArgumentDecorator.)
collections_argument = MWArgumentDecorator(
    "collections",
    nargs=-1,
    callback=split_commas,
)

# Any number of dimension names, handled the same way as ``collections``.
dimensions_argument = MWArgumentDecorator(
    "dimensions",
    nargs=-1,
    callback=split_commas,
)
Expand Down
24 changes: 24 additions & 0 deletions python/lsst/daf/butler/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,30 @@
# Character separating a key from its value, as in "key=value". Presumably
# used by the key-value splitting helpers of this module, which are not
# visible here — verify before relying on this.
split_kv_separator = "="


def astropyTablesToStr(tables):
    """Render astropy tables to string as they are displayed in the CLI.

    Output formatting matches ``printAstropyTables``: for every table an
    empty line, then each formatted table line terminated by a newline, then
    another empty line.

    Parameters
    ----------
    tables : iterable
        Table objects providing ``pformat_all()``, which returns the
        formatted table as a list of lines (as `astropy.table.Table` does).

    Returns
    -------
    rendered : `str`
        The text that ``printAstropyTables`` would write for the same
        tables; an empty string if ``tables`` is empty.
    """
    pieces = []
    for table in tables:
        pieces.append("\n")
        # The formatted lines must be added to the output; previously the
        # result of pformat_all() was discarded and only newlines returned.
        pieces.extend(f"{line}\n" for line in table.pformat_all())
        pieces.append("\n")
    return "".join(pieces)


def printAstropyTables(tables):
    """Write astropy tables to standard output for display in the CLI.

    Every table is framed by one empty line before and one after it, so the
    layout is the same as the string built by ``astropyTablesToStr``.

    Parameters
    ----------
    tables : iterable
        Table objects providing ``pprint_all()``.
    """
    for tbl in tables:
        print()  # leading blank line
        tbl.pprint_all()
        print()  # trailing blank line


def textTypeStr(multiple):
"""Get the text type string for CLI help documentation.
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/script/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from .pruneCollection import pruneCollection
from .queryCollections import queryCollections
from .queryDataIds import queryDataIds
from .queryDatasets import queryDatasets
from .queryDatasets import QueryDatasets
from ._pruneDatasets import pruneDatasets # depends on QueryDatasets
from .queryDatasetTypes import queryDatasetTypes
from .queryDimensionRecords import queryDimensionRecords
from .removeDatasetType import removeDatasetType

0 comments on commit e55d966

Please sign in to comment.