Skip to content

Commit

Permalink
Merge branch 'tickets/DM-33619'
Browse files Browse the repository at this point in the history
  • Loading branch information
n8pease committed Mar 17, 2022
2 parents bc02ca6 + 3f61768 commit 73c254b
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 20 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-33619.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The butler CLI command "remove-runs" can now unlink RUN collections from parent CHAINED collections.
53 changes: 46 additions & 7 deletions python/lsst/daf/butler/cli/cmd/_remove_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.


from typing import Mapping, Sequence
from typing import Iterator, Mapping, Sequence

import click

from ... import script
from ..opt import collection_argument, confirm_option, options_file_option, repo_argument
from ..utils import ButlerCommand
from .commands import existingRepoHelp

# messages emitted by remove-runs, defined separately for use in unit
# tests.
Expand All @@ -37,9 +38,21 @@
didRemoveDatasetsMsg = "The following datasets were removed:"
removedRunsMsg = "Removed collections"
abortedMsg = "Aborted."
requiresConfirmationMsg = (
"Removing runs that are in parent CHAINED collections requires confirmation. "
"\nTry again without --no-confirm to confirm removal of RUN collections from parents, "
"or add the --force flag to skip confirmation."
)
willUnlinkMsg = "{run}: will be unlinked from {parents}"
didUnlinkMsg = "{run}: was removed and unlinked from {parents}"
mustBeUnlinkedMsg = "{run}: must be unlinked from {parents}"


def _quoted(items: Sequence[str]) -> Iterator[str]:
return [f'"{i}"' for i in items]

def _print_remove(will: bool, runs: Sequence[str], datasets: Mapping[str, int]):

def _print_remove(will: bool, runs: Sequence[script.RemoveRun], datasets: Mapping[str, int]) -> None:
"""Print the formatted remove statement.
Parameters
Expand All @@ -52,25 +65,45 @@ def _print_remove(will: bool, runs: Sequence[str], datasets: Mapping[str, int]):
The dataset types & count that will be or were removed.
"""
print(willRemoveRunsMsg if will else didRemoveRunsMsg)
print(", ".join(runs))
print(willRemoveDatasetsMsg if will else didRemoveDatasetsMsg)
unlinkMsg = willUnlinkMsg if will else didUnlinkMsg
for run in runs:
if run.parents:
print(unlinkMsg.format(run=run.name, parents=", ".join(_quoted(run.parents))))
else:
print(run.name)
print("\n" + willRemoveDatasetsMsg if will else didRemoveDatasetsMsg)
print(", ".join([f"{i[0]}({i[1]})" for i in datasets.items()]))


def _print_requires_confirmation(runs: Sequence[script.RemoveRun], datasets: Mapping[str, int]) -> None:
print(requiresConfirmationMsg)
for run in runs:
if run.parents:
print(mustBeUnlinkedMsg.format(run=run.name, parents=", ".join(_quoted(run.parents))))


@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@click.pass_context
@repo_argument(
help=existingRepoHelp,
required=True,
)
@collection_argument(
help="COLLECTION is a glob-style expression that identifies the RUN collection(s) to remove."
)
@confirm_option()
@click.option(
"--force",
is_flag=True,
help="Required to remove RUN collections from parent collections if using --no-confirm.",
)
@options_file_option()
def remove_runs(**kwargs):
def remove_runs(context, confirm, force, **kwargs):
"""Remove one or more RUN collections.
This command can be used to remove RUN collections and the datasets within
them.
"""
confirm = kwargs.pop("confirm")
result = script.removeRuns(**kwargs)
canRemoveRuns = len(result.runs)
if not canRemoveRuns:
Expand All @@ -85,5 +118,11 @@ def remove_runs(**kwargs):
else:
print(abortedMsg)
else:
# if the user opted out of confirmation but there are runs with
# parent collections then they must confirm; print a message
# and exit.
if any(run.parents for run in result.runs) and not force:
_print_requires_confirmation(result.runs, result.datasets)
context.exit(1)
result.onConfirmation()
_print_remove(False, result.runs, result.datasets)
3 changes: 3 additions & 0 deletions python/lsst/daf/butler/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ def addArgumentHelp(doc, helpText):
# (Lines after the first blank line will be argument help.)
while len(doclines) > 1 and doclines[1]:
doclines[0] = " ".join((doclines[0], doclines.pop(1).strip()))
# Add standard indent to help text for proper alignment with command
# function documentation:
helpText = " " + helpText
doclines.insert(1, helpText)
doclines.insert(1, "\n")
doc = "\n".join(doclines)
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@
from .register_dataset_type import register_dataset_type
from .removeCollections import removeCollections
from .removeDatasetType import removeDatasetType
from .removeRuns import removeRuns
from .removeRuns import RemoveRun, removeRuns
from .retrieveArtifacts import retrieveArtifacts
from .transferDatasets import transferDatasets
31 changes: 24 additions & 7 deletions python/lsst/daf/butler/script/removeRuns.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@
from ..registry.queries import DatasetQueryResults


@dataclass
class RemoveRun:
"""Represents a RUN collection to remove."""

# the name of the run:
name: str
# parent CHAINED collections the RUN belongs to:
parents: List[str]


@dataclass
class RemoveRunsResult:
"""Container to return to the cli command.
Expand All @@ -42,15 +52,15 @@ class RemoveRunsResult:
# the callback function to do the removal
onConfirmation: Callable[[], None]
# list of the run collections that will be removed
runs: Sequence[str]
runs: Sequence[RemoveRun]
# mapping of dataset type name to how many will be removed.
datasets: Mapping[str, int]


def _getCollectionInfo(
repo: str,
collection: str,
) -> Tuple[List[str], Mapping[str, int]]:
) -> Tuple[List[RemoveRun], Mapping[str, int]]:
"""Get the names and types of collections that match the collection
string.
Expand All @@ -64,8 +74,8 @@ def _getCollectionInfo(
Returns
-------
runs : `list` of `str`
The runs that will be removed.
runs : `list` of `RemoveRun`
Describes the runs that will be removed.
datasets : `dict` [`str`, `int`]
The dataset types and and how many will be removed.
"""
Expand All @@ -84,7 +94,8 @@ def _getCollectionInfo(
datasets: Dict[str, int] = defaultdict(int)
for collectionName in collectionNames:
assert butler.registry.getCollectionType(collectionName).name == "RUN"
runs.append(collectionName)
parents = butler.registry.getCollectionParentChains(collectionName)
runs.append(RemoveRun(collectionName, list(parents)))
all_results = butler.registry.queryDatasets(..., collections=collectionName)
assert isinstance(all_results, DatasetQueryResults)
for r in all_results.byParentDatasetType():
Expand Down Expand Up @@ -113,10 +124,16 @@ def removeRuns(
"""
runs, datasets = _getCollectionInfo(repo, collection)

def doRemove(runs: Sequence[str]) -> None:
def doRemove(runs: Sequence[RemoveRun]) -> None:
"""Perform the remove step."""
butler = Butler(repo, writeable=True)
butler.removeRuns(runs, unstore=True)
with butler.transaction():
for run in runs:
for parent in run.parents:
children = list(butler.registry.getCollectionChain(parent))
children.remove(run.name)
butler.registry.setCollectionChain(parent, children, flatten=False)
butler.removeRuns([r.name for r in runs], unstore=True)

result = RemoveRunsResult(
onConfirmation=partial(doRemove, runs),
Expand Down
74 changes: 69 additions & 5 deletions tests/test_cliCmdRemoveRuns.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@
abortedMsg,
didRemoveDatasetsMsg,
didRemoveRunsMsg,
mustBeUnlinkedMsg,
noRunCollectionsMsg,
removedRunsMsg,
willRemoveDatasetsMsg,
willRemoveRunsMsg,
willUnlinkMsg,
)
from lsst.daf.butler.cli.utils import LogCliRunner, clickResultMsg
from lsst.daf.butler.tests.utils import MetricTestRepo
Expand All @@ -59,35 +61,97 @@ def test_removeRuns(self):
DatasetType("no_datasets", repo.butler.registry.dimensions.empty, "StructuredDataDict")
)

# Execute cmd but say no, check for expected outputs.
# Execute remove-runs but say no, check for expected outputs.
result = self.runner.invoke(butler.cli, ["remove-runs", root, "ingest*"], input="no")
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn(willRemoveRunsMsg, result.output)
self.assertIn(abortedMsg, result.output)
self.assertNotIn("no_datasets", result.output)
self.assertIn(
"ingest/run",
list(repo.butler.registry.queryCollections()),
)

# Add the run to a CHAINED collection.
parentCollection = "aParentCollection"
result = self.runner.invoke(
butler.cli, f"collection-chain {root} {parentCollection} ingest/run".split()
)
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
result = self.runner.invoke(butler.cli, ["query-collections", root])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn(parentCollection, result.output)

# Execute remove-runs but say no, check for expected outputs
# including the CHAINED collection.
result = self.runner.invoke(butler.cli, ["remove-runs", root, "ingest*"], input="no")
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn(willRemoveRunsMsg, result.output)
self.assertIn(willRemoveDatasetsMsg, result.output)
self.assertIn(
willUnlinkMsg.format(run="ingest/run", parents=f'"{parentCollection}"'), result.output
)
self.assertIn(abortedMsg, result.output)
self.assertNotIn("no_datasets", result.output)
result = self.runner.invoke(butler.cli, ["query-collections", root])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn("ingest/run", result.output)
self.assertIn(parentCollection, result.output)

# ...say yes
# Do the same remove-runs command, but say yes.
result = self.runner.invoke(butler.cli, ["remove-runs", root, "ingest*"], input="yes")
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn(willRemoveRunsMsg, result.output)
self.assertIn(willRemoveDatasetsMsg, result.output)
self.assertIn(removedRunsMsg, result.output)
result = self.runner.invoke(butler.cli, ["query-collections", root])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertNotIn("ingest/run", result.output)
self.assertIn(parentCollection, result.output)

# now they've been deleted, try again and check for "none found"
# Now they've been deleted, try again and check for "none found".
result = self.runner.invoke(butler.cli, ["remove-runs", root, "ingest*"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn(noRunCollectionsMsg, result.output)

# remake the repo and check --no-confirm option
# Remake the repo and check --no-confirm option.
root = "repo1"
MetricTestRepo(root, configFile=os.path.join(TESTDIR, "config/basic/butler.yaml"))

# Execute cmd but say no, check for expected outputs.
# Add the run to a CHAINED collection.
parentCollection = "parent"
result = self.runner.invoke(
butler.cli, f"collection-chain {root} {parentCollection} ingest/run".split()
)
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
result = self.runner.invoke(butler.cli, ["query-collections", root])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn("ingest/run", result.output)
self.assertIn(parentCollection, result.output)

# Execute remove-runs with --no-confirm, should fail because there
# is a parent CHAINED collection.
result = self.runner.invoke(butler.cli, ["remove-runs", root, "ingest*", "--no-confirm"])
self.assertNotEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn(
mustBeUnlinkedMsg.format(run="ingest/run", parents=f'"{parentCollection}"'), result.output
)
result = self.runner.invoke(butler.cli, ["query-collections", root])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn("ingest/run", result.output)
self.assertIn(parentCollection, result.output)

# Execute remove-runs with --no-confirm and --force
result = self.runner.invoke(
butler.cli, ["remove-runs", root, "ingest*", "--no-confirm", "--force"]
)
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertIn(didRemoveRunsMsg, result.output)
self.assertIn(didRemoveDatasetsMsg, result.output)
result = self.runner.invoke(butler.cli, ["query-collections", root])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertNotIn("ingest/run", result.output)
self.assertIn(parentCollection, result.output)

# Execute cmd looking for a non-existant collection
result = self.runner.invoke(butler.cli, ["remove-runs", root, "foo"])
Expand Down

0 comments on commit 73c254b

Please sign in to comment.