Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-26688: Add command-line tool for Registry.associate #459

Merged
merged 2 commits into from
Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 6 additions & 4 deletions python/lsst/daf/butler/cli/cmd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("butler_import",
__all__ = ("associate",
"butler_import",
"certify_calibrations",
"create",
"config_dump",
Expand All @@ -32,10 +33,11 @@
"query_datasets",
"query_dimension_records",
"remove_dataset_type",
)
)


from .commands import (butler_import,
from .commands import (associate,
butler_import,
certify_calibrations,
create,
config_dump,
Expand All @@ -48,4 +50,4 @@
query_datasets,
query_dimension_records,
remove_dataset_type,
)
)
39 changes: 19 additions & 20 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
element_argument,
glob_argument,
options_file_option,
query_datasets_options,
repo_argument,
transfer_option,
verbose_option,
Expand All @@ -49,16 +50,26 @@
to_upper,
typeStrAcceptsMultiple,
unwrap,
where_help,
)

from ... import script


willCreateRepoHelp = "REPO is the URI or path to the new repository. Will be created if it does not exist."
existingRepoHelp = "REPO is the URI or path to an existing data repository root or configuration file."
whereHelp = unwrap("""A string expression similar to a SQL WHERE clause. May involve any column of a dimension
table or a dimension name as a shortcut for the primary key column of a dimension
table.""")


# ``butler associate`` command. REPO and COLLECTION are positional arguments;
# the remaining options come from the shared ``query_datasets_options`` group.
# ``repo=False`` keeps the group from adding a second REPO argument (it is
# already declared here), ``showUri=False`` leaves out --show-uri, and
# ``useArguments=False`` makes the dataset type an option instead of the GLOB
# positional argument. Every parsed value is forwarded unchanged, by keyword,
# to ``script.associate``. The function docstring below is the command's help
# text, so edit it with the CLI user in mind.
@click.command(cls=ButlerCommand, short_help="Add existing datasets to a tagged collection.")
@repo_argument(required=True)
@collection_argument(help="COLLECTION is the collection the datasets should be associated with.")
@query_datasets_options(repo=False, showUri=False, useArguments=False)
@options_file_option()
def associate(**kwargs):
"""Add existing datasets to a tagged collection; searches for datasets with
the options and adds them to the named COLLECTION.
"""
script.associate(**kwargs)


# The conversion from the import command name to the butler_import function
Expand Down Expand Up @@ -251,7 +262,7 @@ def prune_collection(**kwargs):
multiple=True,
callback=split_commas)
@find_all_option()
@where_option(help=whereHelp)
@where_option(help=where_help)
@option_section("Prune Options:")
@disassociate_option()
@purge_option()
Expand Down Expand Up @@ -364,20 +375,7 @@ def remove_dataset_type(*args, **kwargs):


@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
"dataset types to be queried.")
@collections_option()
@where_option(help=whereHelp)
@click.option("--find-first",
is_flag=True,
help=unwrap("""For each result data ID, only yield one DatasetRef of each DatasetType, from the
first collection in which a dataset of that dataset type appears (according to the
order of 'collections' passed in). If used, 'collections' must specify at least one
expression and must not contain wildcards."""))
@click.option("--show-uri",
is_flag=True,
help="Show the dataset URI in results.")
@query_datasets_options()
@options_file_option()
def query_datasets(**kwargs):
"""List the datasets in a repository."""
Expand All @@ -401,6 +399,7 @@ def query_datasets(**kwargs):
@click.option("--search-all-inputs", is_flag=True, default=False,
help=unwrap("""Search all children of the inputCollection if it is a CHAINED collection,
instead of just the most recent one."""))
@options_file_option()
def certify_calibrations(*args, **kwargs):
"""Certify calibrations in a repository.
"""
Expand All @@ -417,7 +416,7 @@ def certify_calibrations(*args, **kwargs):
constrain the yielded "instrument", "exposure", "detector", and
"physical_filter" values to only those for which at least one "raw" dataset
exists in "collections"."""))
@where_option(help=whereHelp)
@where_option(help=where_help)
@options_file_option()
def query_data_ids(**kwargs):
"""List the data IDs in a repository.
Expand All @@ -439,7 +438,7 @@ def query_data_ids(**kwargs):
constrain the yielded records. Only affects results when used with
--collections."""))
@collections_option(help=collections_option.help + " Only affects results when used with --datasets.")
@where_option(help=whereHelp)
@where_option(help=where_help)
@click.option("--no-check", is_flag=True,
help=unwrap("""Don't check the query before execution. By default the query is checked before it
executed, this may reject some valid queries that resemble common mistakes."""))
Expand Down
1 change: 1 addition & 0 deletions python/lsst/daf/butler/cli/opt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@

from .arguments import *
from .options import *
from .optionGroups import *
63 changes: 63 additions & 0 deletions python/lsst/daf/butler/cli/opt/optionGroups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import click

from . import (
collections_option,
dataset_type_option,
glob_argument,
repo_argument,
where_option,
)
from ..utils import OptionGroup, unwrap, where_help


class query_datasets_options(OptionGroup): # noqa: N801
"""Option group that adds the dataset-query arguments and options to a
command.

The decorators are collected in ``self.decorators`` in this order: the
optional REPO argument, the dataset type selector (argument or option), the
collections option, the where option, ``--find-first`` and, optionally,
``--show-uri``.

Parameters
----------
repo : `bool`, optional
If `True`, add the required REPO argument. Requires ``useArguments`` to
be `True` as well.
showUri : `bool`, optional
If `True`, add the ``--show-uri`` flag.
useArguments : `bool`, optional
If `True`, the dataset type expressions are taken from the GLOB
argument; if `False` they are taken from the dataset type option.

Raises
------
RuntimeError
Raised if ``repo`` is `True` and ``useArguments`` is `False`.
"""

def __init__(self, repo=True, showUri=True, useArguments=True):
self.decorators = []
if repo:
# REPO is only available as a positional argument, so it cannot be
# requested when the caller asked for options instead of arguments.
if not useArguments:
raise RuntimeError("repo as an option is not currently supported.")
self.decorators.append(repo_argument(required=True))
# The dataset type selector is either the GLOB positional argument or the
# dataset type option, never both.
if useArguments:
self.decorators.append(glob_argument(
help=unwrap("""GLOB is one or more glob-style expressions that fully or partially identify the
dataset type names to be queried.""")))
else:
self.decorators.append(dataset_type_option(
help=unwrap("""One or more glob-style expressions that fully or partially identify the dataset
type names to be queried.""")))
# These three are added regardless of the constructor flags.
self.decorators.extend([
collections_option(),
where_option(help=where_help),
click.option("--find-first",
is_flag=True,
help=unwrap("""For each result data ID, only yield one DatasetRef of each
DatasetType, from the first collection in which a dataset of that dataset
type appears (according to the order of 'collections' passed in). If
used, 'collections' must specify at least one expression and must not
contain wildcards."""))])
if showUri:
self.decorators.append(click.option("--show-uri",
is_flag=True,
help="Show the dataset URI in results."))
16 changes: 16 additions & 0 deletions python/lsst/daf/butler/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@
split_kv_separator = "="


# Shared help text for any ``--where`` option whose value is a WHERE-clause-like
# expression; defined once here so every command shows the same wording.
where_help = (
    "A string expression similar to a SQL WHERE clause. May involve any column of a "
    "dimension table or a dimension name as a shortcut for the primary key column of a "
    "dimension table."
)


def astropyTablesToStr(tables):
"""Render astropy tables to string as they are displayed in the CLI.

Expand Down Expand Up @@ -670,6 +676,16 @@ class ButlerCommand(MWCommand):
extra_epilog = "See 'butler --help' for more options."


class OptionGroup:
    """Base class for an option group decorator.

    An instance is used as a single decorator that applies a whole group of
    decorators to a function. Subclasses must provide an attribute called
    ``decorators``: a sequence of callables, each of which accepts a function
    and returns the decorated function.

    The decorators are applied last to first, so the first entry in
    ``decorators`` ends up outermost. This gives the same result as stacking
    them above the function, top to bottom, in the listed order.
    """

    def __call__(self, f):
        """Apply every decorator in ``self.decorators`` to ``f``.

        Parameters
        ----------
        f : callable
            The function to decorate.

        Returns
        -------
        f : callable
            The result of passing ``f`` through all of the decorators; ``f``
            itself if ``decorators`` is empty.
        """
        # Reverse iteration mirrors stacked ``@decorator`` syntax, where the
        # decorator closest to the function is applied first.
        for decorator in reversed(self.decorators):
            f = decorator(f)
        return f


class MWCtxObj():
"""Helper object for managing the `click.Context.obj` parameter, allows
obj data to be managed in a consistent way.
Expand Down
1 change: 1 addition & 0 deletions python/lsst/daf/butler/script/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from .queryCollections import queryCollections
from .queryDataIds import queryDataIds
from .queryDatasets import QueryDatasets
from ._associate import associate # depends on QueryDatasets
from ._pruneDatasets import pruneDatasets # depends on QueryDatasets
from .queryDatasetTypes import queryDatasetTypes
from .queryDimensionRecords import queryDimensionRecords
Expand Down
44 changes: 44 additions & 0 deletions python/lsst/daf/butler/script/_associate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from .. import Butler, CollectionType
from ..script import QueryDatasets


def associate(repo, collection, dataset_type, collections, where, find_first):
"""Add existing datasets to a TAGGED collection.
"""

butler = Butler(repo, writeable=True)

butler.registry.registerCollection(collection, CollectionType.TAGGED)

results = QueryDatasets(
butler=butler,
glob=dataset_type,
collections=collections,
where=where,
find_first=find_first,
show_uri=False,
repo=None
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea of calling the script rather than the native method to share logic like transforming globs to regexes. Do we need to worry about the script also doing unnecessary things (like making astropy Tables) at all?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

QueryDatasets generates astropy tables on demand via the getter (getTables), so it's structured to not create these if they're not needed. (I do see a mistake in passing the QueryDatasets results to the butler without using the accessor, will fix)


butler.registry.associate(collection, results.getDatasets())
14 changes: 10 additions & 4 deletions python/lsst/daf/butler/script/queryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,13 @@ class QueryDatasets:

Parameters
----------
repo : `str`
repo : `str` or `None`
URI to the location of the repo or URI to a config file describing the
repo and its location.
repo and its location. One of `repo` and `butler` must be `None` and
the other must not be `None`.
butler : ``lsst.daf.butler.Butler`` or `None`
The butler to use to query. One of `repo` and `butler` must be `None`
and the other must not be `None`.
glob : iterable [`str`]
A list of glob-style search strings that fully or partially identify
the dataset type names to search for.
Expand All @@ -133,8 +137,10 @@ class QueryDatasets:
If True, include the dataset URI in the output.
"""

def __init__(self, repo, glob, collections, where, find_first, show_uri):
self.butler = Butler(repo)
def __init__(self, glob, collections, where, find_first, show_uri, repo=None, butler=None):
if (repo and butler) or (not repo and not butler):
raise RuntimeError("One of repo and butler must be provided and the other must be None.")
self.butler = butler or Butler(repo)
self._getDatasets(glob, collections, where, find_first)
self.showUri = show_uri

Expand Down
81 changes: 81 additions & 0 deletions tests/test_cliCmdAssociate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Unit tests for daf_butler CLI associate subcommand.
"""

import unittest
from unittest.mock import patch

from lsst.daf.butler.cli.butler import cli as butlerCli
from lsst.daf.butler.cli.utils import clickResultMsg, LogCliRunner


class AssociateTestCase(unittest.TestCase):
    """Exercise the ``associate`` subcommand of ``butler``.

    ``script.associate`` has no logic of its own, so each test replaces it
    with a mock and checks only the keyword arguments (names, values and
    types) that the command-line layer passes to it.
    """

    def setUp(self):
        self.runner = LogCliRunner()

    @patch("lsst.daf.butler.script.associate")
    def test_defaults(self, mockAssociate):
        """Check the values and types used for options that are not given.
        """
        cliArgs = ["associate", "myRepo", "myCollection"]
        expectedKwargs = dict(
            repo="myRepo",
            collection="myCollection",
            dataset_type=tuple(),
            collections=tuple(),
            where=None,
            find_first=False,
        )

        result = self.runner.invoke(butlerCli, cliArgs)

        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        mockAssociate.assert_called_once_with(**expectedKwargs)

    @patch("lsst.daf.butler.script.associate")
    def test_values(self, mockAssociate):
        """Check the values and types passed on when every option is given.
        """
        cliArgs = [
            "associate", "myRepo", "myCollection",
            "--dataset-type", "myDatasetType",
            "--collections", "myCollection,otherCollection",
            "--where", "'a=b'",
            "--find-first",
        ]
        expectedKwargs = dict(
            repo="myRepo",
            collection="myCollection",
            dataset_type=("myDatasetType",),
            collections=("myCollection", "otherCollection"),
            where="'a=b'",
            find_first=True,
        )

        result = self.runner.invoke(butlerCli, cliArgs)

        self.assertEqual(result.exit_code, 0, clickResultMsg(result))
        mockAssociate.assert_called_once_with(**expectedKwargs)


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion tests/test_cliCmdQueryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class QueryDatasetsTest(unittest.TestCase, ButlerTestHelper):

@staticmethod
def _queryDatasets(repo, glob=(), collections=(), where="", find_first=False, show_uri=False):
return script.QueryDatasets(repo, glob, collections, where, find_first, show_uri).getTables()
return script.QueryDatasets(glob, collections, where, find_first, show_uri, repo=repo).getTables()

def setUp(self):
self.root = makeTestTempDir(TESTDIR)
Expand Down