add query-dataset-types command
n8pease committed Jun 5, 2020
1 parent 883eb17 commit bca80f8
Showing 5 changed files with 194 additions and 5 deletions.
6 changes: 4 additions & 2 deletions python/lsst/daf/butler/cli/cmd/__init__.py
@@ -19,7 +19,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["butler_import", "create", "config_dump", "config_validate", "query_collections"]
__all__ = ["butler_import", "create", "config_dump", "config_validate", "query_collections",
"query_dataset_types"]


from .commands import butler_import, create, config_dump, config_validate, query_collections
from .commands import (butler_import, create, config_dump, config_validate, query_collections,
query_dataset_types)
16 changes: 13 additions & 3 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -22,10 +22,11 @@
import click
import yaml

from ..opt import (collection_type_option, dataset_type_option, directory_argument, repo_argument, run_option,
transfer_option)
from ..opt import (collection_type_option, dataset_type_option, directory_argument, glob_parameter,
repo_argument, run_option, transfer_option, verbose_option)
from ..utils import split_commas, cli_handle_exception, typeStrAcceptsMultiple
from ...script import butlerImport, createRepo, configDump, configValidate, queryCollections
from ...script import (butlerImport, createRepo, configDump, configValidate, queryCollections,
queryDatasetTypes)


# The conversion from the import command name to the butler_import function
@@ -106,3 +107,12 @@ def config_validate(*args, **kwargs):
def query_collections(*args, **kwargs):
"""Get the collections whose names match an expression."""
print(yaml.dump(cli_handle_exception(queryCollections, *args, **kwargs)))


@click.command()
@repo_argument(required=True)
@glob_parameter(parameterType=glob_parameter.ARGUMENT, multiple=True)
@verbose_option(help="Include dataset type name, dimensions, and storage class in output.")
def query_dataset_types(*args, **kwargs):
"""Get the dataset types in a repository."""
print(yaml.dump(cli_handle_exception(queryDatasetTypes, *args, **kwargs), sort_keys=False))
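
For orientation, a minimal sketch of how the new command can be driven from Python with click's test runner, in the same way the unit tests added below do. The repository path "demo_repo" and the "raw*" glob are illustrative assumptions; everything else (Butler.makeRepo, the cli entry point, the YAML output keyed by 'datasetTypes') comes from this commit.

import click.testing
import yaml

from lsst.daf.butler import Butler
from lsst.daf.butler.cli.butler import cli

# Create a throwaway repository to query ("demo_repo" is an illustrative path).
Butler.makeRepo("demo_repo")

runner = click.testing.CliRunner()
# Positional globs filter the dataset type names; --verbose adds dimensions
# and storage class to each entry.
result = runner.invoke(cli, ["query-dataset-types", "demo_repo", "raw*", "--verbose"])
assert result.exit_code == 0, result.output

# The command prints YAML whose top-level key is 'datasetTypes'.
print(yaml.safe_load(result.output))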
1 change: 1 addition & 0 deletions python/lsst/daf/butler/script/__init__.py
@@ -24,3 +24,4 @@
from .configDump import configDump
from .configValidate import configValidate
from .queryCollections import queryCollections
from .queryDatasetTypes import queryDatasetTypes
79 changes: 79 additions & 0 deletions python/lsst/daf/butler/script/queryDatasetTypes.py
@@ -0,0 +1,79 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import fnmatch
import re

from .. import Butler


def _translateExpr(expr):
"""Translate glob-style search terms to regex.
Parameters
----------
expr : `str` or `...`
A glob-style pattern string to convert, or an Ellipsis.
Returns
-------
expressions : [`str` or `...`]
A list of expressions that are either regex or Ellipsis.
"""
if expr == ...:
return expr
return re.compile(fnmatch.translate(expr))


def queryDatasetTypes(repo, verbose, glob):
"""Get the dataset types in a repository.
Parameters
----------
repo : `str`
URI to the location of the repo or URI to a config file describing the
repo and its location.
verbose : `bool`
If false only return the name of the dataset types. If false return
name, dimensions, and storage class of each dataset type.
glob : [`str`]
A list of glob-style search string that fully or partially identify
the dataset type names to search for.
Returns
-------
collections : `dict` [`str`, [`str`]]
A dict whose key is 'datasetTypes' and whose value is a list of
collection names.
"""
butler = Butler(repo)
kwargs = dict()
if glob:
kwargs['expression'] = [_translateExpr(g) for g in glob]
datasetTypes = butler.registry.queryDatasetTypes(**kwargs)
if verbose:
info = [dict(name=datasetType.name,
dimensions=list(datasetType.dimensions.names),
storageClass=datasetType.storageClass.name)
for datasetType in datasetTypes]
else:
info = [datasetType.name for datasetType in datasetTypes]
return {'datasetTypes': info}
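
As a quick illustration of the glob handling above: fnmatch.translate converts a shell-style pattern into a regular expression string, which _translateExpr compiles and queryDatasetTypes passes to the registry as its 'expression' argument. A minimal, self-contained sketch (the pattern and dataset type names are made up):

import fnmatch
import re

# fnmatch.translate turns a glob into an anchored regex string.
pattern = re.compile(fnmatch.translate("calexp*"))

names = ["calexp", "calexpBackground", "raw"]
# Only names matching the glob survive, mirroring the filtering the
# registry applies to dataset type names.
print([n for n in names if pattern.match(n)])
# prints: ['calexp', 'calexpBackground']

With no globs given, queryDatasetTypes omits the 'expression' argument entirely, so the registry query is unconstrained; either way the result is wrapped as {'datasetTypes': [...]}.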
97 changes: 97 additions & 0 deletions tests/test_cliCmdQueryDatasetTypes.py
@@ -0,0 +1,97 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Unit tests for daf_butler CLI query-collections command.
"""

import click.testing
import unittest
import yaml

from lsst.daf.butler import Butler, DatasetType, StorageClass
from lsst.daf.butler.cli.butler import cli
from lsst.daf.butler.cli.utils import clickResultMsg
from lsst.daf.butler.tests.mockeredTest import MockeredTestBase


class QueryDatasetTypesCmdTest(MockeredTestBase):

defaultExpected = dict(repo=None,
verbose=False,
glob=())

def test_minimal(self):
"""Test only required parameters.
"""
self.run_test(["query-dataset-types", "here"],
self.makeExpected(repo="here"))

def test_requiredMissing(self):
"""Test that if the required parameter is missing it fails"""
self.run_missing(["query-dataset-types"], 'Error: Missing argument "REPO".')

def test_all(self):
"""Test all parameters."""
self.run_test(["query-dataset-types", "here", "--verbose", "foo*"],
self.makeExpected(repo="here", verbose=True, glob=("foo*", )))


class QueryDatasetTypesScriptTest(unittest.TestCase):

def testQueryDatasetTypes(self):
self.maxDiff = None
datasetName = "test"
instrumentDimension = "instrument"
visitDimension = "visit"
storageClassName = "testDatasetType"
expectedNotVerbose = {"datasetTypes": [datasetName]}
runner = click.testing.CliRunner()
with runner.isolated_filesystem():
butlerCfg = Butler.makeRepo("here")
butler = Butler(butlerCfg, writeable=True)
storageClass = StorageClass(storageClassName)
butler.registry.storageClasses.registerStorageClass(storageClass)
dimensions = butler.registry.dimensions.extract((instrumentDimension, visitDimension))
datasetType = DatasetType(datasetName, dimensions, storageClass)
butler.registry.registerDatasetType(datasetType)
# check not-verbose output:
result = runner.invoke(cli, ["query-dataset-types", "here"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))
# check glob output:
result = runner.invoke(cli, ["query-dataset-types", "here", "t*"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))
# check verbose output:
result = runner.invoke(cli, ["query-dataset-types", "here", "--verbose"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
response = yaml.safe_load(result.output)
# The output dimension names include all required dimensions, not just
# those used to register the dataset type, so verify the expected
# components individually.
self.assertEqual(response["datasetTypes"][0]["name"], datasetName)
self.assertEqual(response["datasetTypes"][0]["storageClass"], storageClassName)
self.assertIn(instrumentDimension, response["datasetTypes"][0]["dimensions"])
self.assertIn(visitDimension, response["datasetTypes"][0]["dimensions"])


if __name__ == "__main__":
unittest.main()
