add query-dataset-types command
n8pease committed Jun 5, 2020
1 parent 883eb17 commit bca80f8
Showing 5 changed files with 194 additions and 5 deletions.
6 changes: 4 additions & 2 deletions python/lsst/daf/butler/cli/cmd/__init__.py
@@ -19,7 +19,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["butler_import", "create", "config_dump", "config_validate", "query_collections"]
__all__ = ["butler_import", "create", "config_dump", "config_validate", "query_collections",
"query_dataset_types"]


from .commands import butler_import, create, config_dump, config_validate, query_collections
from .commands import (butler_import, create, config_dump, config_validate, query_collections,
query_dataset_types)
16 changes: 13 additions & 3 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -22,10 +22,11 @@
import click
import yaml

from ..opt import (collection_type_option, dataset_type_option, directory_argument, repo_argument, run_option,
transfer_option)
from ..opt import (collection_type_option, dataset_type_option, directory_argument, glob_parameter,
repo_argument, run_option, transfer_option, verbose_option)
from ..utils import split_commas, cli_handle_exception, typeStrAcceptsMultiple
from ...script import butlerImport, createRepo, configDump, configValidate, queryCollections
from ...script import (butlerImport, createRepo, configDump, configValidate, queryCollections,
queryDatasetTypes)


# The conversion from the import command name to the butler_import function
@@ -106,3 +107,12 @@ def config_validate(*args, **kwargs):
def query_collections(*args, **kwargs):
"""Get the collections whose names match an expression."""
print(yaml.dump(cli_handle_exception(queryCollections, *args, **kwargs)))


@click.command()
@repo_argument(required=True)
@glob_parameter(parameterType=glob_parameter.ARGUMENT, multiple=True)
@verbose_option(help="Include dataset type name, dimensions, and storage class in output.")
def query_dataset_types(*args, **kwargs):
"""Get the dataset types in a repository."""
print(yaml.dump(cli_handle_exception(queryDatasetTypes, *args, **kwargs), sort_keys=False))
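
For orientation, a minimal sketch of how the new command can be driven from Python with click's test runner, in the same way the unit tests added below do. The repository path "demo_repo" and the "raw*" glob are illustrative assumptions; everything else (Butler.makeRepo, the cli entry point, the YAML output keyed by 'datasetTypes') comes from this commit.

import click.testing
import yaml

from lsst.daf.butler import Butler
from lsst.daf.butler.cli.butler import cli

# Create a throwaway repository to query ("demo_repo" is an illustrative path).
Butler.makeRepo("demo_repo")

runner = click.testing.CliRunner()
# Positional globs filter the dataset type names; --verbose adds dimensions
# and storage class to each entry.
result = runner.invoke(cli, ["query-dataset-types", "demo_repo", "raw*", "--verbose"])
assert result.exit_code == 0, result.output

# The command prints YAML whose top-level key is 'datasetTypes'.
print(yaml.safe_load(result.output))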
1 change: 1 addition & 0 deletions python/lsst/daf/butler/script/__init__.py
@@ -24,3 +24,4 @@
from .configDump import configDump
from .configValidate import configValidate
from .queryCollections import queryCollections
from .queryDatasetTypes import queryDatasetTypes
79 changes: 79 additions & 0 deletions python/lsst/daf/butler/script/queryDatasetTypes.py
@@ -0,0 +1,79 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import fnmatch
import re

from .. import Butler


def _translateExpr(expr):
"""Translate glob-style search terms to regex.
Parameters
----------
expr : `str` or `...`
A glob-style pattern string to convert, or an Ellipsis.
Returns
-------
expressions : [`str` or `...`]
A list of expressions that are either regex or Ellipsis.
"""
if expr == ...:
return expr
return re.compile(fnmatch.translate(expr))


def queryDatasetTypes(repo, verbose, glob):
"""Get the dataset types in a repository.
Parameters
----------
repo : `str`
URI to the location of the repo or URI to a config file describing the
repo and its location.
verbose : `bool`
If false only return the name of the dataset types. If false return
name, dimensions, and storage class of each dataset type.
glob : [`str`]
A list of glob-style search string that fully or partially identify
the dataset type names to search for.
Returns
-------
collections : `dict` [`str`, [`str`]]
A dict whose key is 'datasetTypes' and whose value is a list of
collection names.
"""
butler = Butler(repo)
kwargs = dict()
if glob:
kwargs['expression'] = [_translateExpr(g) for g in glob]
datasetTypes = butler.registry.queryDatasetTypes(**kwargs)
if verbose:
info = [dict(name=datasetType.name,
dimensions=list(datasetType.dimensions.names),
storageClass=datasetType.storageClass.name)
for datasetType in datasetTypes]
else:
info = [datasetType.name for datasetType in datasetTypes]
return {'datasetTypes': info}
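
As a quick illustration of the glob handling above: fnmatch.translate converts a shell-style pattern into a regular expression string, which _translateExpr compiles and queryDatasetTypes passes to the registry as its 'expression' argument. A minimal, self-contained sketch (the pattern and dataset type names are made up):

import fnmatch
import re

# fnmatch.translate turns a glob into an anchored regex string.
pattern = re.compile(fnmatch.translate("calexp*"))

names = ["calexp", "calexpBackground", "raw"]
# Only names matching the glob survive, mirroring the filtering the
# registry applies to dataset type names.
print([n for n in names if pattern.match(n)])
# prints: ['calexp', 'calexpBackground']

With no globs given, queryDatasetTypes omits the 'expression' argument entirely, so the registry query is unconstrained; either way the result is wrapped as {'datasetTypes': [...]}.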
97 changes: 97 additions & 0 deletions tests/test_cliCmdQueryDatasetTypes.py
@@ -0,0 +1,97 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Unit tests for daf_butler CLI query-collections command.
"""

import click.testing
import unittest
import yaml

from lsst.daf.butler import Butler, DatasetType, StorageClass
from lsst.daf.butler.cli.butler import cli
from lsst.daf.butler.cli.utils import clickResultMsg
from lsst.daf.butler.tests.mockeredTest import MockeredTestBase


class QueryDatasetTypesCmdTest(MockeredTestBase):

defaultExpected = dict(repo=None,
verbose=False,
glob=())

def test_minimal(self):
"""Test only required parameters.
"""
self.run_test(["query-dataset-types", "here"],
self.makeExpected(repo="here"))

def test_requiredMissing(self):
"""Test that if the required parameter is missing it fails"""
self.run_missing(["query-dataset-types"], 'Error: Missing argument "REPO".')

def test_all(self):
"""Test all parameters."""
self.run_test(["query-dataset-types", "here", "--verbose", "foo*"],
self.makeExpected(repo="here", verbose=True, glob=("foo*", )))


class QueryDatasetTypesScriptTest(unittest.TestCase):

def testQueryDatasetTypes(self):
self.maxDiff = None
datasetName = "test"
instrumentDimension = "instrument"
visitDimension = "visit"
storageClassName = "testDatasetType"
expectedNotVerbose = {"datasetTypes": [datasetName]}
runner = click.testing.CliRunner()
with runner.isolated_filesystem():
butlerCfg = Butler.makeRepo("here")
butler = Butler(butlerCfg, writeable=True)
storageClass = StorageClass(storageClassName)
butler.registry.storageClasses.registerStorageClass(storageClass)
dimensions = butler.registry.dimensions.extract((instrumentDimension, visitDimension))
datasetType = DatasetType(datasetName, dimensions, storageClass)
butler.registry.registerDatasetType(datasetType)
# check not-verbose output:
result = runner.invoke(cli, ["query-dataset-types", "here"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))
# check glob output:
result = runner.invoke(cli, ["query-dataset-types", "here", "t*"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))
# check verbose output:
result = runner.invoke(cli, ["query-dataset-types", "here", "--verbose"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
response = yaml.safe_load(result.output)
# The output dimension names include all required dimensions, not just
# those used to register the dataset type, so verify the expected
# components individually.
self.assertEqual(response["datasetTypes"][0]["name"], datasetName)
self.assertEqual(response["datasetTypes"][0]["storageClass"], storageClassName)
self.assertIn(instrumentDimension, response["datasetTypes"][0]["dimensions"])
self.assertIn(visitDimension, response["datasetTypes"][0]["dimensions"])


if __name__ == "__main__":
unittest.main()
