DM-27344: Add butler query-dimension-records subcommand #442

Merged: 5 commits, Nov 24, 2020
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/cli/cmd/__init__.py
@@ -29,6 +29,7 @@
    "query_data_ids",
    "query_dataset_types",
    "query_datasets",
    "query_dimension_records",
    "remove_dataset_type",
)

@@ -43,5 +44,6 @@
    query_data_ids,
    query_dataset_types,
    query_datasets,
    query_dimension_records,
    remove_dataset_type,
)
38 changes: 29 additions & 9 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -20,16 +20,17 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import click
import yaml

from ..opt import (
    collection_type_option,
    collection_argument,
    collections_option,
    components_option,
    dataset_type_option,
    datasets_option,
    dimensions_argument,
    directory_argument,
    element_argument,
    glob_argument,
    options_file_option,
    repo_argument,
@@ -179,17 +180,15 @@ def query_collections(*args, **kwargs):
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
                    "dataset types to return.")
@verbose_option(help="Include dataset type name, dimensions, and storage class in output.")
@click.option("--components/--no-components",
              default=None,
              help="For --components, apply all expression patterns to component dataset type names as well. "
                   "For --no-components, never apply patterns to components. Default (where neither is "
                   "specified) is to apply patterns to components only if their parent datasets were not "
                   "matched by the expression. Fully-specified component datasets (`str` or `DatasetType` "
                   "instances) are always included.")
@components_option()
@options_file_option()
def query_dataset_types(*args, **kwargs):
    """Get the dataset types in a repository."""
    print(yaml.dump(cli_handle_exception(script.queryDatasetTypes, *args, **kwargs), sort_keys=False))
    table = cli_handle_exception(script.queryDatasetTypes, *args, **kwargs)
    if table:
        table.pprint_all()
    else:
        print("No results. Try --help for more information.")


@click.command(cls=ButlerCommand)
@@ -269,3 +268,24 @@ def query_data_ids(**kwargs):
print("No results. Try requesting some dimensions or datasets, see --help for more information.")
else:
print("No results. Try --help for more information.")


@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@element_argument(required=True)
@datasets_option(help=unwrap("""An expression that fully or partially identifies dataset types that should
                             constrain the yielded records. Only affects results when used with
                             --collections."""))
@collections_option(help=collections_option.help + " Only affects results when used with --datasets.")
@where_option(help=whereHelp)
@click.option("--no-check", is_flag=True,
help=unwrap("""Don't check the query before execution. By default the query is checked before it
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TallJimbo what do we gain by having this option? When is disabling the check a good idea?

Copy link
Member

@TallJimbo TallJimbo Nov 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check can generate false positives for some valid-but-rare queries. Those all involve wanting multiple instruments and a WHERE constraint on a table derived from instrument (e.g. exposure) on a column in that table whose values are nevertheless instrument-agnostic (e.g. exposure.exposure_time > 30). They then fall into two categories:

  • Cases where you do restrict the instruments (or skymaps) you want, but use IN instead of = and OR: instrument IN ('HSC', 'DECam') AND exposure.exposure_time > 30. The checker just isn't smart enough to rewrite the IN.
  • Cases where you actually want to query all of the instruments (or skymaps) in the registry: just exposure.exposure_time > 30. Note that I think we still want to check this case by default, because usually the user is thinking of a specific instrument but we don't know that.

Similar cases may exist for skymaps, but it's harder to think of practically useful cases given the attributes actually on the tract and patch tables.

I'm open to opinions on whether that's sufficiently niche as to make this option not worth its maintenance weight.

executed, this may reject some valid queries that resemble common mistakes."""))
@options_file_option()
def query_dimension_records(**kwargs):
    """Query for dimension information."""
    table = cli_handle_exception(script.queryDimensionRecords, **kwargs)
    if table:
        table.pprint_all()
    else:
        print("No results. Try --help for more information.")
5 changes: 5 additions & 0 deletions python/lsst/daf/butler/cli/opt/arguments.py
@@ -31,6 +31,11 @@
directory_argument = MWArgumentDecorator("directory",
                                         help="DIRECTORY is the folder containing dataset files.")


element_argument = MWArgumentDecorator("element",
                                       help="ELEMENT is the dimension element to obtain.")


glob_argument = MWArgumentDecorator("glob",
                                    callback=split_commas,
                                    help="GLOB is one or more strings to apply to the search.",
13 changes: 12 additions & 1 deletion python/lsst/daf/butler/cli/opt/options.py
@@ -62,12 +62,23 @@ def makeCollectionTypes(context, param, value):

collections_option = MWOptionDecorator("--collections",
                                       help=unwrap("""One or more expressions that fully or partially identify
                                                   the collections to search for datasets.If not provided all
                                                   the collections to search for datasets. If not provided all
                                                   datasets are returned."""),
                                       multiple=True,
                                       callback=split_commas)


components_option = MWOptionDecorator("--components/--no-components",
                                      default=None,
                                      help=unwrap("""For --components, apply all expression patterns to
                                                  component dataset type names as well. For --no-components,
                                                  never apply patterns to components. Default (where neither
                                                  is specified) is to apply patterns to components only if
                                                  their parent datasets were not matched by the expression.
                                                  Fully-specified component datasets (`str` or `DatasetType`
                                                  instances) are always included."""))


config_option = MWOptionDecorator("-c", "--config",
                                  callback=split_kv,
                                  help="Config override, as a key-value pair.",
6 changes: 6 additions & 0 deletions python/lsst/daf/butler/cli/utils.py
@@ -638,6 +638,12 @@ def opts(self):
line."""
return self._opts

@property
def help(self):
"""Get the help text for this option. Returns an empty string if no
help was defined."""
return self.partialOpt.keywords.get("help", "")

def __call__(self, *args, **kwargs):
return self.partialOpt(*args, **kwargs)
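This property enables the help-composition pattern used by query_dimension_records above. A minimal sketch, assuming the option decorators exported from the cli.opt package; the command itself is hypothetical:

    from lsst.daf.butler.cli.opt import collections_option

    # Append command-specific detail to the option's base help text;
    # MWOptionDecorator.help returns "" when no help was defined.
    @collections_option(help=collections_option.help + " Only used with --datasets.")
    def my_command(**kwargs):  # hypothetical click command body
        pass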

3 changes: 2 additions & 1 deletion python/lsst/daf/butler/script/__init__.py
@@ -20,6 +20,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from .butlerImport import butlerImport
from .certifyCalibrations import certifyCalibrations
from .createRepo import createRepo
from .configDump import configDump
from .configValidate import configValidate
@@ -28,5 +29,5 @@
from .queryDataIds import queryDataIds
from .queryDatasets import queryDatasets
from .queryDatasetTypes import queryDatasetTypes
from .queryDimensionRecords import queryDimensionRecords
from .removeDatasetType import removeDatasetType
from .certifyCalibrations import certifyCalibrations
16 changes: 10 additions & 6 deletions python/lsst/daf/butler/script/queryDatasetTypes.py
@@ -22,6 +22,9 @@

from typing import Any, List

from astropy.table import Table
from numpy import array

from .. import Butler
from ..core.utils import globToRegex

@@ -63,10 +66,11 @@ def queryDatasetTypes(repo, verbose, glob, components):
    datasetTypes = butler.registry.queryDatasetTypes(components=components, **kwargs)
    info: List[Any]
    if verbose:
        info = [dict(name=datasetType.name,
                     dimensions=list(datasetType.dimensions.names),
                     storageClass=datasetType.storageClass.name)
                for datasetType in datasetTypes]
        table = Table(array([(d.name, str(list(d.dimensions.names)) or "None", d.storageClass.name)
                             for d in datasetTypes]),
                      names=("name", "dimensions", "storage class"))
    else:
        info = [datasetType.name for datasetType in datasetTypes]
    return {"datasetTypes": info}
        rows = ([d.name for d in datasetTypes],)
        table = Table(rows, names=("name",))
    table.sort("name")
    return table
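For reference, a self-contained sketch of the verbose table construction above, with illustrative values standing in for the rows that registry.queryDatasetTypes provides:

    from astropy.table import Table
    from numpy import array

    # Each tuple is one (name, dimensions, storage class) row; array() gives
    # Table a homogeneous 2-D string array, one column per tuple position.
    rows = array([("test", "['instrument', 'visit']", "testDatasetType")])
    table = Table(rows, names=("name", "dimensions", "storage class"))
    table.sort("name")
    table.pprint_all()  # unlike pprint(), prints every row and column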
51 changes: 51 additions & 0 deletions python/lsst/daf/butler/script/queryDimensionRecords.py
@@ -0,0 +1,51 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from astropy.table import Table

from .. import Butler
from ..core.utils import globToRegex


def queryDimensionRecords(repo, element, datasets, collections, where, no_check):
    # Docstring for supported parameters is the same as
    # Registry.queryDimensionRecords, except for ``no_check``, which is the
    # inverse of ``check``.

    if collections:
        collections = globToRegex(collections)
    else:
        collections = ...

    butler = Butler(repo)

    records = list(butler.registry.queryDimensionRecords(element,
                                                         datasets=datasets,
                                                         collections=collections,
                                                         where=where,
                                                         check=not no_check))
    if not records:
        return None

    records.sort(key=lambda r: r.dataId)  # use the dataId to sort the rows
    keys = records[0].fields.names  # order the columns the same as the record's ``fields.names``

    return Table([[getattr(record, key, None) for record in records] for key in keys], names=keys)
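A minimal sketch of the record-to-table conversion above, using a namedtuple as a hypothetical stand-in for the registry's dimension records (real records expose column names via fields.names and are sorted on their dataId):

    from collections import namedtuple
    from astropy.table import Table

    # Toy stand-in for a dimension record.
    Visit = namedtuple("Visit", ("instrument", "id", "name"))
    records = [Visit("HSC", 2, "second"), Visit("HSC", 1, "first")]
    records.sort(key=lambda r: (r.instrument, r.id))  # stand-in for sorting on dataId
    keys = records[0]._fields  # stand-in for records[0].fields.names
    table = Table([[getattr(r, k) for r in records] for k in keys], names=keys)
    table.pprint_all()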
1 change: 1 addition & 0 deletions python/lsst/daf/butler/tests/utils.py
@@ -87,6 +87,7 @@ def readTable(textTable):
"""
return AstropyTable.read(textTable,
format="ascii",
data_start=2, # skip the header row and the header row underlines.
                             fill_values=[("", 0, "")])

Review comment (Member): I'm a bit surprised that Astropy doesn't have a parser for the default pretty-print table output. Part of me is wondering whether we should add an output format option to all the tabular output subcommands so that users can easily parse the output if they want. That would also let you specify csv format in tests, for example.

Reply (Contributor Author): Formatter options are not a bad idea. Another format might be an option to choose between Table and yaml, if there's ever going to be a case where a user may pipe the output of one command to the input of another? How to do naming/data structure with yaml would probably take a little thought, though, or maybe you've got good ideas already.

In this case, AstropyTable.read normally does the Right Thing, but I ran into a situation where it was getting confused. The table had one column and one row, something like

    name
    ----
    foo

and it was reading the ---- as a value row. I spent a little time trying to figure out why but didn't really get anywhere. It seemed OK to have the unit test function start reading at index 2 (the third row) since we're consistent with table output format (until we decide not to be... then we have to come up with a better or more elaborate fix).


28 changes: 14 additions & 14 deletions tests/test_cliCmdQueryDatasetTypes.py
@@ -22,14 +22,16 @@
"""Unit tests for daf_butler CLI query-collections command.
"""

from astropy.table import Table as AstropyTable
from numpy import array
import unittest
import yaml

from lsst.daf.butler import Butler, DatasetType, StorageClass
from lsst.daf.butler.cli.butler import cli
from lsst.daf.butler.cli.cmd import query_dataset_types
from lsst.daf.butler.cli.utils import clickResultMsg, LogCliRunner
from lsst.daf.butler.tests import CliCmdTestBase
from lsst.daf.butler.tests.utils import ButlerTestHelper, readTable


class QueryDatasetTypesCmdTest(CliCmdTestBase, unittest.TestCase):
@@ -63,15 +65,15 @@ def test_all(self):
self.makeExpected(repo="here", verbose=True, glob=("foo*", ), components=False))


class QueryDatasetTypesScriptTest(unittest.TestCase):
class QueryDatasetTypesScriptTest(ButlerTestHelper, unittest.TestCase):

    def testQueryDatasetTypes(self):
        self.maxDiff = None
        datasetName = "test"
        instrumentDimension = "instrument"
        visitDimension = "visit"
        storageClassName = "testDatasetType"
        expectedNotVerbose = {"datasetTypes": [datasetName]}
        expectedNotVerbose = AstropyTable((("test",),), names=("name",))
        runner = LogCliRunner()
        with runner.isolated_filesystem():
            butlerCfg = Butler.makeRepo("here")
@@ -84,22 +86,20 @@ def testQueryDatasetTypes(self):
            # check not-verbose output:
            result = runner.invoke(cli, ["query-dataset-types", "here"])
            self.assertEqual(result.exit_code, 0, clickResultMsg(result))
            self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))
            self.assertAstropyTablesEqual(readTable(result.output), expectedNotVerbose)
            # check glob output:
            result = runner.invoke(cli, ["query-dataset-types", "here", "t*"])
            self.assertEqual(result.exit_code, 0, clickResultMsg(result))
            self.assertEqual(expectedNotVerbose, yaml.safe_load(result.output))
            self.assertAstropyTablesEqual(readTable(result.output), expectedNotVerbose)
            # check verbose output:
            result = runner.invoke(cli, ["query-dataset-types", "here", "--verbose"])
            self.assertEqual(result.exit_code, 0, clickResultMsg(result))
            response = yaml.safe_load(result.output)
            # output dimension names contain all required dimensions, more than
            # the registered dimensions, so verify the expected components
            # individually.
            self.assertEqual(response["datasetTypes"][0]["name"], datasetName)
            self.assertEqual(response["datasetTypes"][0]["storageClass"], storageClassName)
            self.assertIn(instrumentDimension, response["datasetTypes"][0]["dimensions"])
            self.assertIn(visitDimension, response["datasetTypes"][0]["dimensions"])
            expected = AstropyTable(array((
                "test",
                "['band', 'instrument', 'physical_filter', 'visit_system', 'visit']",
                "testDatasetType")),
                names=("name", "dimensions", "storage class"))
            self.assertAstropyTablesEqual(readTable(result.output), expected)

            # Now remove and check that it was removed
            # First a non-existent one
            # and check that it has gone
            result = runner.invoke(cli, ["query-dataset-types", "here"])
            self.assertEqual(result.exit_code, 0, clickResultMsg(result))
            self.assertEqual({"datasetTypes": []}, yaml.safe_load(result.output))
            self.assertIn("No results", result.output)


if __name__ == "__main__":