
DM-31367: Add butler register-dataset-type subcommand #558

Merged — 2 commits, Aug 26, 2021

Changes from all commits
1 change: 1 addition & 0 deletions doc/changes/DM-31367.feature.rst
@@ -0,0 +1 @@
Add ``butler register-dataset-type`` command to register a new dataset type.
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/cli/cmd/__init__.py
@@ -34,6 +34,7 @@
"query_dataset_types",
"query_datasets",
"query_dimension_records",
"register_dataset_type",
"retrieve_artifacts",
"remove_dataset_type",
"transfer_datasets",
@@ -55,6 +56,7 @@
query_dataset_types,
query_datasets,
query_dimension_records,
register_dataset_type,
retrieve_artifacts,
remove_dataset_type,
transfer_datasets,
30 changes: 30 additions & 0 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -599,3 +599,33 @@ def ingest_files(**kwargs):
raw camera exposures.
"""
script.ingest_files(**kwargs)


@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@click.argument("dataset_type", required=True)
@click.argument("storage_class", required=True)
@click.argument("dimensions", required=False, nargs=-1)
@click.option("--is-calibration/--no-is-calibration", is_flag=True, default=False,
Contributor:
    "no" instead of "not"?

Member Author:
    The convention is --x to enable and --no-x to disable, and that seems to be what click uses elsewhere. I think I could leave off the negating option, but then you'd have no way to set it explicitly rather than letting it be the default.

help="Indicate that this dataset type can be part of a calibration collection.")
def register_dataset_type(**kwargs):
"""Register a new dataset type with this butler repository.

DATASET_TYPE is the name of the dataset type.

STORAGE_CLASS is the name of the StorageClass to be associated with
this dataset type.

DIMENSIONS is a list of all the dimensions relevant to this
dataset type. It can be an empty list.

A component dataset type (such as "something.component") is not a
real dataset type and so cannot be defined by this command.
Component dataset types are derived automatically from the composite
dataset type when a composite storage class is specified.
"""
inserted = script.register_dataset_type(**kwargs)
if inserted:
print("Dataset type successfully registered.")
else:
print("Dataset type already existed in identical form.")
1 change: 1 addition & 0 deletions python/lsst/daf/butler/script/__init__.py
@@ -33,6 +33,7 @@
from ._pruneDatasets import pruneDatasets # depends on QueryDatasets
from .queryDatasetTypes import queryDatasetTypes
from .queryDimensionRecords import queryDimensionRecords
from .register_dataset_type import register_dataset_type
from .removeDatasetType import removeDatasetType
from .retrieveArtifacts import retrieveArtifacts
from .transferDatasets import transferDatasets
79 changes: 79 additions & 0 deletions python/lsst/daf/butler/script/register_dataset_type.py
@@ -0,0 +1,79 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("register_dataset_type",)

from typing import Tuple

from .. import Butler
from ..core import DatasetType


def register_dataset_type(repo: str, dataset_type: str, storage_class: str,
dimensions: Tuple[str, ...], is_calibration: bool = False) -> bool:
"""Register a new dataset type.

Parameters
----------
repo : `str`
URI string of the Butler repo to use.
dataset_type : `str`
The name of the new dataset type.
storage_class : `str`
The name of the storage class associated with this dataset type.
dimensions : `tuple` [`str`]
Dimensions associated with this dataset type. Can be empty.
is_calibration : `bool`
If `True` this dataset type may be included in calibration
collections.

Returns
-------
inserted : `bool`
`True` if the dataset type was added; `False` if it was already
there.

Raises
------
ValueError
Raised if an attempt is made to register a component dataset type.
Component dataset types are not real dataset types and so can not
be created by this command. They are always derived from the composite
dataset type.
"""

butler = Butler(repo, writeable=True)

composite, component = DatasetType.splitDatasetTypeName(dataset_type)
if component:
raise ValueError("Component dataset types are created automatically when the composite is created.")

# mypy does not think that Tuple[str, ...] is allowed for DatasetType
# constructor so we have to do the conversion here.
graph = butler.registry.dimensions.extract(dimensions)

datasetType = DatasetType(dataset_type, graph, storage_class,
parentStorageClass=None,
isCalibration=is_calibration,
universe=butler.registry.dimensions)

return butler.registry.registerDatasetType(datasetType)
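The script-layer function can also be called directly from Python. A minimal sketch, assuming a butler repository already exists at "here" and that the "StructuredDataDict" storage class is available; the dataset type name and dimensions are placeholders.

```python
from lsst.daf.butler.script import register_dataset_type

# Assumes an existing repository at "here"; "test" and the dimension
# names are illustrative only.
inserted = register_dataset_type(
    "here",
    dataset_type="test",
    storage_class="StructuredDataDict",
    dimensions=("instrument", "visit"),
    is_calibration=False,
)
print("registered" if inserted else "already existed in identical form")
```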
30 changes: 20 additions & 10 deletions tests/test_cliCmdQueryDatasetTypes.py
@@ -26,7 +26,6 @@
from numpy import array
import unittest

from lsst.daf.butler import Butler, DatasetType, StorageClass
from lsst.daf.butler.cli.butler import cli
from lsst.daf.butler.cli.cmd import query_dataset_types
from lsst.daf.butler.cli.utils import clickResultMsg, LogCliRunner
@@ -74,17 +73,28 @@ def testQueryDatasetTypes(self):
datasetName = "test"
instrumentDimension = "instrument"
visitDimension = "visit"
storageClassName = "testDatasetType"
storageClassName = "StructuredDataDict"
expectedNotVerbose = AstropyTable((("test",),), names=("name",))
runner = LogCliRunner()
with runner.isolated_filesystem():
butlerCfg = Butler.makeRepo("here")
butler = Butler(butlerCfg, writeable=True)
storageClass = StorageClass(storageClassName)
butler.registry.storageClasses.registerStorageClass(storageClass)
dimensions = butler.registry.dimensions.extract((instrumentDimension, visitDimension))
datasetType = DatasetType(datasetName, dimensions, storageClass)
butler.registry.registerDatasetType(datasetType)
result = runner.invoke(cli, ["create", "here"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
# Create the dataset type.
result = runner.invoke(cli, ["register-dataset-type", "here", datasetName,
storageClassName, instrumentDimension, visitDimension])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
# Okay to create it again identically.
result = runner.invoke(cli, ["register-dataset-type", "here", datasetName,
storageClassName, instrumentDimension, visitDimension])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
# Not okay to create a different version of it.
result = runner.invoke(cli, ["register-dataset-type", "here", datasetName,
storageClassName, instrumentDimension])
self.assertNotEqual(result.exit_code, 0, clickResultMsg(result))
# Not okay to try to create a component dataset type.
result = runner.invoke(cli, ["register-dataset-type", "here", "a.b",
storageClassName, instrumentDimension])
self.assertNotEqual(result.exit_code, 0, clickResultMsg(result))
# check not-verbose output:
result = runner.invoke(cli, ["query-dataset-types", "here"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
@@ -99,7 +109,7 @@ def testQueryDatasetTypes(self):
expected = AstropyTable(array((
"test",
"['band', 'instrument', 'physical_filter', 'visit_system', 'visit']",
"testDatasetType")),
storageClassName)),
names=("name", "dimensions", "storage class"))
self.assertAstropyTablesEqual(readTable(result.output), expected)
