
DM-31367: Add butler register-dataset-type subcommand #558

Merged — 2 commits, Aug 26, 2021

Changes from all commits
1 change: 1 addition & 0 deletions doc/changes/DM-31367.feature.rst
@@ -0,0 +1 @@
Add ``butler register-dataset-type`` command to register a new dataset type.
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/cli/cmd/__init__.py
@@ -34,6 +34,7 @@
"query_dataset_types",
"query_datasets",
"query_dimension_records",
"register_dataset_type",
"retrieve_artifacts",
"remove_dataset_type",
"transfer_datasets",
@@ -55,6 +56,7 @@
query_dataset_types,
query_datasets,
query_dimension_records,
register_dataset_type,
retrieve_artifacts,
remove_dataset_type,
transfer_datasets,
30 changes: 30 additions & 0 deletions python/lsst/daf/butler/cli/cmd/commands.py
@@ -599,3 +599,33 @@ def ingest_files(**kwargs):
raw camera exposures.
"""
script.ingest_files(**kwargs)


@click.command(cls=ButlerCommand)
@repo_argument(required=True)
@click.argument("dataset_type", required=True)
@click.argument("storage_class", required=True)
@click.argument("dimensions", required=False, nargs=-1)
@click.option("--is-calibration/--no-is-calibration", is_flag=True, default=False,
Contributor:
    "no" instead of "not"?

Member Author:
    The convention is --x to enable and --no-x to disable, and that seems to be what click uses elsewhere. I think I could leave off the negating option, but then you'd have no way to set it explicitly rather than letting it be the default.

help="Indicate that this dataset type can be part of a calibration collection.")
def register_dataset_type(**kwargs):
"""Register a new dataset type with this butler repository.

DATASET_TYPE is the name of the dataset type.

STORAGE_CLASS is the name of the StorageClass to be associated with
this dataset type.

DIMENSIONS is a list of all the dimensions relevant to this
dataset type. It can be an empty list.

A component dataset type (such as "something.component") is not a
real dataset type and so cannot be defined by this command.
Component dataset types are derived automatically from the composite
dataset type when a composite storage class is specified.
"""
inserted = script.register_dataset_type(**kwargs)
if inserted:
print("Dataset type successfully registered.")
else:
print("Dataset type already existed in identical form.")
1 change: 1 addition & 0 deletions python/lsst/daf/butler/script/__init__.py
@@ -33,6 +33,7 @@
from ._pruneDatasets import pruneDatasets # depends on QueryDatasets
from .queryDatasetTypes import queryDatasetTypes
from .queryDimensionRecords import queryDimensionRecords
from .register_dataset_type import register_dataset_type
from .removeDatasetType import removeDatasetType
from .retrieveArtifacts import retrieveArtifacts
from .transferDatasets import transferDatasets
79 changes: 79 additions & 0 deletions python/lsst/daf/butler/script/register_dataset_type.py
@@ -0,0 +1,79 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("register_dataset_type",)

from typing import Tuple

from .. import Butler
from ..core import DatasetType


def register_dataset_type(repo: str, dataset_type: str, storage_class: str,
dimensions: Tuple[str, ...], is_calibration: bool = False) -> bool:
"""Register a new dataset type.

Parameters
----------
repo : `str`
URI string of the Butler repo to use.
dataset_type : `str`
The name of the new dataset type.
storage_class : `str`
The name of the storage class associated with this dataset type.
dimensions : `tuple` [`str`]
Dimensions associated with this dataset type. Can be empty.
is_calibration : `bool`
If `True` this dataset type may be included in calibration
collections.

Returns
-------
inserted : `bool`
`True` if the dataset type was added; `False` if it was already
there.

Raises
------
ValueError
Raised if an attempt is made to register a component dataset type.
Component dataset types are not real dataset types and so can not
be created by this command. They are always derived from the composite
dataset type.
"""

butler = Butler(repo, writeable=True)

composite, component = DatasetType.splitDatasetTypeName(dataset_type)
if component:
raise ValueError("Component dataset types are created automatically when the composite is created.")

# mypy does not think that Tuple[str, ...] is allowed for DatasetType
# constructor so we have to do the conversion here.
graph = butler.registry.dimensions.extract(dimensions)

datasetType = DatasetType(dataset_type, graph, storage_class,
parentStorageClass=None,
isCalibration=is_calibration,
universe=butler.registry.dimensions)

return butler.registry.registerDatasetType(datasetType)
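The script-layer function can also be called directly from Python. A minimal sketch, assuming a butler repository already exists at "here" and that the "StructuredDataDict" storage class is available; the dataset type name and dimensions are placeholders.

```python
from lsst.daf.butler.script import register_dataset_type

# Assumes an existing repository at "here"; "test" and the dimension
# names are illustrative only.
inserted = register_dataset_type(
    "here",
    dataset_type="test",
    storage_class="StructuredDataDict",
    dimensions=("instrument", "visit"),
    is_calibration=False,
)
print("registered" if inserted else "already existed in identical form")
```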
30 changes: 20 additions & 10 deletions tests/test_cliCmdQueryDatasetTypes.py
@@ -26,7 +26,6 @@
from numpy import array
import unittest

from lsst.daf.butler import Butler, DatasetType, StorageClass
from lsst.daf.butler.cli.butler import cli
from lsst.daf.butler.cli.cmd import query_dataset_types
from lsst.daf.butler.cli.utils import clickResultMsg, LogCliRunner
@@ -74,17 +73,28 @@ def testQueryDatasetTypes(self):
datasetName = "test"
instrumentDimension = "instrument"
visitDimension = "visit"
storageClassName = "testDatasetType"
storageClassName = "StructuredDataDict"
expectedNotVerbose = AstropyTable((("test",),), names=("name",))
runner = LogCliRunner()
with runner.isolated_filesystem():
butlerCfg = Butler.makeRepo("here")
butler = Butler(butlerCfg, writeable=True)
storageClass = StorageClass(storageClassName)
butler.registry.storageClasses.registerStorageClass(storageClass)
dimensions = butler.registry.dimensions.extract((instrumentDimension, visitDimension))
datasetType = DatasetType(datasetName, dimensions, storageClass)
butler.registry.registerDatasetType(datasetType)
result = runner.invoke(cli, ["create", "here"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
# Create the dataset type.
result = runner.invoke(cli, ["register-dataset-type", "here", datasetName,
storageClassName, instrumentDimension, visitDimension])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
# Okay to create it again identically.
result = runner.invoke(cli, ["register-dataset-type", "here", datasetName,
storageClassName, instrumentDimension, visitDimension])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
# Not okay to create a different version of it.
result = runner.invoke(cli, ["register-dataset-type", "here", datasetName,
storageClassName, instrumentDimension])
self.assertNotEqual(result.exit_code, 0, clickResultMsg(result))
# Not okay to try to create a component dataset type.
result = runner.invoke(cli, ["register-dataset-type", "here", "a.b",
storageClassName, instrumentDimension])
self.assertNotEqual(result.exit_code, 0, clickResultMsg(result))
# check not-verbose output:
result = runner.invoke(cli, ["query-dataset-types", "here"])
self.assertEqual(result.exit_code, 0, clickResultMsg(result))
@@ -99,7 +109,7 @@ def testQueryDatasetTypes(self):
expected = AstropyTable(array((
"test",
"['band', 'instrument', 'physical_filter', 'visit_system', 'visit']",
"testDatasetType")),
storageClassName)),
names=("name", "dimensions", "storage class"))
self.assertAstropyTablesEqual(readTable(result.output), expected)
