Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-27147: rework collection names and add levels of indirection for long-lived repos #327

Merged
merged 15 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
212 changes: 158 additions & 54 deletions python/lsst/obs/base/_instrument.py

Large diffs are not rendered by default.

17 changes: 13 additions & 4 deletions python/lsst/obs/base/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,23 @@
@click.option("--calibs",
help="Path to the gen 2 calibration repo. It can be absolute or relative to gen2root.")
@click.option("--reruns", multiple=True, callback=split_commas, metavar=typeStrAcceptsMultiple,
help="List of gen 2 reruns to convert.")
help=("List of rerun paths to convert. Output collection names will be "
"guessed, which can fail if the Gen2 repository paths do not follow a "
"recognized convention. In this case, the command-line interface cannot "
"be used."))
@transfer_option(help="Mode to use to transfer files into the new repository.")
@processes_option()
@config_file_option(help="Path to a `ConvertRepoConfig` override to be included after the Instrument config "
"overrides are applied.")
@options_file_option()
def convert(*args, **kwargs):
"""Convert a Butler gen 2 repository into a gen 3 repository."""
"""Convert one or more Butler gen 2 repositories into a gen 3 repository.

This is a highly simplified interface that should only be used to convert
suites of gen 2 repositories that contain at most one calibration repo and
has no chained reruns. Custom scripts that call ConvertRepoTask should be
used on more complex suites of repositories.
"""
cli_handle_exception(script.convert, *args, **kwargs)


Expand Down Expand Up @@ -120,8 +129,8 @@ def register_instrument(*args, **kwargs):
@instrument_argument(required=True)
@click.option("--collection", required=False,
help="Name of the calibration collection that associates datasets with validity ranges.")
@click.option("--suffix", required=False,
help=("Name suffix to append (with an automatic delimiter) to all RUN collection names "
@click.option("--label", "labels", multiple=True,
help=("Extra strings to include (with automatic delimiters) in all RUN collection names, "
"as well as the calibration collection name if it is not provided via --collection."))
@options_file_option()
def write_curated_calibrations(*args, **kwargs):
Expand Down
47 changes: 40 additions & 7 deletions python/lsst/obs/base/gen2to3/calibRepoConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from collections import defaultdict
import os
import sqlite3
from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Tuple, Optional
from typing import TYPE_CHECKING, Dict, Iterator, List, Mapping, Sequence, Tuple, Optional

import astropy.time
import astropy.units as u
Expand All @@ -49,15 +49,21 @@ class CalibRepoConverter(RepoConverter):
mapper : `CameraMapper`
Gen2 mapper for the data repository. The root associated with the
mapper is ignored and need not match the root of the repository.
kwds
labels : `Sequence` [ `str` ]
Strings injected into the names of the collections that calibration
datasets are written and certified into (forwarded as the ``extra``
argument to `Instrument` methods that generate collection names and
write curated calibrations).
**kwargs
Additional keyword arguments are forwarded to (and required by)
`RepoConverter`.
"""

def __init__(self, *, mapper: CameraMapper, collection: str, **kwds):
super().__init__(run=None, **kwds)
def __init__(self, *, mapper: CameraMapper, labels: Sequence[str] = (), **kwargs):
super().__init__(run=None, **kwargs)
self.mapper = mapper
self.collection = collection
self.collection = self.task.instrument.makeCalibrationCollectionName(*labels)
self._labels = tuple(labels)
self._datasetTypes = set()

def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
Expand Down Expand Up @@ -89,7 +95,29 @@ def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[s

def _queryGen2CalibRegistry(self, db: sqlite3.Connection, datasetType: DatasetType, calibDate: str
) -> Iterator[sqlite3.Row]:
# TODO: docs
"""Query the Gen2 calibration registry for the validity ranges and
optionally detectors and filters associated with the given dataset type
and ``calibDate``.

Parameters
----------
db : `sqlite3.Connection`
DBAPI connection to the Gen2 ``calibRegistry.sqlite3`` file.
datasetType : `DatasetType`
Gen3 dataset type being queried.
calibDate : `str`
String extracted from the ``calibDate`` template entry in Gen2
filenames.

Yields
------
row : `sqlite3.Row`
SQLite result object; will have ``validStart`` and ``validEnd``
columns, may have a detector column (named
``self.task.config.ccdKey``) and/or a ``filter`` column, depending
on whether ``datasetType.dimensions`` includes ``detector`` and
``physical_filter``, respectively.
"""
fields = ["validStart", "validEnd"]
if "detector" in datasetType.dimensions.names:
fields.append(self.task.config.ccdKey)
Expand All @@ -116,6 +144,7 @@ def _queryGen2CalibRegistry(self, db: sqlite3.Connection, datasetType: DatasetTy
yield from results

def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]]):
# Docstring inherited from RepoConverter.
# Read Gen2 calibration repository and extract validity ranges for
# all datasetType + calibDate combinations we ingested.
calibFile = os.path.join(self.root, "calibRegistry.sqlite3")
Expand Down Expand Up @@ -263,10 +292,14 @@ def _finish(self, datasets: Mapping[DatasetType, Mapping[Optional[str], List[Fil
self.task.registry.certify(self.collection, refs, timespan)

def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
# Docstring inherited from RepoConverter.
if calibDate is None:
return super().getRun(datasetTypeName)
else:
return self.instrument.makeCollectionName("calib", "gen2", calibDate)
return self.instrument.makeCalibrationCollectionName(
*self._labels,
self.instrument.formatCollectionTimestamp(calibDate),
)

# Class attributes that will be shadowed by public instance attributes;
# defined here only for documentation purposes.
Expand Down