Allow gen3 formatter to be associated with a dataset type in 2to3
This lets obs_decam declare that cpBias and cpFlat use non-standard
formatters.
timj committed Mar 13, 2020
1 parent 940be9f commit ca9bb26
Showing 6 changed files with 54 additions and 17 deletions.
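
Taken together, the changes below add a `formatterClasses` config field to `ConvertRepoConfig` and thread an optional `formatter` argument from the converters through the repo walker down to each `FileDataset`. A downstream config override might look like the following sketch (the formatter class path is a placeholder, not an actual obs_decam name):

    # Hypothetical ConvertRepoTask config override in an obs package
    # (e.g. config/convertRepo.py); the class path below is illustrative only.
    config.formatterClasses = {
        "cpBias": "my_pkg.formatters.CommunityPipelineCalibFormatter",
        "cpFlat": "my_pkg.formatters.CommunityPipelineCalibFormatter",
    }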
python/lsst/obs/base/gen2to3/calibRepoConverter.py (5 additions, 3 deletions)

@@ -25,7 +25,7 @@
 import os
 import sqlite3
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Dict, Iterator, Tuple
+from typing import TYPE_CHECKING, Dict, Iterator, Tuple, Type, Union

 from lsst.daf.butler import Butler as Butler3

@@ -34,7 +34,7 @@
 from .translators import makeCalibrationLabel

 if TYPE_CHECKING:
-    from lsst.daf.butler import StorageClass
+    from lsst.daf.butler import StorageClass, Formatter
     from ..cameraMapper import CameraMapper
     from ..mapping import Mapping as CameraMapperMapping  # disambiguate from collections.abc.Mapping

@@ -66,7 +66,8 @@ def iterMappings(self) -> Iterator[Tuple[str, CameraMapperMapping]]:
         yield from self.mapper.calibrations.items()

     def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[str, type],
-                             storageClass: StorageClass) -> RepoWalker.Target:
+                             storageClass: StorageClass, formatter: Union[None, str, Type[Formatter]] = None
+                             ) -> RepoWalker.Target:
         # Docstring inherited from RepoConverter.
         target = RepoWalker.Target(
             datasetTypeName=datasetTypeName,
@@ -75,6 +76,7 @@ def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[s
             keys=keys,
             instrument=self.task.instrument.getName(),
             universe=self.task.registry.dimensions,
+            formatter=formatter,
         )
         self._datasetTypes.add(target.datasetType)
         return target
python/lsst/obs/base/gen2to3/convertRepo.py (8 additions, 0 deletions)

@@ -142,6 +142,14 @@ class ConvertRepoConfig(Config):
                  "SkyWcs": "Wcs",
                  }
     )
+    formatterClasses = DictField(
+        "Mapping from dataset type name to formatter class. "
+        "By default these are derived from the formatters listed in the"
+        " Gen3 datastore configuration.",
+        keytype=str,
+        itemtype=str,
+        default={}
+    )
     doRegisterInstrument = Field(
         "If True (default), add dimension records for the Instrument and its "
         "filters and detectors to the registry instead of assuming they are "
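
Because `keytype` and `itemtype` are both `str`, overrides name the formatter class as a plain dotted string rather than importing it. A minimal sketch of the field in isolation, assuming `lsst.pex.config` behaves as shown in the diff above:

    from lsst.pex.config import Config, DictField

    class DemoConfig(Config):
        formatterClasses = DictField(
            "Mapping from dataset type name to formatter class.",
            keytype=str,
            itemtype=str,
            default={},
        )

    cfg = DemoConfig()
    cfg.formatterClasses = {"cpBias": "my_pkg.MyCalibFormatter"}  # placeholder class path
    print(dict(cfg.formatterClasses))  # {'cpBias': 'my_pkg.MyCalibFormatter'}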
python/lsst/obs/base/gen2to3/repoConverter.py (7 additions, 2 deletions)

@@ -36,6 +36,7 @@
     Set,
     Tuple,
     Union,
+    Type,
     TYPE_CHECKING,
 )

@@ -46,7 +47,7 @@
 if TYPE_CHECKING:
     from ..mapping import Mapping as CameraMapperMapping  # disambiguate from collections.abc.Mapping
     from .convertRepo import ConvertRepoTask
-    from lsst.daf.butler import StorageClass, Registry, SkyPixDimension
+    from lsst.daf.butler import StorageClass, Registry, SkyPixDimension, Formatter


 @dataclass
@@ -251,7 +252,8 @@ def iterMappings(self) -> Iterator[Tuple[str, CameraMapperMapping]]:

     @abstractmethod
     def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[str, type],
-                             storageClass: StorageClass) -> RepoWalker.Target:
+                             storageClass: StorageClass,
+                             formatter: Union[None, str, Type[Formatter]] = None) -> RepoWalker.Target:
         """Make a struct that identifies a dataset type to be extracted by
         walking the repo directory structure.
@@ -265,6 +267,8 @@ def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[s
             A dictionary mapping Gen2 data ID key to the type of its value.
         storageClass : `lsst.daf.butler.StorageClass`
             Gen3 storage class for this dataset type.
+        formatter : `lsst.daf.butler.Formatter` or `str`, optional
+            A Gen 3 formatter class or fully-qualified name.

         Returns
         -------
@@ -357,6 +361,7 @@ class implementation at some point in their own logic.
                     template=template+extension,
                     keys=mapping.keys(),
                     storageClass=storageClass,
+                    formatter=self.task.config.formatterClasses.get(datasetTypeName),
                 )
                 self.task.log.debug("Adding template to walker: %s", template)
                 walkerInputs.append(walkerInput)
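
The new `formatter=self.task.config.formatterClasses.get(datasetTypeName)` line relies on `dict.get` returning `None` when a dataset type has no override, so the target's formatter stays unset and the Gen3 datastore configuration decides. In miniature:

    formatterClasses = {"cpBias": "my_pkg.MyCalibFormatter"}  # illustrative contents

    assert formatterClasses.get("cpBias") == "my_pkg.MyCalibFormatter"  # explicit override
    assert formatterClasses.get("raw") is None  # no entry: datastore default applies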
python/lsst/obs/base/gen2to3/repoWalker/builders.py (13 additions, 5 deletions)

@@ -37,9 +37,11 @@
     List,
     Optional,
     Tuple,
+    Type,
+    Union,
 )

-from lsst.daf.butler import DatasetType, DimensionUniverse, StorageClass
+from lsst.daf.butler import DatasetType, DimensionUniverse, StorageClass, Formatter
 from ..translators import Translator
 from .parser import PathElementParser
 from .scanner import PathElementHandler, DirectoryScanner
@@ -192,18 +194,22 @@ class BuilderTargetInput(BuilderInput):
         `StorageClass` for the Gen3 dataset type.
     universe : `DimensionUniverse`
         All candidate dimensions for the Gen3 dataset type.
+    formatter : `lsst.daf.butler.Formatter` or `str`, optional
+        A Gen 3 formatter class or fully-qualified name.
     kwargs:
         Additional keyword arguments are passed to `Translator.makeMatching`,
         along with ``datasetTypeName`` and ``keys``.
     """
     def __init__(self, *, datasetTypeName: str, template: str, keys: Dict[str, type],
-                 storageClass: StorageClass, universe: DimensionUniverse, **kwargs: Any):
+                 storageClass: StorageClass, universe: DimensionUniverse,
+                 formatter: Union[None, str, Type[Formatter]], **kwargs: Any):
         # strip off [%HDU] identifiers from e.g. DECam Community Pipeline products
         template = template.split('[%(')[0]
         super().__init__(template=template, keys=keys)
         self._translator = Translator.makeMatching(datasetTypeName, keys, **kwargs)
         self.datasetType = DatasetType(datasetTypeName, dimensions=self._translator.dimensionNames,
                                        storageClass=storageClass, universe=universe)
+        self._formatter = formatter

     def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeKeys: Dict[str, type], *,
               fileIgnoreRegEx: Optional[re.Pattern], dirIgnoreRegEx: Optional[re.Pattern]
@@ -214,16 +220,18 @@ def build(self, parser: PathElementParser, allKeys: Dict[str, type], cumulativeK
             # stored as multi-extension FITS files.
             return MultiExtensionFileHandler(parser=parser,
                                              translator=self._translator,
-                                             datasetType=self.datasetType)
+                                             datasetType=self.datasetType,
+                                             formatter=self._formatter)
         else:
-            return TargetFileHandler(parser=parser, translator=self._translator, datasetType=self.datasetType)
+            return TargetFileHandler(parser=parser, translator=self._translator, datasetType=self.datasetType,
+                                     formatter=self._formatter)

     def prune(self) -> Tuple[BuilderNode, List[str], bool]:
         # Docstring inherited from BuilderNode.
         return self, [], False

     datasetType: DatasetType
-    """The Gen3 dataset type extracted by the hander this object builds
+    """The Gen3 dataset type extracted by the handler this object builds
     (`lsst.daf.butler.DatasetType`).
     """
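
The `Union[None, str, Type[Formatter]]` annotation lets callers pass nothing, a dotted class name, or the class object itself; the commit stores the value untouched and hands it on. Resolving a dotted name to a class, were it needed, could look like this generic sketch (not code from this commit):

    import importlib
    from typing import Optional, Union

    def resolve_class(spec: Union[None, str, type]) -> Optional[type]:
        """Return a class given None, a fully-qualified name, or the class itself."""
        if spec is None or isinstance(spec, type):
            return spec
        module_name, _, class_name = spec.rpartition(".")
        return getattr(importlib.import_module(module_name), class_name)

    # e.g. resolve_class("collections.OrderedDict") returns collections.OrderedDict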
python/lsst/obs/base/gen2to3/repoWalker/handlers.py (16 additions, 4 deletions)

@@ -34,6 +34,9 @@
     List,
     Mapping,
     Optional,
+    Union,
+    Type,
+    TYPE_CHECKING
 )

 import lsst.afw.fits
@@ -48,6 +51,9 @@
 from .parser import PathElementParser
 from .scanner import PathElementHandler, DirectoryScanner

+if TYPE_CHECKING:
+    from lsst.daf.butler import Formatter
+

 class IgnoreHandler(PathElementHandler):
     """A `PathElementHandler` that matches via a regular expression, and does
@@ -142,6 +148,8 @@ def handle(self, path: str, nextDataId2: dict, datasets: Mapping[DatasetType, Li
             A callable taking a single `DataCoordinate` argument and returning
             `bool`, indicating whether that (Gen3) data ID represents one
             that should be included in the scan.
+        formatterMap : `dict`, optional
+            Mapping from dataset type to a specialist formatter.
         """
         raise NotImplementedError()

@@ -258,13 +266,17 @@ class TargetFileHandler(ParsedPathElementHandler):
         Object that translates data IDs from Gen2 to Gen3.
     datasetType : `lsst.daf.butler.DatasetType`
         Gen3 dataset type for the datasets this handler matches.
+    formatter : `lsst.daf.butler.Formatter` or `str`, optional
+        A Gen 3 formatter class or fully-qualified name.
     """
-    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType):
+    def __init__(self, parser: PathElementParser, translator: Translator, datasetType: DatasetType,
+                 formatter: Union[None, str, Type[Formatter]] = None):
         super().__init__(parser=parser)
         self._translator = translator
         self._datasetType = datasetType
+        self._formatter = formatter

-    __slots__ = ("_translator", "_datasetType")
+    __slots__ = ("_translator", "_datasetType", "_formatter")

     def isForFiles(self) -> bool:
         # Docstring inherited from PathElementHandler.
@@ -276,7 +288,7 @@ def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[Fil
         dataId3 = self.translate(nextDataId2, partial=False, log=log)
         if predicate(dataId3):
             datasets[self._datasetType].append(FileDataset(refs=[DatasetRef(self._datasetType, dataId3)],
-                                                           path=path))
+                                                           path=path, formatter=self._formatter))

     def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
         # Docstring inherited from PathElementHandler.
@@ -323,7 +335,7 @@ def get_detectors(filename):
                                       calibration_label=label)
             refs.append(DatasetRef(self._datasetType, newDataId3))

-        datasets[self._datasetType].append(FileDataset(refs=refs, path=path))
+        datasets[self._datasetType].append(FileDataset(refs=refs, path=path, formatter=self._formatter))

     def translate(self, dataId2: dict, *, partial: bool = False, log: Log) -> Optional[DataCoordinate]:
         assert partial is True, "We always require partial, to ignore 'ccdnum'"
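
The formatter stored on each `FileDataset` ultimately tells ingest which formatter to record for the file. A rough sketch of the shape of that hand-off, assuming the daf_butler ingest API of this era; `butler` and `ref` stand in for objects the converter already has:

    from lsst.daf.butler import FileDataset

    # butler: a writeable Gen3 Butler; ref: a DatasetRef for the translated data ID
    dataset = FileDataset(
        path="calib/cpBias/cpBias-ct01.fits",   # illustrative Gen2 path
        refs=[ref],
        formatter="my_pkg.MyCalibFormatter",    # placeholder dotted class name
    )
    butler.ingest(dataset, transfer="symlink")  # formatter recorded with the dataset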
python/lsst/obs/base/gen2to3/standardRepoConverter.py (5 additions, 3 deletions)

@@ -24,7 +24,7 @@

 import os.path
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, Iterator, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, Iterator, Optional, Tuple, Union, Type

 from lsst.log import Log
 from lsst.log.utils import temporaryLogLevel
@@ -39,7 +39,7 @@

 if TYPE_CHECKING:
     from lsst.skymap import BaseSkyMap
-    from lsst.daf.butler import StorageClass
+    from lsst.daf.butler import StorageClass, Formatter
     from .cameraMapper import CameraMapper
     from ..mapping import Mapping as CameraMapperMapping  # disambiguate from collections.abc.Mapping

@@ -164,7 +164,8 @@ def findMatchingSkyMap(self, datasetTypeName: str) -> Tuple[Optional[BaseSkyMap]
         return None, None

     def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[str, type],
-                             storageClass: StorageClass) -> RepoWalker.Target:
+                             storageClass: StorageClass,
+                             formatter: Union[None, str, Type[Formatter]] = None) -> RepoWalker.Target:
         # Docstring inherited from RepoConverter.
         skyMap, skyMapName = self.findMatchingSkyMap(datasetTypeName)
         return RepoWalker.Target(
@@ -176,6 +177,7 @@ def makeRepoWalkerTarget(self, datasetTypeName: str, template: str, keys: Dict[s
             instrument=self.task.instrument.getName(),
             skyMap=skyMap,
             skyMapName=skyMapName,
+            formatter=formatter,
        )

     def iterDatasets(self) -> Iterator[FileDataset]:
