Move per-DatasetType collection logic down to StandardRepoConverter.
This lets us move some datasets to their own RUN collection during
conversion rather than forcing us to put them in the main RUN for that
Gen2 rerun.  That's useful in at least ci_hsc, where we have to put
the brightObjectMasks in the rerun in Gen2 because that's where the
skymap is defined, but we'd rather put them in a top-level collection
in Gen3.
TallJimbo committed Sep 25, 2020
1 parent b8ecd45 commit aff2274
Showing 3 changed files with 30 additions and 17 deletions.
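For context, here is a hedged sketch of how a per-DatasetType run override might be expressed in a conversion config override file (a Python file executed with "config" in scope, as is usual for LSST pex_config overrides). The dataset type and collection names are illustrative only, and the existence of a dict-like config.runs field is inferred from the self.task.config.runs lookups in the diff below.

    # Hedged example of a ConvertRepoTask config override; "brightObjectMask" and
    # "masks" are invented names.  Dataset types listed in config.runs go to their
    # own RUN collection instead of the repo's default RUN.
    config.runs["brightObjectMask"] = "masks"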
python/lsst/obs/base/gen2to3/convertRepo.py (9 changes: 8 additions & 1 deletion)
@@ -480,13 +480,15 @@ def run(self, root: str, *,
                                            mapper=rootConverter.mapper,
                                            subset=rootConverter.subset)
             converters.append(converter)
+        rerunConverters = {}
         for spec in reruns:
             runRoot = spec.path
             if not os.path.isabs(runRoot):
                 runRoot = os.path.join(rootConverter.root, runRoot)
             converter = StandardRepoConverter(task=self, root=runRoot, run=spec.runName,
                                               instrument=self.instrument, subset=rootConverter.subset)
             converters.append(converter)
+            rerunConverters[spec.runName] = converter
 
         # Register the instrument if we're configured to do so.
         if self.config.doRegisterInstrument:
@@ -565,7 +567,12 @@ def run(self, root: str, *,
             if spec.chainName is not None:
                 self.butler3.registry.registerCollection(spec.chainName, type=CollectionType.CHAINED)
                 chain = [spec.runName]
-                chain.extend(spec.parents)
+                chain.extend(rerunConverters[spec.runName].getCollectionChain())
+                for parent in spec.parents:
+                    chain.append(parent)
+                    parentConverter = rerunConverters.get(parent)
+                    if parentConverter is not None:
+                        chain.extend(parentConverter.getCollectionChain())
                 chain.extend(rootConverter.getCollectionChain())
                 self.log.info("Defining %s from chain %s.", spec.chainName, chain)
                 self.butler3.registry.setCollectionChain(spec.chainName, chain)
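To make the new chain-building loop concrete, here is a sketch of the list it can assemble for a rerun with one parent. Every collection and dataset type name below is invented; the tuple entries stand for whatever each converter's getCollectionChain() returns (a run name plus the dataset type names routed to it), and the whole list is what the loop passes to setCollectionChain.

    # Illustrative only; all names are made up.
    chain = [
        "HSC/runs/ci_hsc",                  # spec.runName for this rerun
        ("masks", {"brightObjectMask"}),    # from rerunConverters[spec.runName].getCollectionChain()
        "HSC/runs/parent",                  # one of spec.parents
        ("masks", {"brightObjectMask"}),    # from that parent's converter, if it was converted too
        ("refcats", {"gaia"}),              # entries from rootConverter.getCollectionChain()
    ]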
python/lsst/obs/base/gen2to3/rootRepoConverter.py (16 changes: 1 addition & 15 deletions)
@@ -25,7 +25,7 @@
 import os
 import re
 import itertools
-from typing import TYPE_CHECKING, Iterator, Optional, Tuple, List, Set
+from typing import TYPE_CHECKING, Iterator, Optional, Tuple, List
 
 from lsst.skymap import BaseSkyMap
 from lsst.daf.butler import DatasetType, DatasetRef, DimensionGraph, FileDataset
@@ -79,7 +79,6 @@ def __init__(self, **kwds):
             self._rootSkyMap = self.task.config.skyMaps[self.task.config.rootSkyMapName].skyMap.apply()
         else:
             self._rootSkyMap = None  # All access to _rootSkyMap is guarded
-        self._chain = {}
         self._rawRefs = []
 
     def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
@@ -183,16 +182,3 @@ def iterDatasets(self) -> Iterator[FileDataset]:
                 yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, f"{htmId}.fits"),
                                   refs=DatasetRef(datasetType, dataId))
         yield from super().iterDatasets()
-
-    def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
-        # Docstring inherited from RepoConverter.
-        run = self.task.config.runs[datasetTypeName]
-        self._chain.setdefault(run, set()).add(datasetTypeName)
-        return run
-
-    def getCollectionChain(self) -> List[Tuple[str, Set[str]]]:
-        """Return tuples of run name and associated dataset type names that
-        can be used to construct a chained collection that refers to the
-        converted root repository (`list` [ `tuple` ]).
-        """
-        return list(self._chain.items())
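The wholesale deletion above is safe only because the replacement logic is inherited: in this package RootRepoConverter derives from StandardRepoConverter, which gains getRun and getCollectionChain in the next file. A minimal sketch of that assumed relationship:

    # Simplified sketch; the real classes live in standardRepoConverter.py and
    # rootRepoConverter.py and carry many more methods.
    class StandardRepoConverter:
        def getRun(self, datasetTypeName, calibDate=None):
            ...  # per-DatasetType override from config.runs, else the default run

        def getCollectionChain(self):
            ...  # [(run, {dataset type names}), ...] accumulated by getRun

    class RootRepoConverter(StandardRepoConverter):
        # No longer overrides getRun/getCollectionChain; it now uses the base
        # class implementations added in this commit.
        pass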
python/lsst/obs/base/gen2to3/standardRepoConverter.py (22 changes: 21 additions & 1 deletion)
@@ -24,7 +24,7 @@
 
 import os.path
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, Iterator, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Set, Tuple
 
 from lsst.log import Log
 from lsst.log.utils import temporaryLogLevel
@@ -91,6 +91,7 @@ def __init__(self, **kwds):
         self.butler2 = Butler2(self.root)
         self.mapper = self.butler2.getMapperClass(self.root)(root=self.root)
         self._foundSkyMapsByCoaddName = {}
+        self._chain = {}
 
     def isDatasetTypeSpecial(self, datasetTypeName: str) -> bool:
         # Docstring inherited from RepoConverter.
@@ -192,6 +193,25 @@ def iterDatasets(self) -> Iterator[FileDataset]:
                 yield FileDataset(path=os.path.join(self.root, struct.filename), refs=struct.ref)
         yield from super().iterDatasets()
 
+    def getRun(self, datasetTypeName: str, calibDate: Optional[str] = None) -> str:
+        # Docstring inherited from RepoConverter.
+        run = self.task.config.runs.get(datasetTypeName)
+        if run is not None:
+            self._chain.setdefault(run, set()).add(datasetTypeName)
+        elif self._run is None:
+            raise ValueError(f"No default run for repo at {self.root}, and no "
+                             f"override for dataset {datasetTypeName}.")
+        else:
+            run = self._run
+        return run
+
+    def getCollectionChain(self) -> List[Tuple[str, Set[str]]]:
+        """Return tuples of run name and associated dataset type names that
+        can be used to construct a chained collection that refers to the
+        converted repository (`list` [ `tuple` ]).
+        """
+        return list(self._chain.items())
+
     # Class attributes that will be shadowed by public instance attributes;
     # defined here only for documentation purposes.
 

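Finally, a hedged sketch of how the new methods behave for a rerun converter. The run name, config contents, and dataset type names are invented for illustration:

    # Suppose this converter was built with run="HSC/runs/ci_hsc" and the task
    # config has runs == {"brightObjectMask": "masks"} (all values invented).
    #
    #   converter.getRun("brightObjectMask")  ->  "masks"
    #       (per-DatasetType override; also recorded in the converter's _chain)
    #   converter.getRun("deepCoadd")         ->  "HSC/runs/ci_hsc"
    #       (no override, so the converter's default run is returned)
    #   converter.getCollectionChain()        ->  [("masks", {"brightObjectMask"})]
    #       (only the overridden dataset types appear here)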