-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add subpackage for converting Gen2->Gen3.
- Loading branch information
Showing
8 changed files
with
1,371 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
mappers: | ||
HscMapper: | ||
# Gen3 Camera name associated with this Gen2 Mapper | ||
camera: HSC | ||
# VisitInfo objects are used to populate Gen3 Visit and Exposure DataUnit | ||
# entries; options here say how to get those from a Gen2 repo. | ||
VisitInfo: | ||
# The Gen2 DatasetType to read when trying to create a VisitInfo. | ||
# (we actually add a "_md" suffix, because we just read the metadata). | ||
DatasetType: raw | ||
# Gen2 Data ID keys for the above DatasetType that are used | ||
# to construct Gen3 Exposure/Visit identifiers. | ||
# At present, only one key can be provided and this is used directly | ||
# as the Gen3 value. | ||
uniqueKeys: | ||
- visit | ||
collections: | ||
substitutions: | ||
# Regular expression patterns and replacement strings (passed directly | ||
# to Python's re.sub), applied sequentially to the absolute path of every | ||
# Gen2 repository to construct names for the Collections they go into. | ||
# This can be used to merge Gen2 repositories into a single Collection | ||
# by making them reduce to the same name. | ||
- | ||
pattern: "^(.+)/rerun/private/" | ||
repl: "u/" | ||
- | ||
pattern: "^(.+)/rerun/" | ||
repl: "shared/" | ||
overrides: | ||
# A dictionary of DatasetType -> Collection mappings that force all | ||
# Datasets with that DatasetType into a particular Collection. | ||
# Collection names can be str.format patterns that utilize any of | ||
# the Gen3 DataUnits or Gen2 DataIds associated with the Dataset. | ||
raw: raw/{camera} | ||
ref_cat: ref/{name} | ||
ref_cat_config: ref/{name} | ||
runs: | ||
# Names of Collections (after processing via the above section) | ||
# that should be assigned to a particular Run. | ||
# Note that the *first* Collection a Dataset is added to determines | ||
# its Run; this will be the one corresponding to the Gen2 repository | ||
# that originally contained the file, unless that has been overridden. | ||
raw/HSC: 1 | ||
ref/ps1_pv3_3pi_20170110: 2 | ||
|
||
skymaps: {} # dictionary mapping repository roots to Gen3 SkyMap names | ||
|
||
storageClasses: | ||
# dictionary mapping Gen2 Mapping.persistable to Gen3 StorageClass name | ||
{} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from .walker import ConversionWalker | ||
from .writer import ConversionWriter | ||
from .translators import KeyHandler, Translator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
import re | ||
from collections import OrderedDict | ||
|
||
from .structures import Gen2Dataset, Gen2DatasetType | ||
|
||
__all__ = ("Extractor",)


# Matches one %-style named placeholder in a Gen2 path template, e.g.
# ``%(visit)07d``.  The ``name`` group captures the key inside the
# parentheses; the lazy ``.*?`` skips anything between the closing
# parenthesis and the conversion character (such as the ``07`` above); the
# ``type`` group captures that conversion character, which must be one of
# ``i``, ``d``, ``r`` or ``s``.
TEMPLATE_RE = re.compile(r'\%\((?P<name>\w+)\).*?(?P<type>[idrs])')
|
||
|
||
class FilePathParser:
    """A callable object that extracts Gen2Dataset instances from filenames
    corresponding to a particular Gen2 DatasetType.

    External code should use the `fromMapping` method to construct instances.

    Parameters
    ----------
    datasetType : `Gen2DatasetType`
        Information about the DatasetType this parser processes.
    regex : regular expression object
        Regular expression pattern with named groups for all data ID keys.
    """

    @classmethod
    def fromMapping(cls, mapping):
        """Construct a FilePathParser instance from a Gen2
        `lsst.obs.base.Mapping` instance.

        The mapping's path template is converted into a regular expression:
        every ``%(key)...`` placeholder becomes a capturing group and every
        piece of literal text between placeholders is escaped so that it
        only matches itself.

        Parameters
        ----------
        mapping : `lsst.obs.base.Mapping`
            Gen2 mapping to translate.  Its ``template``, ``datasetType``,
            ``persistable`` and ``python`` attributes are read, and its
            ``keys()`` method is called if the template contains at least
            one placeholder.

        Returns
        -------
        parser : `FilePathParser` or `None`
            A parser for the mapping, or `None` if reading
            ``mapping.template`` raises `RuntimeError`.
        """
        try:
            template = mapping.template
        except RuntimeError:
            return None
        datasetType = Gen2DatasetType(name=mapping.datasetType,
                                      keys={},
                                      persistable=mapping.persistable,
                                      python=mapping.python)
        terms = []
        allKeys = None
        literalStart = 0
        for match in TEMPLATE_RE.finditer(template):
            if allKeys is None:
                # Only ask the mapping for its keys once we know the
                # template actually has placeholders.
                allKeys = mapping.keys()
            # Literal text preceding this placeholder.  This is escaped for
            # *every* segment, including the one before the first
            # placeholder, so characters such as "." or "+" in the template
            # are never interpreted as regex syntax.
            terms.append(re.escape(template[literalStart:match.start()]))
            name = match.group("name")
            if name == "patch":
                pattern = r"\d+,\d+"
            elif match.group("type") in "id":  # integers
                pattern = r"0*\d+"
            else:
                pattern = ".+"
            # only use named groups for the first occurrence of a key
            if name not in datasetType.keys:
                terms.append(r"(?P<%s>%s)" % (name, pattern))
                datasetType.keys[name] = allKeys[name]
            else:
                terms.append(r"(%s)" % pattern)
            literalStart = match.end()
        # Trailing literal text (the whole template if it has no
        # placeholders at all).
        terms.append(re.escape(template[literalStart:]))
        return cls(datasetType=datasetType, regex=re.compile("".join(terms)))

    def __init__(self, datasetType, regex):
        self.datasetType = datasetType
        self.regex = regex

    def __call__(self, filePath, root):
        """Extract a Gen2Dataset instance from the given path.

        Parameters
        ----------
        filePath : `str`
            Path and filename relative to `root`.
        root : `str`
            Absolute path to the root of the Gen2 data repository containing
            this file.

        Returns
        -------
        dataset : `Gen2Dataset` or `None`
            A Gen2Dataset describing the file, or `None` if ``filePath``
            does not fully match this parser's regular expression.
        """
        m = self.regex.fullmatch(filePath)
        if m is None:
            return None
        # Each value in datasetType.keys is called on the matched text to
        # convert it to the data ID value.
        dataId = {k: v(m.group(k)) for k, v in self.datasetType.keys.items()}
        return Gen2Dataset(datasetType=self.datasetType, dataId=dataId,
                           filePath=filePath, root=root)
|
||
|
||
class Extractor:
    """Turn file paths from one Gen2 data repository into Gen2Dataset
    instances.

    One `FilePathParser` is built for each mapping of the repository's
    mapper; mappings for which no parser can be built are skipped.

    Parameters
    ----------
    repo : `Gen2Repo`
        Structure describing the repository this Extractor will process.
    """

    def __init__(self, repo):
        self.repo = repo
        candidates = (FilePathParser.fromMapping(mapping)
                      for mapping in self.repo.mapper.mappings.values())
        self.parsers = OrderedDict((candidate.datasetType.name, candidate)
                                   for candidate in candidates
                                   if candidate is not None)

    def __call__(self, filePath):
        """Parse a file path and return a Gen2Dataset that represents it.

        Parameters
        ----------
        filePath : `str`
            A path relative to the root of the data repository.

        Returns
        -------
        dataset : `Gen2Dataset` or None
            The result of the first parser that recognizes the path, or
            None if no parser does.
        """
        found = None
        for candidate in self.parsers.values():
            found = candidate(filePath, root=self.repo.root)
            if found is not None:
                break
        if found is None:
            return None
        # Promote the successful parser to the head of the OrderedDict so
        # that parsers are tried in most-recently-used order next time.
        self.parsers.move_to_end(found.datasetType.name, last=False)
        return found

    def getDatasetTypes(self):
        """Return a dict mapping DatasetType name to Gen2DatasetType instance."""
        datasetTypes = {}
        for parser in self.parsers.values():
            datasetType = parser.datasetType
            datasetTypes[datasetType.name] = datasetType
        return datasetTypes
Oops, something went wrong.