Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 47 additions & 59 deletions bin.src/make_latiss_export.py → bin.src/make_export.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I'd worry a little about whether any variations in make_hsc_rc2_export and make_template_export were captured, but great idea!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I'm a little worried about that too. This probably should have been done before DM-41241; then we could have felt more reassured.

Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.


"""Selectively export the contents of the LATISS dataset.
"""Selectively export some contents from a butler repo.

This script selects some LATISS data in a source butler repo, and makes an export
This script selects some data in a source butler repo, and makes an export
file for importing to the test central prompt processing repository.
"""

Expand All @@ -32,6 +32,7 @@
import logging
import sys
import tempfile
import yaml

import lsst.daf.butler as daf_butler
from lsst.utils.timer import time_this
Expand All @@ -54,6 +55,13 @@ def _make_parser():
"exported from the source repo. If no target repo is given, all "
"selected datasets in the source repo will be exported.",
)
parser.add_argument(
"--select",
required=True,
help="URI to a YAML file containing expressions to identify the "
"datasets and collections to be exported. An example is at "
"etc/export_latiss.yaml."
)
return parser


Expand All @@ -62,6 +70,8 @@ def main():

args = _make_parser().parse_args()
src_butler = daf_butler.Butler(args.src_repo)
with open(args.select, "r") as file:
wants = yaml.safe_load(file)

with tempfile.TemporaryDirectory() as temp_repo:
if args.target_repo:
Expand All @@ -72,10 +82,10 @@ def main():
target_butler = daf_butler.Butler(config)

with time_this(msg="Datasets and collections exported", level=logging.INFO):
_export_for_copy(src_butler, target_butler)
_export_for_copy(src_butler, target_butler, wants)


def _export_for_copy(butler, target_butler, wants):
    """Export selected data to make copies in another butler repo.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The source Butler from which datasets are exported.
    target_butler : `lsst.daf.butler.Butler`
        The target Butler to which datasets are exported. It is checked
        to avoid exporting existing datasets. No checks are done to
        verify if datasets are really identical.
    wants : `dict`
        A dictionary to identify selections with optional keys:

        ``"datasets"``, optional
            A list of dataset selection expressions (`list` of `dict`).
            The list is iterated over to find matching datasets in the butler,
            with the matching criteria provided via the selection expressions.
            Each selection expression is a keyword argument dictionary to
            be passed to the butler to query datasets; it has the same
            meanings as the parameters of
            `lsst.daf.butler.Registry.queryDatasets`.
        ``"collections"``, optional
            A list of collection selection expressions (`list` of `dict`).
            The list is iterated over to find matching collections in the
            butler, with the matching criteria provided via the selection
            expressions. Each selection expression is a keyword argument
            dictionary to be passed to the butler to query collections; it
            has the same meanings as the parameters of
            `lsst.daf.butler.Registry.queryCollections`.
    """
    with butler.export(format="yaml") as contents:
        for selection in wants.get("datasets", []):
            # Lazy %-formatting: the message is only built if DEBUG is on.
            logging.debug("Selecting datasets: %s", selection)
            # Copy before adding defaults so the caller's dict (and the
            # loaded YAML structure) is not mutated as a side effect.
            query_args = dict(selection)
            # Default to all dataset types when none is specified.
            query_args.setdefault("datasetType", ...)
            records = _filter_datasets(butler, target_butler, **query_args)
            contents.saveDatasets(records)

        # Save selected collections and chains.
        for selection in wants.get("collections", []):
            for collection in butler.registry.queryCollections(**selection):
                logging.debug("Selecting collection %s", collection)
                try:
                    target_butler.registry.queryCollections(collection)
                except daf_butler.registry.MissingCollectionError:
                    # MissingCollectionError is raised if the collection does
                    # not exist in target_butler; only then do we export it.
                    contents.saveCollection(collection)


if __name__ == "__main__":
Expand Down
106 changes: 0 additions & 106 deletions bin.src/make_hsc_rc2_export.py

This file was deleted.

43 changes: 0 additions & 43 deletions bin.src/make_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def _make_parser():
help="The export file containing the repository contents. Defaults to ./export.yaml.")
parser.add_argument("--instrument",
help="The short name of the instrument (HSC, LATISS, etc).")
parser.add_argument("--hsc-rc2", action="store_true", help="Extra fix up for HSC-RC2 dataset.")
return parser


Expand Down Expand Up @@ -97,46 +96,6 @@ def _add_chains(butler, instrument_name):
)


def _hsc_rc2(butler):
    """Fix up some specifics of the HSC-RC2 dataset export.

    Chains the HSC calibration collections, registers and chains the
    unbounded-calibration collection, and prepends the HSC rerun
    collections to the "templates" chain.

    Parameters
    ----------
    butler: `lsst.daf.butler.Butler`
        The source Butler from which datasets are exported
    """
    # Chain calibration collections
    instrument = Instrument.fromName("HSC", butler.registry)
    butler.registry.setCollectionChain(
        instrument.makeCalibrationCollectionName(),
        [
            "HSC/calib/DM-32378",
            "HSC/calib/gen2/20180117",
            "HSC/calib/DM-28636",
        ],
    )

    # The unbounded-calibration chain may not exist yet, so register it
    # before setting its members.
    butler.registry.registerCollection(
        instrument.makeUnboundedCalibrationRunName(),
        type=CollectionType.CHAINED
    )
    butler.registry.setCollectionChain(
        instrument.makeUnboundedCalibrationRunName(),
        [
            "HSC/calib/gen2/20180117/unbounded",
            "HSC/calib/DM-28636/unbounded",
        ],
    )
    # Chain rerun collections to templates
    # The export script should have guaranteed that there are only coadds in these collections.
    current = butler.registry.getCollectionChain("templates")
    addition = butler.registry.queryCollections("HSC/runs/*",
                                                collectionTypes=CollectionType.RUN)
    # Prepend the rerun collections ahead of the existing chain members.
    # NOTE(review): flatten=False presumably preserves any nested chains
    # inside ``current`` as-is — confirm against the daf_butler API docs.
    butler.registry.setCollectionChain("templates",
                                       list(addition) + list(current),
                                       flatten=False)


def main():
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

Expand All @@ -150,8 +109,6 @@ def main():
with time_this(msg="Import", level=logging.INFO):
butler.import_(directory=args.src_repo, filename=args.export_file, transfer="auto")
_add_chains(butler, args.instrument)
if args.hsc_rc2:
_hsc_rc2(butler)


if __name__ == "__main__":
Expand Down
Loading