Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions bin.src/make_preloaded_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python
# This file is part of prompt_prototype.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


"""Selectively export the contents of an ap_verify dataset.

This script selects the subset of an ap_verify dataset's preloaded repository that
matches what the central prompt processing repository ought to look like.
"""


import argparse
import logging
import os
import re
import sys
import time

import lsst.daf.butler as daf_butler


def _make_parser():
parser = argparse.ArgumentParser()
parser.add_argument("--src-repo", required=True,
help="The location of the repository to be exported.")
return parser


def main():
    """Export the repository named on the command line and log the runtime.

    The ``--src-repo`` argument is converted to an absolute path before being
    passed to `_export_for_copy`. Progress and the elapsed time are logged at
    INFO level to standard output.
    """
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)

    args = _make_parser().parse_args()
    gen3_repo = os.path.abspath(args.src_repo)

    logging.info("Exporting Gen 3 registry to configure new repos...")
    # Measure the interval with a monotonic clock; the wall clock can be
    # adjusted while the export runs, which would skew (or even negate) the
    # reported duration.
    start = time.perf_counter_ns()
    _export_for_copy(gen3_repo)
    end = time.perf_counter_ns()
    logging.info("Export finished in %.3fs.", 1e-9 * (end - start))


def _get_dataset_types():
"""Identify the dataset types that should be marked for export.

Returns
-------
types : iterable [`str` or `re.Pattern`]
The dataset types to include
"""
# Everything except raws and SS ephemerides
return [re.compile("^(?!raw|visitSsObjects).*")]


def _export_for_copy(repo):
    """Export a Gen 3 repository so that a dataset can make copies later.

    The export is written in YAML format and contains all detector data IDs,
    all datasets whose type is selected by `_get_dataset_types` (searched
    across all collections), and every calibration collection.

    Parameters
    ----------
    repo : `str`
        The location of the Gen 3 repository.
    """
    butler = daf_butler.Butler(repo)
    with butler.export(format="yaml") as contents:
        # Need all detectors, even those without data, for visit definition.
        # This requirement is carried over from ap_verify: visit definition
        # used to fail if any of the detector IDs in the instrument
        # definition were missing from the registry. It has not been
        # re-checked since, so it may no longer be necessary.
        contents.saveDataIds(butler.registry.queryDataIds({"detector"}).expanded())
        contents.saveDatasets(butler.registry.queryDatasets(
            datasetType=_get_dataset_types(), collections=...))
        # Save calibration collection
        for collection in butler.registry.queryCollections(
                collectionTypes=daf_butler.CollectionType.CALIBRATION):
            contents.saveCollection(collection)
        # Do not export chains, as they will need to be reworked to satisfy
        # prompt processing's assumptions.


if __name__ == "__main__":
main()
113 changes: 113 additions & 0 deletions bin.src/make_remote_butler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/usr/bin/env python
# This file is part of prompt_prototype.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


"""Simple script for creating a repository at a remote URI, given
a source repository and export file.

For most values of --target-repo and --seed-config, this script is only useful
if run from the prompt-proto project on Google Cloud.

The user is responsible for clearing any old copies of the repository from
both the target URI and the registry database.
"""


import argparse
import logging
import os
import sys

from lsst.utils import getPackageDir
from lsst.utils.timer import time_this
from lsst.daf.butler import Butler, CollectionType, Config
from lsst.obs.base import Instrument


def _make_parser():
    """Build the command-line parser for the repository-creation script.

    Returns
    -------
    parser : `argparse.ArgumentParser`
        A parser with required ``--src-repo`` and ``--target-repo`` options,
        plus optional ``--seed-config`` and ``--export-file`` options.
    """
    default_seed_config = os.path.join(getPackageDir("prompt_prototype"), "etc", "db_butler.yaml")

    cli = argparse.ArgumentParser()
    # Could reasonably be positional arguments, but keep them as keywords to
    # prevent users from confusing --src-repo with --target-repo.
    cli.add_argument(
        "--src-repo",
        required=True,
        help="The location of the repository whose files are to be copied.",
    )
    cli.add_argument(
        "--target-repo",
        required=True,
        help="The URI of the repository to create.",
    )
    cli.add_argument(
        "--seed-config",
        default=default_seed_config,
        help="The config file to use for the new repository. Defaults to etc/db_butler.yaml.",
    )
    cli.add_argument(
        "--export-file",
        default="export.yaml",
        help="The export file containing the repository contents. Defaults to ./export.yaml.",
    )
    return cli


def _add_chains(butler):
    """Create chained collections that give the repository a uniform interface.

    Three chains are registered: ``templates`` and ``refcats``, each
    collecting the RUN collections under the prefix of the same name, and the
    instrument's "defaults" chain, which combines the calibration collection
    with ``templates``, ``skymaps``, and ``refcats``.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler pointing to the repository to modify. Assumed to already
        contain the following collections:

        - standard calibration collection
        - standard skymap collection
        - templates/*
        - refcats/*
    """
    registry = butler.registry

    # Each umbrella chain gathers every RUN collection under its own prefix.
    for chain, members_glob in (("templates", "templates/*"), ("refcats", "refcats/*")):
        registry.registerCollection(chain, type=CollectionType.CHAINED)
        members = list(registry.queryCollections(members_glob, collectionTypes=CollectionType.RUN))
        registry.setCollectionChain(chain, members)

    # The repository's instrument is taken from the first registered
    # instrument data ID.
    instrument_name = list(registry.queryDataIds("instrument"))[0]["instrument"]
    instrument = Instrument.fromName(instrument_name, registry)

    defaults = instrument.makeCollectionName("defaults")
    default_members = [instrument.makeCalibrationCollectionName(), "templates", "skymaps", "refcats"]
    registry.registerCollection(defaults, type=CollectionType.CHAINED)
    registry.setCollectionChain(defaults, default_members)


def main():
    """Create a repository at ``--target-repo`` and populate it.

    The new repository is seeded from ``--seed-config``, filled by importing
    ``--export-file`` with files taken from ``--src-repo``, and finally given
    the chained collections set up by `_add_chains`. Repository creation,
    Butler construction, and the import are each wrapped in `time_this` at
    INFO level.
    """
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)

    args = _make_parser().parse_args()
    seed_config = Config(args.seed_config)
    logging.info("Creating repository at %s...", args.target_repo)
    # Deliberately call the Butler Python API instead of the ``butler``
    # command line (``butler create`` + ``butler import``): we need to
    # understand what our overheads are, and the command-line utility loses a
    # lot of time on package import and Butler setup.
    with time_this(msg="Repository creation", level=logging.INFO):
        config = Butler.makeRepo(args.target_repo, config=seed_config, overwrite=False)
    with time_this(msg="Butler creation", level=logging.INFO):
        butler = Butler(config, writeable=True)
    with time_this(msg="Import", level=logging.INFO):
        butler.import_(directory=args.src_repo, filename=args.export_file, transfer="auto")
    _add_chains(butler)


if __name__ == "__main__":
main()
13 changes: 9 additions & 4 deletions doc/playbook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
Playbook for the Prompt Processing Proposal and Prototype
#########################################################

.. _DMTN-219: https://dmtn-219.lsst.io/

Table of Contents
=================

Expand Down Expand Up @@ -48,7 +50,7 @@ Pub/Sub Topics
==============

One Google Pub/Sub topic is used for ``nextVisit`` events.
Additional topics are used for images from each instrument, where the instrument is one of ``LSSTCam``, ``LSSTComCam``, ``LATISS``, or ``DECam``.
Additional topics are used for images from each instrument, where the instrument is one of ``LSSTCam``, ``LSSTComCam``, ``LATISS``, ``DECam``, or ``HSC``.

To create the topic, in the Google Cloud Console for the ``prompt-proto`` project:

Expand All @@ -65,11 +67,14 @@ On the other hand, using multiple topics is also simple to do.
Buckets
=======

A single bucket named ``rubin-prompt-proto-main`` has been created to hold incoming raw images.
A single bucket named ``rubin-prompt-proto-main`` has been created to hold the central repository described in `DMTN-219`_, as well as incoming raw images.

The bucket ``rubin-prompt-proto-support-data-template`` contains a pristine copy of the calibration datasets and templates.
This bucket is not intended for direct use by the prototype, but can be used to restore the central repository to its state at the start of an observing run.

An additional bucket will be needed eventually to hold a Butler repo containing calibration datasets and templates.
The bucket ``rubin-prompt-proto-unobserved`` contains raw files that the upload script(s) can draw from to create incoming raws for ``rubin-prompt-proto-main``.

The raw image bucket has had notifications configured for it; these publish to a Google Pub/Sub topic as mentioned in the previous section.
The ``-main`` bucket has had notifications configured for it; these publish to a Google Pub/Sub topic as mentioned in the previous section.
To configure these notifications, in a shell:

.. code-block:: sh
Expand Down
3 changes: 3 additions & 0 deletions etc/db_butler.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Seed Butler configuration; this is the file make_remote_butler.py uses as its default --seed-config.
# NOTE(review): presumably `db` is the registry database URL and `namespace` the schema within it -- confirm.
registry:
db: postgresql://postgres@localhost:5432/
namespace: support_data_template
1 change: 1 addition & 0 deletions python/tester/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Instrument:
"LSSTComCam": Instrument(2, 9),
"LATISS": Instrument(1, 1),
"DECam": Instrument(1, 62),
"HSC": Instrument(1, 112),
}
EXPOSURE_INTERVAL = 18
SLEW_INTERVAL = 2
Expand Down
1 change: 1 addition & 0 deletions ups/prompt_prototype.table
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# the "base" package.
setupRequired(base)
setupRequired(sconsUtils)
setupRequired(utils) # Used by scripts in bin.src
setupRequired(daf_butler) # Used by middleware_interface module.
setupRequired(obs_base)
setupRequired(obs_lsst)
Expand Down