2 changes: 2 additions & 0 deletions .github/workflows/build-service.yml
@@ -42,6 +42,8 @@ jobs:
cd /home/lsst/prompt_prototype
source /opt/lsst/software/stack/loadLSST.bash
setup -r .
# Fix permissions; arg must be absolute path.
git config --global --add safe.directory /home/lsst/prompt_prototype
scons'

update-service-image:
4 changes: 3 additions & 1 deletion bin.src/make_remote_butler.py
@@ -25,7 +25,9 @@
a source repository and export file.

For most values of --target-repo and --seed-config, this script is only useful
if run from the prompt-proto project on Google Cloud.
if run from the prompt-proto project on Google Cloud (because of access
restrictions to **both** the repository's storage location and its registry
database).

The user is responsible for clearing any old copies of the repository from
both the target URI and the registry database.
40 changes: 29 additions & 11 deletions bin/prompt_prototype_upload_raws.sh
@@ -20,19 +20,37 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# This script uploads the raw files from the ap_verify_ci_cosmos_pdr2 dataset
# to Google Storage. It renames the files to match prompt_prototype conventions.
# The user must have gsutil already configured, and must have
# ap_verify_ci_cosmos_pdr2 set up.
# This script uploads the raw files from the HSC PDR2 run to Google Storage. It
# renames the files to match prompt_prototype conventions. The user must have
# gsutil already configured.

set -e # Abort on any error

RAW_DIR="${AP_VERIFY_CI_COSMOS_PDR2_DIR:?'dataset is not set up'}/raw"
RAW_DIR="/datasets/hsc/raw/ssp_pdr2/2016-03-07"
UPLOAD_BUCKET=rubin-prompt-proto-unobserved

# Filename format is defined in tester/upload.py and activator/activator.py:
# instrument/detector/group/snap/instrument-group-snap-exposureId-filter-detector
gsutil cp "${RAW_DIR}/HSC-0059150-050.fits.gz" \
gs://${UPLOAD_BUCKET}/HSC/50/2016030700001/0/HSC-2016030700001-0-0059150-HSC-G-50.fits.gz
gsutil cp "${RAW_DIR}/HSC-0059160-051.fits.gz" \
gs://${UPLOAD_BUCKET}/HSC/51/2016030700002/0/HSC-2016030700002-0-0059160-HSC-G-51.fits.gz
# Filename format is defined in activator/raw.py:
# instrument/detector/group/snap/exposureId/filter/instrument-group-snap-exposureId-filter-detector
gsutil cp "${RAW_DIR}/HSCA05913553.fits" \
gs://${UPLOAD_BUCKET}/HSC/0/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-0.fits
gsutil cp "${RAW_DIR}/HSCA05913542.fits" \
gs://${UPLOAD_BUCKET}/HSC/4/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-4.fits
gsutil cp "${RAW_DIR}/HSCA05913543.fits" \
gs://${UPLOAD_BUCKET}/HSC/5/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-5.fits

gsutil cp "${RAW_DIR}/HSCA05914353.fits" \
gs://${UPLOAD_BUCKET}/HSC/0/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-0.fits
gsutil cp "${RAW_DIR}/HSCA05914343.fits" \
gs://${UPLOAD_BUCKET}/HSC/5/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-5.fits
gsutil cp "${RAW_DIR}/HSCA05914337.fits" \
gs://${UPLOAD_BUCKET}/HSC/11/2016030700002/0/0059142/HSC-G/HSC-2016030700002-0-0059142-HSC-G-11.fits

gsutil cp "${RAW_DIR}/HSCA05915112.fits" \
gs://${UPLOAD_BUCKET}/HSC/50/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-50.fits
gsutil cp "${RAW_DIR}/HSCA05915116.fits" \
gs://${UPLOAD_BUCKET}/HSC/58/2016030700003/0/0059150/HSC-G/HSC-2016030700003-0-0059150-HSC-G-58.fits

gsutil cp "${RAW_DIR}/HSCA05916109.fits" \
gs://${UPLOAD_BUCKET}/HSC/43/2016030700004/0/0059160/HSC-G/HSC-2016030700004-0-0059160-HSC-G-43.fits
gsutil cp "${RAW_DIR}/HSCA05916113.fits" \
gs://${UPLOAD_BUCKET}/HSC/51/2016030700004/0/0059160/HSC-G/HSC-2016030700004-0-0059160-HSC-G-51.fits
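
The destination paths above follow the naming convention now centralized in python/activator/raw.py. As a sanity check, a minimal sketch (assuming the repository's python/ directory is on PYTHONPATH so the activator package is importable) that regenerates the first object key. Note that get_raw_path hardcodes a .fz extension while this script uploads .fits files; both still satisfy RAW_REGEXP, which only requires an extension beginning with .f:

from activator.raw import get_raw_path

# Rebuild the object key for the first gsutil upload above.
path = get_raw_path("HSC", 0, "2016030700001", 0, "0059134", "HSC-G")
print(path)
# -> HSC/0/2016030700001/0/0059134/HSC-G/HSC-2016030700001-0-0059134-HSC-G-0.fz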
29 changes: 6 additions & 23 deletions pipelines/calibrate.py
@@ -1,32 +1,15 @@
# Config override for lsst.pipe.tasks.calibrate.CalibrateTask
# This config is for the ap_verify datasets that use gaia/panstarrs as their
# refcat names.
from lsst.meas.algorithms import LoadIndexedReferenceObjectsTask

# Use gaia for astrometry (phot_g_mean for everything, as that is the broadest
# band with the most depth)
# Use panstarrs for photometry (grizy filters)
for refObjLoader in (config.astromRefObjLoader,
config.photoRefObjLoader,):
refObjLoader.retarget(LoadIndexedReferenceObjectsTask)
# band with the most depth).

config.connections.astromRefCat = "gaia"
config.astromRefObjLoader.ref_dataset_name = config.connections.astromRefCat
config.astromRefObjLoader.filterMap = {
"u": "phot_g_mean",
"g": "phot_g_mean",
"r": "phot_g_mean",
"i": "phot_g_mean",
"z": "phot_g_mean",
"y": "phot_g_mean",
"VR": "phot_g_mean"}
config.astromRefObjLoader.anyFilterMapsToThis = "phot_g_mean"
config.astromRefObjLoader.filterMap = {}
Contributor:
These two lines are technically not necessary, since DM-27013 is done.

Member Author:
I didn't know that, but for configuration that depends on the input data, I prefer to be explicit instead of trusting that the defaults match up with what I really need.


# Use panstarrs for photometry (grizy filters).
config.connections.photoRefCat = "panstarrs"
config.photoRefObjLoader.ref_dataset_name = config.connections.photoRefCat
config.photoRefObjLoader.filterMap = {
"u": "g",
# TODO: workaround for DM-29186
# "g": "g",
# "r": "r",
# "i": "i",
# "z": "z",
# "y": "y",
"VR": "g"}
13 changes: 4 additions & 9 deletions python/activator/activator.py
@@ -36,6 +36,7 @@
from lsst.obs.base import Instrument
from .make_pgpass import make_pgpass
from .middleware_interface import MiddlewareInterface
from .raw import RAW_REGEXP
from .visit import Visit

PROJECT_ID = "prompt-proto"
@@ -46,12 +46,6 @@
active_instrument = Instrument.from_string(config_instrument)
calib_repo = os.environ["CALIB_REPO"]
image_bucket = os.environ["IMAGE_BUCKET"]
# Format for filenames of raws uploaded to image_bucket:
# instrument/detector/group/snap/instrument-group-snap-expid-filter-detector.(fits, fz, fits.gz)
oid_regexp = re.compile(
r"(?P<instrument>.*?)/(?P<detector>\d+)/(?P<group>.*?)/(?P<snap>\d+)/"
r"(?P=instrument)-(?P=group)-(?P=snap)-(?P<expid>.*?)-(?P<filter>.*?)-(?P=detector)\.f"
)
timeout = os.environ.get("IMAGE_TIMEOUT", 50)

logging.basicConfig(
@@ -121,7 +116,7 @@ def check_for_snap(
was found. If multiple files match, this function logs an error
but returns one of the files anyway.
"""
prefix = f"{instrument}/{detector}/{group}/{snap}/{instrument}-{group}-{snap}-"
prefix = f"{instrument}/{detector}/{group}/{snap}/"
_log.debug(f"Checking for '{prefix}'")
blobs = list(storage_client.list_blobs(image_bucket, prefix=prefix))
if not blobs:
@@ -185,7 +180,7 @@ def next_visit_handler() -> Tuple[str, int]:
expected_visit.detector,
)
if oid:
m = re.match(oid_regexp, oid)
m = re.match(RAW_REGEXP, oid)
mwi.ingest_image(oid)
expid_set.add(m.group('expid'))

@@ -218,7 +213,7 @@ def next_visit_handler() -> Tuple[str, int]:
for received in response.received_messages:
ack_list.append(received.ack_id)
oid = received.message.attributes["objectId"]
m = re.match(oid_regexp, oid)
m = re.match(RAW_REGEXP, oid)
if m:
instrument, detector, group, snap, expid = m.groups()
_log.debug("instrument, detector, group, snap, expid = %s", m.groups())
55 changes: 25 additions & 30 deletions python/activator/middleware_interface.py
@@ -75,6 +75,12 @@ class MiddlewareInterface:
appropriate for use in the Google Cloud environment; typically only
change this when running local tests.
"""
_COLLECTION_TEMPLATE = "templates"
"""The collection used for templates.
"""
_COLLECTION_SKYMAP = "skymaps"
"""The collection used for skymaps.
"""

def __init__(self, central_butler: Butler, image_bucket: str, instrument: str,
butler: Butler,
@@ -90,7 +96,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str,
self._download_store = None
self.instrument = lsst.obs.base.Instrument.from_string(instrument)

self.output_collection = f"{self.instrument.getName()}/prompt"
self.output_collection = self.instrument.makeCollectionName("prompt")

self._init_local_butler(butler)
self._init_ingester()
@@ -104,7 +110,7 @@ def __init__(self, central_butler: Butler, image_bucket: str, instrument: str,
# This code will break once cameras start being versioned.
self.camera = self.central_butler.get(
"camera", instrument=self.instrument.getName(),
collections=self.instrument.makeCalibrationCollectionName("unbounded")
collections=self.instrument.makeUnboundedCalibrationRunName()
)
self.skymap = self.central_butler.get("skyMap")

@@ -170,9 +176,9 @@ def prep_butler(self, visit: Visit) -> None:
self._export_calibs(export, visit.detector, visit.filter)

# CHAINED collections
export.saveCollection("refcats")
export.saveCollection("templates")
export.saveCollection(self.instrument.makeCollectionName("defaults"))
export.saveCollection(self.instrument.makeRefCatCollectionName())
export.saveCollection(self._COLLECTION_TEMPLATE)
export.saveCollection(self.instrument.makeUmbrellaCollectionName())

self.butler.import_(filename=export_file.name,
directory=self.central_butler.datastore.root,
@@ -203,10 +209,11 @@ def _export_refcats(self, export, center, radius):
# collection, so we have to specify a list here. Replace this
# with another solution ASAP.
possible_refcats = ["gaia", "panstarrs", "gaia_dr2_20200414", "ps1_pv3_3pi_20170110"]
export.saveDatasets(self.central_butler.registry.queryDatasets(possible_refcats,
collections="refcats",
where=htm_where,
findFirst=True))
export.saveDatasets(self.central_butler.registry.queryDatasets(
possible_refcats,
collections=self.instrument.makeRefCatCollectionName(),
where=htm_where,
findFirst=True))

def _export_skymap_and_templates(self, export, center, detector, wcs):
"""Export the skymap and templates for this visit from the central
@@ -229,7 +236,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs):
# otherwise we get a UNIQUE constraint error when prepping for the
# second visit.
export.saveDatasets(self.central_butler.registry.queryDatasets("skyMap",
collections="skymaps",
collections=self._COLLECTION_SKYMAP,
findFirst=True))
# Getting only one tract should be safe: we're getting the
# tract closest to this detector, so we should be well within
@@ -247,7 +254,7 @@ def _export_skymap_and_templates(self, export, center, detector, wcs):
# TODO: alternately, can we just assume that there is exactly
# one coadd type in the central butler?
export.saveDatasets(self.central_butler.registry.queryDatasets("*Coadd",
collections="templates",
collections=self._COLLECTION_TEMPLATE,
where=template_where))

def _export_calibs(self, export, detector_id, filter):
Expand Down Expand Up @@ -295,7 +302,7 @@ def _prep_collections(self):
CollectionType.RUN)
self.butler.registry.registerCollection(self.output_run, CollectionType.RUN)
self.butler.registry.registerCollection(self.output_collection, CollectionType.CHAINED)
collections = [self.instrument.makeCollectionName("defaults"),
collections = [self.instrument.makeUmbrellaCollectionName(),
self.instrument.makeDefaultRawIngestRunName(),
self.output_run]
self.butler.registry.setCollectionChain(self.output_collection, collections)
@@ -386,28 +393,16 @@ def run_pipeline(self, visit: Visit, exposure_ids: set) -> None:
Group of snaps from one detector to be processed.
exposure_ids : `set`
Identifiers of the exposures that were received.
TODO: We need to be careful about the distinction between snap IDs
(a running series from 0 to N-1) and exposure IDs (which are more
complex and encode other info). Butler currently does not recognize
a snap ID, as such.
TODO: I believe this is unnecessary because it should be encoded
in the `visit` object, but we'll have to test how that works once
we've implemented this with actual data.
"""
# TODO: we want to define visits earlier, but we have to ingest a
# faked raw file and appropriate SSO data during prep (and then
# cleanup when ingesting the real data).
# TODO: Also, using this approach (instead of saving the datasetRefs
# returned by ingest and using them to define visits) also requires
# pruning this list down to only the exposures that aren't already
# defined (otherwise defineVisits.run does extra "nothing" work).
exposures = set(self.butler.registry.queryDataIds(["exposure"]))
self.define_visits.run(exposures)

# TODO: temporary workaround for uploader and image header not agreeing
# on what the exposure ID is. We use the full exposure list here
# because we can't support multiple visits anyway.
exposure_ids = {data_id["exposure"] for data_id in exposures}
try:
self.define_visits.run({"instrument": self.instrument.getName(),
"exposure": exp} for exp in exposure_ids)
except lsst.daf.butler.registry.DataIdError as e:
# TODO: a good place for a custom exception?
raise RuntimeError("No data to process.") from e

# TODO: can we move this from_pipeline call to prep_butler?
where = f"detector={visit.detector} and exposure in ({','.join(str(x) for x in exposure_ids)})"
47 changes: 47 additions & 0 deletions python/activator/raw.py
@@ -0,0 +1,47 @@
# This file is part of prompt_prototype.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""Common definitions of raw paths.
This module provides tools to convert raw paths into exposure metadata and
vice versa.
"""

__all__ = ["RAW_REGEXP", "get_raw_path"]

import re

# Format for filenames of raws uploaded to image bucket:
# instrument/detector/group/snap/expid/filter/*.(fits, fz, fits.gz)
RAW_REGEXP = re.compile(
r"(?P<instrument>.*?)/(?P<detector>\d+)/(?P<group>.*?)/(?P<snap>\d+)/(?P<expid>.*?)/(?P<filter>.*?)/"
r"[^/]+\.f"
)


def get_raw_path(instrument, detector, group, snap, exposure_id, filter):
"""The path on which to store raws in the image bucket.
"""
return (
f"{instrument}/{detector}/{group}/{snap}/{exposure_id}/{filter}"
f"/{instrument}-{group}-{snap}"
f"-{exposure_id}-{filter}-{detector}.fz"
)
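
A minimal round-trip sketch of how these two definitions fit together (values borrowed from the upload script above; this snippet is illustrative and not part of the module):

from activator.raw import RAW_REGEXP, get_raw_path

# Build a path, then parse it back; the named groups recover the metadata.
path = get_raw_path("HSC", 50, "2016030700003", 0, "0059150", "HSC-G")
m = RAW_REGEXP.match(path)
assert m.group("detector") == "50"
assert m.group("expid") == "0059150"
assert m.group("filter") == "HSC-G"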