From b0d937a8eacd764a975e2cd96d18bfb571cbaa28 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 12 Jan 2023 12:39:45 -0800 Subject: [PATCH 1/4] Work around changes in how unbounded calibs are handled. Broad queries for camera now always fail, so I picked the skymap as another "simple" dataset from the test data. --- tests/test_middleware_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_middleware_interface.py b/tests/test_middleware_interface.py index 81eba7a4..bba3e3b2 100644 --- a/tests/test_middleware_interface.py +++ b/tests/test_middleware_interface.py @@ -159,7 +159,7 @@ def test_get_butler(self): ]: # TODO: better way to test repo location? self.assertTrue( - butler.getURI("camera", instrument=instname, run="foo", predict=True).ospath + butler.getURI("skyMap", skymap="deepCoadd_skyMap", run="foo", predict=True).ospath .startswith(self.central_repo)) self.assertEqual(list(butler.collections), [f"{instname}/defaults"]) self.assertTrue(butler.isWriteable()) From 10b6b035aef29a76babb74d4af4f8bf88f53388f Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 11 Jan 2023 11:46:59 -0800 Subject: [PATCH 2/4] Add table of max exposure IDs to tester utils. This table can be used by exposure ID generation algorithms to ensure that their output IDs are always valid. --- python/tester/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/tester/utils.py b/python/tester/utils.py index 475e862a..2e2306b9 100644 --- a/python/tester/utils.py +++ b/python/tester/utils.py @@ -30,6 +30,15 @@ _log = logging.getLogger("lsst." + __name__) _log.setLevel(logging.DEBUG) +max_exposure = { + "HSC": 21474800, +} +"""A mapping of instrument to exposure_max (`dict` [`str`, `int`]). + +The values are copied here so we can access them without a Butler. All +exposure IDs are in Middleware (integer) format, not native format. +""" + def get_last_group(bucket, instrument, date): """Identify the largest group number or a new group number. From 1d4b3d632e838e664b1dca8a06ee970582410f74 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 11 Jan 2023 15:11:46 -0800 Subject: [PATCH 3/4] Reimplement HSC exposure ID generation. The new ID generation algorithm is guaranteed to return an HSC ID in the correct range (0-21,474,800), and is less likely to have collisions from over-uploading. It is only valid until September 2024, but we hope that will be enough. --- python/tester/utils.py | 13 ++++++++----- tests/test_tester_utils.py | 37 ++++++++++++++++++++++++++++++++++++- ups/prompt_prototype.table | 1 + 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/python/tester/utils.py b/python/tester/utils.py index 2e2306b9..34654159 100644 --- a/python/tester/utils.py +++ b/python/tester/utils.py @@ -133,15 +133,18 @@ def make_hsc_id(group_num, snap): Notes ----- - The current implementation allows up to 1000 group numbers per day. - It can overflow with ~60 calls to upload.py on the same day or - upload_hsc_rc2.py with a large N_GROUPS. + The current implementation gives illegal exposure IDs after September 2024. + If this generator is still needed after that, it will need to be tweaked. """ # This is a bit too dependent on how group_num is generated, but I want the # group number to be discernible even after compressing to 8 digits. - night_id = (group_num // 100_000) % 2020_00_00 # Always 5 digits + year = (group_num // 100_00_00000) - 2023 # Always 1 digit, 0-1 + night_id = (group_num % 100_00_00000) // 100000 # Always 4 digits up to 1231 run_id = group_num % 100_000 # Up to 5 digits, but usually 2-3 - exposure_id = (night_id * 1000) + (run_id % 1000) # Always 8 digits + exposure_id = (year*1200 + night_id) * 10000 + (run_id % 10000) # Always 8 digits + if exposure_id > max_exposure["HSC"]: + raise RuntimeError(f"{group_num} translated to expId {exposure_id}, " + f"max allowed is { max_exposure['HSC']}.") return f"HSCE{exposure_id:08d}", exposure_id diff --git a/tests/test_tester_utils.py b/tests/test_tester_utils.py index 4fff0cd5..8849aa51 100644 --- a/tests/test_tester_utils.py +++ b/tests/test_tester_utils.py @@ -19,14 +19,19 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import tempfile import unittest import boto3 import botocore from moto import mock_s3 +import lsst.daf.butler.tests as butler_tests +from lsst.obs.base import ExposureIdInfo +from lsst.obs.subaru import HyperSuprimeCam + from activator.raw import get_raw_path -from tester.utils import get_last_group +from tester.utils import get_last_group, make_exposure_id class TesterUtilsTest(unittest.TestCase): @@ -76,3 +81,33 @@ def test_get_last_group(self): # Test the case of no match last_group = get_last_group(bucket, "TestCam", "20110101") self.assertEqual(last_group, int(20110101) * 100_000) + + def test_exposure_id_hsc(self): + group = "2023011100026" + # Need a Butler registry to test ExposureIdInfo + with tempfile.TemporaryDirectory() as repo: + butler = butler_tests.makeTestRepo(repo) + HyperSuprimeCam().register(butler.registry) + instruments = list(butler.registry.queryDimensionRecords( + "instrument", dataId={"instrument": "HSC"})) + self.assertEqual(len(instruments), 1) + exp_max = instruments[0].exposure_max + + _, str_exp_id, exp_id = make_exposure_id("HSC", int(group), 0) + butler_tests.addDataIdValue(butler, "visit", exp_id) + data_id = butler.registry.expandDataId({"instrument": "HSC", "visit": exp_id, "detector": 111}) + + self.assertEqual(str_exp_id, "HSCE%08d" % exp_id) + # Above assertion passes if exp_id has 9+ digits, but such IDs aren't valid. + self.assertEqual(len(str_exp_id[4:]), 8) + self.assertLessEqual(exp_id, exp_max) + # test that ExposureIdInfo.fromDataID does not raise + ExposureIdInfo.fromDataId(data_id, "visit_detector") + + def test_exposure_id_hsc_limits(self): + # Confirm that the exposure ID generator works as long as advertised: + # until the end of September 2024. + _, _, exp_id = make_exposure_id("HSC", 2024093009999, 0) + self.assertEqual(exp_id, 21309999) + with self.assertRaises(RuntimeError): + make_exposure_id("HSC", 2024100100000, 0) diff --git a/ups/prompt_prototype.table b/ups/prompt_prototype.table index 7fcd4f9e..96e24dd8 100644 --- a/ups/prompt_prototype.table +++ b/ups/prompt_prototype.table @@ -17,6 +17,7 @@ setupRequired(pipe_base) # used by tests setupRequired(obs_decam) +setupRequired(obs_subaru) envPrepend(PYTHONPATH, ${PRODUCT_DIR}/python) envPrepend(PATH, ${PRODUCT_DIR}/bin) From 9d972c8eee2a8a7e77e5862f3ac724070a891a48 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 12 Jan 2023 14:39:38 -0800 Subject: [PATCH 4/4] Add refreshes for central butler to MWI. While we shouldn't need refresh for the local repo, because there is only one Butler for it, the central repo has one Butler per worker, and any individual worker may be long- or short-lived. While it's not a proper synchronization API, call `central_butler.registry.refresh` before operations that read from or write to the central repo. --- python/activator/middleware_interface.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py index ebed5c51..36a10692 100644 --- a/python/activator/middleware_interface.py +++ b/python/activator/middleware_interface.py @@ -316,6 +316,10 @@ def prep_butler(self, visit: Visit) -> None: wcs = self._predict_wcs(detector, visit) center, radius = self._detector_bounding_circle(detector, wcs) + # central repo may have been modified by other MWI instances. + # TODO: get a proper synchronization API for Butler + self.central_butler.registry.refresh() + with tempfile.NamedTemporaryFile(mode="w+b", suffix=".yaml") as export_file: with self.central_butler.export(filename=export_file.name, format="yaml") as export: self._export_refcats(export, center, radius) @@ -764,6 +768,10 @@ def _export_subset(self, visit: Visit, exposure_ids: set[int], if not datasets: raise ValueError(f"No datasets match visit={visit} and exposures={exposure_ids}.") + # central repo may have been modified by other MWI instances. + # TODO: get a proper synchronization API for Butler + self.central_butler.registry.refresh() + with tempfile.NamedTemporaryFile(mode="w+b", suffix=".yaml") as export_file: # MUST NOT export governor dimensions, as this causes deadlocks in # central registry. Can omit most other dimensions (all dimensions,