Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 47 additions & 59 deletions bin.src/make_latiss_export.py → bin.src/make_export.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I'd worry a little about whether any variations in make_hsc_rc2_export and make_template_export were captured, but great idea!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I'm a little worried about that too. This probably should have been done before DM-41241; then we could have felt more reassured.

Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.


"""Selectively export the contents of the LATISS dataset.
"""Selectively export some contents from a butler repo.

This script selects some LATISS data in a source butler repo, and makes an export
This script selects some data in a source butler repo, and makes an export
file for importing to the test central prompt processing repository.
"""

Expand All @@ -32,6 +32,7 @@
import logging
import sys
import tempfile
import yaml

import lsst.daf.butler as daf_butler
from lsst.utils.timer import time_this
Expand All @@ -54,6 +55,13 @@ def _make_parser():
"exported from the source repo. If no target repo is given, all "
"selected datasets in the source repo will be exported.",
)
parser.add_argument(
"--select",
required=True,
help="URI to a YAML file containing expressions to identify the "
"datasets and collections to be exported. An example is at "
"etc/export_latiss.yaml."
)
return parser


Expand All @@ -62,6 +70,8 @@ def main():

args = _make_parser().parse_args()
src_butler = daf_butler.Butler(args.src_repo)
with open(args.select, "r") as file:
wants = yaml.safe_load(file)

with tempfile.TemporaryDirectory() as temp_repo:
if args.target_repo:
Expand All @@ -72,10 +82,10 @@ def main():
target_butler = daf_butler.Butler(config)

with time_this(msg="Datasets and collections exported", level=logging.INFO):
_export_for_copy(src_butler, target_butler)
_export_for_copy(src_butler, target_butler, wants)


def _export_for_copy(butler, target_butler, wants):
    """Export selected data to make copies in another butler repo.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        The source Butler from which datasets are exported.
    target_butler : `lsst.daf.butler.Butler`
        The target Butler to which datasets are exported. It is checked
        to avoid exporting existing datasets. No checks are done to
        verify if datasets are really identical.
    wants : `dict`
        A dictionary to identify selections with optional keys:

        ``"datasets"``, optional
            A list of dataset selection expressions (`list` of `dict`).
            The list is iterated over to find matching datasets in the butler,
            with the matching criteria provided via the selection expressions.
            Each selection expression is a keyword argument dictionary to
            be passed to the butler to query datasets; it has the same
            meanings as the parameters of
            `lsst.daf.butler.Registry.queryDatasets`.
        ``"collections"``, optional
            A list of collection selection expressions (`list` of `dict`).
            The list is iterated over to find matching collections in the
            butler, with the matching criteria provided via the selection
            expressions. Each selection expression is a keyword argument
            dictionary to be passed to the butler to query collections; it
            has the same meanings as the parameters of
            `lsst.daf.butler.Registry.queryCollections`.
    """
    with butler.export(format="yaml") as contents:
        for selection in wants.get("datasets", []):
            # Lazy %-formatting: the message is only built if DEBUG is on.
            logging.debug("Selecting datasets: %s", selection)
            # Copy before adding defaults so the caller's dict (and the
            # loaded YAML structure) is not mutated as a side effect.
            query_args = dict(selection)
            # Default to all dataset types when none is specified.
            query_args.setdefault("datasetType", ...)
            records = _filter_datasets(butler, target_butler, **query_args)
            contents.saveDatasets(records)

        # Save selected collections and chains.
        for selection in wants.get("collections", []):
            for collection in butler.registry.queryCollections(**selection):
                logging.debug("Selecting collection %s", collection)
                try:
                    target_butler.registry.queryCollections(collection)
                except daf_butler.registry.MissingCollectionError:
                    # MissingCollectionError is raised if the collection does
                    # not exist in target_butler; only then do we export it.
                    contents.saveCollection(collection)


if __name__ == "__main__":
Expand Down
106 changes: 0 additions & 106 deletions bin.src/make_hsc_rc2_export.py

This file was deleted.

43 changes: 0 additions & 43 deletions bin.src/make_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def _make_parser():
help="The export file containing the repository contents. Defaults to ./export.yaml.")
parser.add_argument("--instrument",
help="The short name of the instrument (HSC, LATISS, etc).")
parser.add_argument("--hsc-rc2", action="store_true", help="Extra fix up for HSC-RC2 dataset.")
return parser


Expand Down Expand Up @@ -97,46 +96,6 @@ def _add_chains(butler, instrument_name):
)


def _hsc_rc2(butler):
    """Fix up some specifics of the HSC-RC2 dataset export.

    Chains the HSC calibration collections, registers and chains the
    unbounded-calibration collection, and prepends the HSC rerun
    collections to the "templates" chain.

    Parameters
    ----------
    butler: `lsst.daf.butler.Butler`
        The source Butler from which datasets are exported
    """
    # Chain calibration collections
    instrument = Instrument.fromName("HSC", butler.registry)
    butler.registry.setCollectionChain(
        instrument.makeCalibrationCollectionName(),
        [
            "HSC/calib/DM-32378",
            "HSC/calib/gen2/20180117",
            "HSC/calib/DM-28636",
        ],
    )

    # The unbounded-calibration chain may not exist yet, so register it
    # before setting its members.
    butler.registry.registerCollection(
        instrument.makeUnboundedCalibrationRunName(),
        type=CollectionType.CHAINED
    )
    butler.registry.setCollectionChain(
        instrument.makeUnboundedCalibrationRunName(),
        [
            "HSC/calib/gen2/20180117/unbounded",
            "HSC/calib/DM-28636/unbounded",
        ],
    )
    # Chain rerun collections to templates
    # The export script should have guaranteed that there are only coadds in these collections.
    current = butler.registry.getCollectionChain("templates")
    addition = butler.registry.queryCollections("HSC/runs/*",
                                                collectionTypes=CollectionType.RUN)
    # Prepend the rerun collections ahead of the existing chain members.
    # NOTE(review): flatten=False presumably preserves any nested chains
    # inside ``current`` as-is — confirm against the daf_butler API docs.
    butler.registry.setCollectionChain("templates",
                                       list(addition) + list(current),
                                       flatten=False)


def main():
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

Expand All @@ -150,8 +109,6 @@ def main():
with time_this(msg="Import", level=logging.INFO):
butler.import_(directory=args.src_repo, filename=args.export_file, transfer="auto")
_add_chains(butler, args.instrument)
if args.hsc_rc2:
_hsc_rc2(butler)


if __name__ == "__main__":
Expand Down
Loading