Put refcat format version in catalog headers, too.

lsst · Apr 1, 2019 · 7d430a2 · 7d430a2
1 parent 592aa27
commit 7d430a2
Show file tree

Hide file tree

Showing 7 changed files with 83 additions and 23 deletions.
diff --git a/python/lsst/meas/algorithms/ingestIndexReferenceTask.py b/python/lsst/meas/algorithms/ingestIndexReferenceTask.py
@@ -33,6 +33,7 @@
 import lsst.pipe.base as pipeBase
 import lsst.geom
 import lsst.afw.table as afwTable
+from lsst.daf.base import PropertyList
 from lsst.afw.image import fluxErrFromABMagErr
 from .indexerRegistry import IndexerRegistry
 from .readTextCatalogTask import ReadTextCatalogTask
@@ -45,6 +46,20 @@
 LATEST_FORMAT_VERSION = 1
 
 
+def addRefCatMetadata(catalog):
+    """Add metadata to a new (not yet populated) reference catalog.
+
+    Parameters
+    ----------
+    catalog : `~lsst.afw.table.SimpleCatalog`
+        Catalog to which metadata should be attached.  Will be modified
+        in-place.
+    """
+    md = PropertyList()
+    md.set("REFCAT_FORMAT_VERSION", LATEST_FORMAT_VERSION)
+    catalog.setMetadata(md)
+
+
 class IngestReferenceRunner(pipeBase.TaskRunner):
     """Task runner for the reference catalog ingester
 
@@ -516,7 +531,9 @@ def getCatalog(self, dataId, schema):
         """
         if self.butler.datasetExists('ref_cat', dataId=dataId):
             return self.butler.get('ref_cat', dataId=dataId)
-        return afwTable.SimpleCatalog(schema)
+        catalog = afwTable.SimpleCatalog(schema)
+        addRefCatMetadata(catalog)
+        return catalog
 
     def makeSchema(self, dtype):
         """Make the schema to use in constructing the persisted catalogs.

diff --git a/python/lsst/meas/algorithms/loadIndexedReferenceObjects.py b/python/lsst/meas/algorithms/loadIndexedReferenceObjects.py
@@ -23,7 +23,7 @@
 
 __all__ = ["LoadIndexedReferenceObjectsConfig", "LoadIndexedReferenceObjectsTask"]
 
-from .loadReferenceObjects import hasNanojanskyFluxUnits, convertToNanojansky
+from .loadReferenceObjects import hasNanojanskyFluxUnits, convertToNanojansky, getFormatVersionFromRefCat
 from lsst.meas.algorithms import getRefFluxField, LoadReferenceObjectsTask, LoadReferenceObjectsConfig
 import lsst.afw.table as afwTable
 import lsst.geom
@@ -92,6 +92,13 @@ def loadSkyCircle(self, ctrCoord, radius, filterName=None, epoch=None):
             self.log.warn("run `meas_algorithms/bin/convert_refcat_to_nJy.py` to convert fluxes to nJy.")
             self.log.warn("See RFC-575 for more details.")
             refCat = convertToNanojansky(refCat, self.log)
+        else:
+            # For version >= 1, the version should be in the catalog header,
+            # too, and should be consistent with the version in the config.
+            catVersion = getFormatVersionFromRefCat(refCat)
+            if catVersion != self.dataset_config.format_version:
+                raise RuntimeError(f"Format version in reference catalog ({catVersion}) does "
+                                   f"not match configuration ({self.dataset_config.format_version})")
 
         self._addFluxAliases(refCat.schema)
         fluxField = getRefFluxField(schema=refCat.schema, filterName=filterName)

diff --git a/python/lsst/meas/algorithms/loadReferenceObjects.py b/python/lsst/meas/algorithms/loadReferenceObjects.py
@@ -63,6 +63,29 @@ def hasNanojanskyFluxUnits(schema):
     return True
 
 
+def getFormatVersionFromRefCat(refCat):
+    """Return the format version stored in a reference catalog header.
+
+    Parameters
+    ----------
+    refCat : `lsst.afw.table.SimpleCatalog`
+        Reference catalog to inspect.
+
+    Returns
+    -------
+    version : `int` or `None`
+        Format version integer, or `None` if the catalog has no metadata
+        or the metadata does not include a "REFCAT_FORMAT_VERSION" key.
+    """
+    md = refCat.getMetadata()
+    if md is None:
+        return None
+    try:
+        return md.getScalar("REFCAT_FORMAT_VERSION")
+    except KeyError:
+        return None
+
+
 def convertToNanojansky(catalog, log, doConvert=True):
     """Convert fluxes in a catalog from jansky to nanojansky.
 
@@ -113,8 +136,10 @@ def convertToNanojansky(catalog, log, doConvert=True):
     fluxFieldsStr = '; '.join("(%s, '%s')" % (field.getName(), field.getUnits()) for field in input_fields)
 
     if doConvert:
+        from .ingestIndexReferenceTask import addRefCatMetadata  # workaround for circular dependency
         newSchema = mapper.getOutputSchema()
         output = lsst.afw.table.SimpleCatalog(newSchema)
+        addRefCatMetadata(output)
         output.extend(catalog, mapper=mapper)
         for field in output_fields:
             output[field.getName()] *= 1e9
@@ -387,9 +412,9 @@ def loadRegion(self, region, filtFunc=None, filterName=None, epoch=None):
             else:
                 self.log.warn("Catalog pm_ra field is not an Angle; not applying proper motion")
 
-        # Verify the schema is in the correct units. In the future this should be replaced with
-        # the dimensions system of a gen3 registry on the ref_cat object
-        if not hasNanojanskyFluxUnits(refCat.schema):
+        # Verify the schema is in the correct units and has the correct version; automatically convert
+        # it with a warning if this is not the case.
+        if not hasNanojanskyFluxUnits(refCat.schema) or not getFormatVersionFromRefCat(refCat) >= 1:
             self.log.warn("Found version 0 reference catalog with old style units in schema.")
             self.log.warn("run `meas_algorithms/bin/convert_refcat_to_nJy.py` to convert fluxes to nJy.")
             self.log.warn("See RFC-575 for more details.")

diff --git a/tests/data/version1/config/IngestIndexedReferenceTask.py b/tests/data/version1/config/IngestIndexedReferenceTask.py
@@ -1,6 +1,10 @@
 import lsst.meas.algorithms.ingestIndexReferenceTask
 assert type(config)==lsst.meas.algorithms.ingestIndexReferenceTask.IngestIndexedReferenceConfig, 'config is of type %s.%s instead of lsst.meas.algorithms.ingestIndexReferenceTask.IngestIndexedReferenceConfig' % (type(config).__module__, type(config).__name__)
-# Version number of the persisted on-disk storage format. Version 0 had Jy as flux units (default 0 for unversioned catalogs). Version 1 had nJy as flux units.
+import lsst.meas.algorithms.indexerRegistry
+import lsst.meas.algorithms.readTextCatalogTask
+# Version number of the persisted on-disk storage format.
+# Version 0 had Jy as flux units (default 0 for unversioned catalogs).
+# Version 1 had nJy as flux units.
 config.dataset_config.format_version=1
 
 # String to pass to the butler to retrieve persisted files.

diff --git a/tests/data/version1/ref_cats/cal_ref_cat/4022.fits b/tests/data/version1/ref_cats/cal_ref_cat/4022.fits
diff --git a/tests/data/version1/ref_cats/cal_ref_cat/config.py b/tests/data/version1/ref_cats/cal_ref_cat/config.py
@@ -1,6 +1,9 @@
 import lsst.meas.algorithms.ingestIndexReferenceTask
 assert type(config)==lsst.meas.algorithms.ingestIndexReferenceTask.DatasetConfig, 'config is of type %s.%s instead of lsst.meas.algorithms.ingestIndexReferenceTask.DatasetConfig' % (type(config).__module__, type(config).__name__)
-# Version number of the persisted on-disk storage format. Version 0 had Jy as flux units (default 0 for unversioned catalogs). Version 1 had nJy as flux units.
+import lsst.meas.algorithms.indexerRegistry
+# Version number of the persisted on-disk storage format.
+# Version 0 had Jy as flux units (default 0 for unversioned catalogs).
+# Version 1 had nJy as flux units.
 config.format_version=1
 
 # String to pass to the butler to retrieve persisted files.

diff --git a/tests/data/version1/ref_cats/cal_ref_cat/master_schema.fits b/tests/data/version1/ref_cats/cal_ref_cat/master_schema.fits