Skip to content

Commit

Permalink
Merge pull request #257 from lsst/tickets/DM-13353
Browse files Browse the repository at this point in the history
DM-13353: Add FITS compression support to gen3 formatter
  • Loading branch information
timj committed Jun 9, 2020
2 parents 8c7e9e0 + bfe053f commit 769a877
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 7 deletions.
9 changes: 9 additions & 0 deletions doc/lsst.obs.base/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,12 @@ Python API reference

.. automodapi:: lsst.obs.base
:no-main-docstr:

.. automodapi:: lsst.obs.base.fitsExposureFormatter
:no-main-docstr:

.. automodapi:: lsst.obs.base.fitsGenericFormatter
:no-main-docstr:

.. automodapi:: lsst.obs.base.fitsRawFormatterBase
:no-main-docstr:
2 changes: 1 addition & 1 deletion python/lsst/obs/base/_instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def getName(cls):
def getCamera(self):
    """Retrieve the cameraGeom representation of this instrument.

    This is a temporary API that should go away once ``obs_`` packages have
    a standardized approach to writing versioned cameras to a Gen3 repo.

    Raises
    ------
    NotImplementedError
        Always raised here; concrete instrument subclasses must override.
    """
    raise NotImplementedError()
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/obs/base/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@


class FilterDefinitionCollection(collections.abc.Sequence):
"""An order-preserving collection of `FilterDefinition`s.
"""An order-preserving collection of multiple `FilterDefinition`.
Parameters
----------
filters : sequence
filters : `~collections.abc.Sequence`
The filters in this collection.
"""

Expand Down
181 changes: 178 additions & 3 deletions python/lsst/obs/base/fitsExposureFormatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,60 @@
from astro_metadata_translator import fix_header
from lsst.daf.butler import Formatter
from lsst.afw.image import ExposureFitsReader
from lsst.daf.base import PropertySet


class FitsExposureFormatter(Formatter):
"""Interface for reading and writing Exposures to and from FITS files.
This Formatter supports write recipes.
Each ``FitsExposureFormatter`` recipe for FITS compression should
define ``image``, ``mask`` and ``variance`` entries, each of which may
contain ``compression`` and ``scaling`` entries. Defaults will be
provided for any missing elements under ``compression`` and
``scaling``.
The allowed entries under ``compression`` are:
* ``algorithm`` (`str`): compression algorithm to use
* ``rows`` (`int`): number of rows per tile (0 = entire dimension)
* ``columns`` (`int`): number of columns per tile (0 = entire dimension)
* ``quantizeLevel`` (`float`): cfitsio quantization level
The allowed entries under ``scaling`` are:
* ``algorithm`` (`str`): scaling algorithm to use
* ``bitpix`` (`int`): bits per pixel (0,8,16,32,64,-32,-64)
* ``fuzz`` (`bool`): fuzz the values when quantising floating-point values?
* ``seed`` (`int`): seed for random number generator when fuzzing
* ``maskPlanes`` (`list` of `str`): mask planes to ignore when doing
statistics
* ``quantizeLevel`` (`float`): divisor of the standard deviation for
``STDEV_*`` scaling
* ``quantizePad`` (`float`): number of stdev to allow on the low side (for
``STDEV_POSITIVE``/``NEGATIVE``)
* ``bscale`` (`float`): manually specified ``BSCALE``
(for ``MANUAL`` scaling)
* ``bzero`` (`float`): manually specified ``BZERO``
(for ``MANUAL`` scaling)
A very simple example YAML recipe:
.. code-block:: yaml
lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter:
default:
image: &default
compression:
algorithm: GZIP_SHUFFLE
mask: *default
variance: *default
"""
extension = ".fits"
_metadata = None
supportedWriteParameters = frozenset({"recipe"})

@property
def metadata(self):
Expand Down Expand Up @@ -143,7 +190,7 @@ def readFull(self, parameters=None):
----------
parameters : `dict`, optional
If specified a dictionary of slicing parameters that overrides
those in ``fileDescriptor`.
those in ``fileDescriptor``.
Returns
-------
Expand Down Expand Up @@ -202,7 +249,7 @@ def read(self, component=None, parameters=None):
raise ValueError("Storage class inconsistency ({} vs {}) but no"
" component requested".format(fileDescriptor.readStorageClass.name,
fileDescriptor.storageClass.name))
return self.readFull()
return self.readFull(parameters=parameters)

def write(self, inMemoryDataset):
    """Write a Python object to a file.

    Parameters
    ----------
    inMemoryDataset : `object`
        The Python object to store; must provide ``writeFits`` and
        ``writeFitsWithOptions`` methods (e.g. an `lsst.afw.image.Exposure`).

    Returns
    -------
    path : `str`
        The path within the datastore where the file was written, as
        reported by ``fileDescriptor.location.pathInStore``.
    """
    # Update the location with the formatter-preferred file extension
    self.fileDescriptor.location.updateExtension(self.extension)
    outputPath = self.fileDescriptor.location.path

    # check to see if we have a recipe requested
    recipeName = self.writeParameters.get("recipe")
    recipe = self.getImageCompressionSettings(recipeName)
    if recipe:
        # Can not construct a PropertySet from a hierarchical
        # dict but can update one.
        ps = PropertySet()
        ps.update(recipe)
        inMemoryDataset.writeFitsWithOptions(outputPath, options=ps)
    else:
        # No compression recipe selected: plain FITS write.
        inMemoryDataset.writeFits(outputPath)
    return self.fileDescriptor.location.pathInStore

def getImageCompressionSettings(self, recipeName):
    """Retrieve the relevant compression settings for this recipe.

    Parameters
    ----------
    recipeName : `str` or `None`
        Label associated with the collection of compression parameters
        to select.  If empty or `None`, the ``default`` recipe is used
        when one is defined.

    Returns
    -------
    settings : `dict`
        The selected settings; an empty `dict` if no recipe was requested
        and no default exists.

    Raises
    ------
    RuntimeError
        Raised if the named recipe is not present in ``writeRecipes``.
    """
    # if no recipe has been provided and there is no default
    # return immediately
    if not recipeName:
        if "default" not in self.writeRecipes:
            return {}
        recipeName = "default"

    if recipeName not in self.writeRecipes:
        raise RuntimeError(f"Unrecognized recipe option given for compression: {recipeName}")

    recipe = self.writeRecipes[recipeName]

    # Set the seed based on dataId
    # NOTE(review): this mutates the stored recipe in place, and `hash` of
    # strings varies between interpreter runs unless PYTHONHASHSEED is
    # fixed -- confirm run-to-run reproducibility is not required here.
    seed = hash(tuple(self.dataId.items())) % 2**31
    for plane in ("image", "mask", "variance"):
        if plane in recipe and "scaling" in recipe[plane]:
            scaling = recipe[plane]["scaling"]
            # A seed of 0 means "derive the seed from the dataId".
            if "seed" in scaling and scaling["seed"] == 0:
                scaling["seed"] = seed

    return recipe

@classmethod
def validateWriteRecipes(cls, recipes):
    """Validate supplied recipes for this formatter.

    The recipes are supplemented with default values where appropriate.

    TODO: replace this custom validation code with Cerberus (DM-11846)

    Parameters
    ----------
    recipes : `dict`
        Recipes to validate. Can be empty dict or `None`.

    Returns
    -------
    validated : `dict`
        Validated recipes. Returns what was given if there are no
        recipes listed.

    Raises
    ------
    RuntimeError
        Raised if validation fails.
    """
    # Schemas define what should be there, and the default values (and by
    # the default value, the expected type).
    compressionSchema = {
        "algorithm": "NONE",
        "rows": 1,
        "columns": 0,
        "quantizeLevel": 0.0,
    }
    scalingSchema = {
        "algorithm": "NONE",
        "bitpix": 0,
        "maskPlanes": ["NO_DATA"],
        "seed": 0,
        "quantizeLevel": 4.0,
        "quantizePad": 5.0,
        "fuzz": True,
        "bscale": 1.0,
        "bzero": 0.0,
    }

    if not recipes:
        # We can not insist on recipes being specified
        return recipes

    def checkUnrecognized(entry, allowed, description):
        """Check to see if the entry contains unrecognised keywords"""
        unrecognized = set(entry) - set(allowed)
        if unrecognized:
            raise RuntimeError(
                f"Unrecognized entries when parsing image compression recipe {description}: "
                f"{unrecognized}")

    validated = {}
    for name in recipes:
        checkUnrecognized(recipes[name], ["image", "mask", "variance"], name)
        validated[name] = {}
        for plane in ("image", "mask", "variance"):
            # Each recipe must define all three planes; report a clear
            # validation error rather than letting a bare KeyError escape,
            # matching the documented RuntimeError contract.
            if plane not in recipes[name]:
                raise RuntimeError(
                    f"Missing required '{plane}' entry in image compression recipe {name}")
            checkUnrecognized(recipes[name][plane], ["compression", "scaling"],
                              f"{name}->{plane}")

            np = {}
            validated[name][plane] = np
            for settings, schema in (("compression", compressionSchema),
                                     ("scaling", scalingSchema)):
                np[settings] = {}
                if settings not in recipes[name][plane]:
                    # Nothing specified: fill in the full set of defaults.
                    np[settings].update(schema)
                    continue
                entry = recipes[name][plane][settings]
                checkUnrecognized(entry, schema.keys(), f"{name}->{plane}->{settings}")
                # Coerce each supplied value to the schema default's type;
                # fall back to the default when the key is absent.
                for key in schema:
                    value = type(schema[key])(entry[key]) if key in entry else schema[key]
                    np[settings][key] = value
    return validated
51 changes: 50 additions & 1 deletion tests/test_butlerFits.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@
cls: lsst.daf.butler.datastores.posixDatastore.PosixDatastore
formatters:
ExposureCompositeF: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
lossless:
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
parameters:
recipe: lossless
uncompressed:
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
parameters:
recipe: noCompression
lossy:
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
parameters:
recipe: lossyBasic
composites:
disassembled:
ExposureCompositeF: True
Expand All @@ -78,7 +90,7 @@ def setUpClass(cls):
dataIds = {
"instrument": ["DummyCam"],
"physical_filter": ["d-r"],
"visit": [42],
"visit": [42, 43, 44],
}

cls.creatorButler = makeTestRepo(cls.root, dataIds, config=Config.fromYaml(BUTLER_CONFIG))
Expand All @@ -87,6 +99,9 @@ def setUpClass(cls):
for datasetTypeName, storageClassName in (("calexp", "ExposureF"),
("unknown", "ExposureCompositeF"),
("testCatalog", "SourceCatalog"),
("lossless", "ExposureF"),
("uncompressed", "ExposureF"),
("lossy", "ExposureF"),
):
storageClass = cls.storageClassFactory.getStorageClass(storageClassName)
addDatasetType(cls.creatorButler, datasetTypeName, set(dataIds), storageClass)
Expand Down Expand Up @@ -252,6 +267,40 @@ def runExposureCompositePutGetTest(self, datasetTypeName: str) -> DatasetRef:

return ref

def putFits(self, exposure, datasetTypeName, visit):
    """Put different datasetTypes and return information.

    Parameters
    ----------
    exposure : `lsst.afw.image.Exposure`
        Exposure to store via the butler.
    datasetTypeName : `str`
        Dataset type to use for the put; selects the formatter recipe
        configured for that type.
    visit : `int`
        Visit number used to build the dataId.

    Returns
    -------
    meta : metadata object
        The ``.metadata`` component read back from the butler.
    size : `int`
        On-disk size in bytes of the stored file.
    """
    dataId = {"visit": visit, "instrument": "DummyCam", "physical_filter": "d-r"}
    refC = self.butler.put(exposure, datasetTypeName, dataId)
    uriC = self.butler.getURI(refC)
    stat = os.stat(uriC.path)
    size = stat.st_size
    meta = self.butler.get(f"{datasetTypeName}.metadata", dataId)
    return meta, size

def testCompression(self):
    """Test that we can write compressed and uncompressed FITS."""
    example = os.path.join(TESTDIR, "data", "small.fits")
    exposure = lsst.afw.image.ExposureF(example)

    # Write a losslessly-compressed FITS file and check the tile
    # compression keywords in the stored metadata.
    metaC, sizeC = self.putFits(exposure, "lossless", 42)
    self.assertEqual(metaC["TTYPE1"], "COMPRESSED_DATA")
    self.assertEqual(metaC["ZCMPTYPE"], "GZIP_2")

    # Write an uncompressed FITS file
    metaN, sizeN = self.putFits(exposure, "uncompressed", 43)
    self.assertNotIn("ZCMPTYPE", metaN)

    # Write a lossy-compressed FITS file
    metaL, sizeL = self.putFits(exposure, "lossy", 44)
    self.assertEqual(metaL["TTYPE1"], "COMPRESSED_DATA")
    self.assertEqual(metaL["ZCMPTYPE"], "RICE_1")

    self.assertNotEqual(sizeC, sizeN)
    # Data file is so small that Lossy and Compressed are dominated
    # by the extra compression tables
    self.assertEqual(sizeL, sizeC)


if __name__ == "__main__":
unittest.main()

0 comments on commit 769a877

Please sign in to comment.