Skip to content

Commit

Permalink
Merge pull request #257 from lsst/tickets/DM-13353
Browse files Browse the repository at this point in the history
DM-13353: Add FITS compression support to gen3 formatter
  • Loading branch information
timj committed Jun 9, 2020
2 parents 8c7e9e0 + bfe053f commit 769a877
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 7 deletions.
9 changes: 9 additions & 0 deletions doc/lsst.obs.base/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,12 @@ Python API reference

.. automodapi:: lsst.obs.base
:no-main-docstr:

.. automodapi:: lsst.obs.base.fitsExposureFormatter
:no-main-docstr:

.. automodapi:: lsst.obs.base.fitsGenericFormatter
:no-main-docstr:

.. automodapi:: lsst.obs.base.fitsRawFormatterBase
:no-main-docstr:
2 changes: 1 addition & 1 deletion python/lsst/obs/base/_instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def getName(cls):
def getCamera(self):
    """Retrieve the cameraGeom representation of this instrument.

    This is a temporary API that should go away once ``obs_`` packages have
    a standardized approach to writing versioned cameras to a Gen3 repo.

    Raises
    ------
    NotImplementedError
        Always raised here; concrete instrument subclasses must override.
    """
    raise NotImplementedError()
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/obs/base/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@


class FilterDefinitionCollection(collections.abc.Sequence):
"""An order-preserving collection of `FilterDefinition`s.
"""An order-preserving collection of multiple `FilterDefinition`.
Parameters
----------
filters : sequence
filters : `~collections.abc.Sequence`
The filters in this collection.
"""

Expand Down
181 changes: 178 additions & 3 deletions python/lsst/obs/base/fitsExposureFormatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,60 @@
from astro_metadata_translator import fix_header
from lsst.daf.butler import Formatter
from lsst.afw.image import ExposureFitsReader
from lsst.daf.base import PropertySet


class FitsExposureFormatter(Formatter):
"""Interface for reading and writing Exposures to and from FITS files.
This Formatter supports write recipes.
Each ``FitsExposureFormatter`` recipe for FITS compression should
define ``image``, ``mask`` and ``variance`` entries, each of which may
contain ``compression`` and ``scaling`` entries. Defaults will be
provided for any missing elements under ``compression`` and
``scaling``.
The allowed entries under ``compression`` are:
* ``algorithm`` (`str`): compression algorithm to use
* ``rows`` (`int`): number of rows per tile (0 = entire dimension)
* ``columns`` (`int`): number of columns per tile (0 = entire dimension)
* ``quantizeLevel`` (`float`): cfitsio quantization level
The allowed entries under ``scaling`` are:
* ``algorithm`` (`str`): scaling algorithm to use
* ``bitpix`` (`int`): bits per pixel (0,8,16,32,64,-32,-64)
* ``fuzz`` (`bool`): fuzz the values when quantising floating-point values?
* ``seed`` (`int`): seed for random number generator when fuzzing
* ``maskPlanes`` (`list` of `str`): mask planes to ignore when doing
statistics
* ``quantizeLevel`` (`float`): divisor of the standard deviation for
``STDEV_*`` scaling
* ``quantizePad`` (`float`): number of stdev to allow on the low side (for
``STDEV_POSITIVE``/``NEGATIVE``)
* ``bscale`` (`float`): manually specified ``BSCALE``
(for ``MANUAL`` scaling)
* ``bzero`` (`float`): manually specified ``BZERO``
(for ``MANUAL`` scaling)
A very simple example YAML recipe:
.. code-block:: yaml
lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter:
default:
image: &default
compression:
algorithm: GZIP_SHUFFLE
mask: *default
variance: *default
"""
extension = ".fits"
_metadata = None
supportedWriteParameters = frozenset({"recipe"})

@property
def metadata(self):
Expand Down Expand Up @@ -143,7 +190,7 @@ def readFull(self, parameters=None):
----------
parameters : `dict`, optional
If specified a dictionary of slicing parameters that overrides
those in ``fileDescriptor`.
those in ``fileDescriptor``.
Returns
-------
Expand Down Expand Up @@ -202,7 +249,7 @@ def read(self, component=None, parameters=None):
raise ValueError("Storage class inconsistency ({} vs {}) but no"
" component requested".format(fileDescriptor.readStorageClass.name,
fileDescriptor.storageClass.name))
return self.readFull()
return self.readFull(parameters=parameters)

def write(self, inMemoryDataset):
    """Write a Python object to a file.

    Parameters
    ----------
    inMemoryDataset : `object`
        The Python object to store; must provide ``writeFits`` and
        ``writeFitsWithOptions`` methods (e.g. an `lsst.afw.image.Exposure`).

    Returns
    -------
    path : `str`
        The path within the datastore where the file was written, as
        reported by ``fileDescriptor.location.pathInStore``.
    """
    # Update the location with the formatter-preferred file extension
    self.fileDescriptor.location.updateExtension(self.extension)
    outputPath = self.fileDescriptor.location.path

    # check to see if we have a recipe requested
    recipeName = self.writeParameters.get("recipe")
    recipe = self.getImageCompressionSettings(recipeName)
    if recipe:
        # Can not construct a PropertySet from a hierarchical
        # dict but can update one.
        ps = PropertySet()
        ps.update(recipe)
        inMemoryDataset.writeFitsWithOptions(outputPath, options=ps)
    else:
        # No compression recipe selected: plain FITS write.
        inMemoryDataset.writeFits(outputPath)
    return self.fileDescriptor.location.pathInStore

def getImageCompressionSettings(self, recipeName):
    """Retrieve the relevant compression settings for this recipe.

    Parameters
    ----------
    recipeName : `str` or `None`
        Label associated with the collection of compression parameters
        to select.  If empty or `None`, the ``default`` recipe is used
        when one is defined.

    Returns
    -------
    settings : `dict`
        The selected settings; an empty `dict` if no recipe was requested
        and no default exists.

    Raises
    ------
    RuntimeError
        Raised if the named recipe is not present in ``writeRecipes``.
    """
    # if no recipe has been provided and there is no default
    # return immediately
    if not recipeName:
        if "default" not in self.writeRecipes:
            return {}
        recipeName = "default"

    if recipeName not in self.writeRecipes:
        raise RuntimeError(f"Unrecognized recipe option given for compression: {recipeName}")

    recipe = self.writeRecipes[recipeName]

    # Set the seed based on dataId
    # NOTE(review): this mutates the stored recipe in place, and `hash` of
    # strings varies between interpreter runs unless PYTHONHASHSEED is
    # fixed -- confirm run-to-run reproducibility is not required here.
    seed = hash(tuple(self.dataId.items())) % 2**31
    for plane in ("image", "mask", "variance"):
        if plane in recipe and "scaling" in recipe[plane]:
            scaling = recipe[plane]["scaling"]
            # A seed of 0 means "derive the seed from the dataId".
            if "seed" in scaling and scaling["seed"] == 0:
                scaling["seed"] = seed

    return recipe

@classmethod
def validateWriteRecipes(cls, recipes):
    """Validate supplied recipes for this formatter.

    The recipes are supplemented with default values where appropriate.

    TODO: replace this custom validation code with Cerberus (DM-11846)

    Parameters
    ----------
    recipes : `dict`
        Recipes to validate. Can be empty dict or `None`.

    Returns
    -------
    validated : `dict`
        Validated recipes. Returns what was given if there are no
        recipes listed.

    Raises
    ------
    RuntimeError
        Raised if validation fails.
    """
    # Schemas define what should be there, and the default values (and by
    # the default value, the expected type).
    compressionSchema = {
        "algorithm": "NONE",
        "rows": 1,
        "columns": 0,
        "quantizeLevel": 0.0,
    }
    scalingSchema = {
        "algorithm": "NONE",
        "bitpix": 0,
        "maskPlanes": ["NO_DATA"],
        "seed": 0,
        "quantizeLevel": 4.0,
        "quantizePad": 5.0,
        "fuzz": True,
        "bscale": 1.0,
        "bzero": 0.0,
    }

    if not recipes:
        # We can not insist on recipes being specified
        return recipes

    def checkUnrecognized(entry, allowed, description):
        """Check to see if the entry contains unrecognised keywords"""
        unrecognized = set(entry) - set(allowed)
        if unrecognized:
            raise RuntimeError(
                f"Unrecognized entries when parsing image compression recipe {description}: "
                f"{unrecognized}")

    validated = {}
    for name in recipes:
        checkUnrecognized(recipes[name], ["image", "mask", "variance"], name)
        validated[name] = {}
        for plane in ("image", "mask", "variance"):
            # Each recipe must define all three planes; report a clear
            # validation error rather than letting a bare KeyError escape,
            # matching the documented RuntimeError contract.
            if plane not in recipes[name]:
                raise RuntimeError(
                    f"Missing required '{plane}' entry in image compression recipe {name}")
            checkUnrecognized(recipes[name][plane], ["compression", "scaling"],
                              f"{name}->{plane}")

            np = {}
            validated[name][plane] = np
            for settings, schema in (("compression", compressionSchema),
                                     ("scaling", scalingSchema)):
                np[settings] = {}
                if settings not in recipes[name][plane]:
                    # Nothing specified: fill in the full set of defaults.
                    np[settings].update(schema)
                    continue
                entry = recipes[name][plane][settings]
                checkUnrecognized(entry, schema.keys(), f"{name}->{plane}->{settings}")
                # Coerce each supplied value to the schema default's type;
                # fall back to the default when the key is absent.
                for key in schema:
                    value = type(schema[key])(entry[key]) if key in entry else schema[key]
                    np[settings][key] = value
    return validated
51 changes: 50 additions & 1 deletion tests/test_butlerFits.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@
cls: lsst.daf.butler.datastores.posixDatastore.PosixDatastore
formatters:
ExposureCompositeF: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
lossless:
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
parameters:
recipe: lossless
uncompressed:
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
parameters:
recipe: noCompression
lossy:
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
parameters:
recipe: lossyBasic
composites:
disassembled:
ExposureCompositeF: True
Expand All @@ -78,7 +90,7 @@ def setUpClass(cls):
dataIds = {
"instrument": ["DummyCam"],
"physical_filter": ["d-r"],
"visit": [42],
"visit": [42, 43, 44],
}

cls.creatorButler = makeTestRepo(cls.root, dataIds, config=Config.fromYaml(BUTLER_CONFIG))
Expand All @@ -87,6 +99,9 @@ def setUpClass(cls):
for datasetTypeName, storageClassName in (("calexp", "ExposureF"),
("unknown", "ExposureCompositeF"),
("testCatalog", "SourceCatalog"),
("lossless", "ExposureF"),
("uncompressed", "ExposureF"),
("lossy", "ExposureF"),
):
storageClass = cls.storageClassFactory.getStorageClass(storageClassName)
addDatasetType(cls.creatorButler, datasetTypeName, set(dataIds), storageClass)
Expand Down Expand Up @@ -252,6 +267,40 @@ def runExposureCompositePutGetTest(self, datasetTypeName: str) -> DatasetRef:

return ref

def putFits(self, exposure, datasetTypeName, visit):
    """Put different datasetTypes and return information.

    Parameters
    ----------
    exposure : `lsst.afw.image.Exposure`
        Exposure to store via the butler.
    datasetTypeName : `str`
        Dataset type to use for the put; selects the formatter recipe
        configured for that type.
    visit : `int`
        Visit number used to build the dataId.

    Returns
    -------
    meta : metadata object
        The ``.metadata`` component read back from the butler.
    size : `int`
        On-disk size in bytes of the stored file.
    """
    dataId = {"visit": visit, "instrument": "DummyCam", "physical_filter": "d-r"}
    refC = self.butler.put(exposure, datasetTypeName, dataId)
    uriC = self.butler.getURI(refC)
    stat = os.stat(uriC.path)
    size = stat.st_size
    meta = self.butler.get(f"{datasetTypeName}.metadata", dataId)
    return meta, size

def testCompression(self):
    """Test that we can write compressed and uncompressed FITS."""
    example = os.path.join(TESTDIR, "data", "small.fits")
    exposure = lsst.afw.image.ExposureF(example)

    # Write a losslessly-compressed FITS file and check the tile
    # compression keywords in the stored metadata.
    metaC, sizeC = self.putFits(exposure, "lossless", 42)
    self.assertEqual(metaC["TTYPE1"], "COMPRESSED_DATA")
    self.assertEqual(metaC["ZCMPTYPE"], "GZIP_2")

    # Write an uncompressed FITS file
    metaN, sizeN = self.putFits(exposure, "uncompressed", 43)
    self.assertNotIn("ZCMPTYPE", metaN)

    # Write a lossy-compressed FITS file
    metaL, sizeL = self.putFits(exposure, "lossy", 44)
    self.assertEqual(metaL["TTYPE1"], "COMPRESSED_DATA")
    self.assertEqual(metaL["ZCMPTYPE"], "RICE_1")

    self.assertNotEqual(sizeC, sizeN)
    # Data file is so small that Lossy and Compressed are dominated
    # by the extra compression tables
    self.assertEqual(sizeL, sizeC)


if __name__ == "__main__":
unittest.main()

0 comments on commit 769a877

Please sign in to comment.