Skip to content

Commit

Permalink
Merge pull request #99 from lsst/tickets/DM-16077
Browse files Browse the repository at this point in the history
DM-16077: Improve pickling support for butler classes.
  • Loading branch information
andy-slac committed Oct 25, 2018
2 parents 2c91d75 + 638c37e commit 0373348
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 3 deletions.
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/butler.py
Expand Up @@ -143,6 +143,8 @@ def makeRepo(root, config=None, standalone=False, createRegistry=True):
return config

def __init__(self, config=None, collection=None, run=None):
# save arguments for pickling
self._args = (config, collection, run)
self.config = ButlerConfig(config)
self.registry = Registry.fromConfig(self.config)
self.datastore = Datastore.fromConfig(self.config, self.registry)
Expand Down Expand Up @@ -182,7 +184,7 @@ def __init__(self, config=None, collection=None, run=None):
def __reduce__(self):
    """Support pickling.

    Returns
    -------
    reduced : `tuple`
        Two-element tuple holding the `Butler` class and the constructor
        arguments saved in ``self._args`` by ``__init__``, so that
        un-pickling calls the constructor again with the same
        ``config``, ``collection`` and ``run``.
    """
    # Pickle the original constructor arguments rather than only the
    # config, otherwise ``collection`` and ``run`` are lost on un-pickling.
    return (Butler, self._args)

def __str__(self):
return "Butler(collection='{}', datastore='{}', registry='{}')".format(
Expand Down
46 changes: 45 additions & 1 deletion python/lsst/daf/butler/core/datasets.py
Expand Up @@ -23,7 +23,7 @@

from types import MappingProxyType
from .utils import slotValuesAreEqual, slotValuesToHash
from .storageClass import StorageClass
from .storageClass import StorageClass, StorageClassFactory

__all__ = ("DatasetType", "DatasetRef")

Expand Down Expand Up @@ -171,6 +171,50 @@ def _lookupNames(self):
"""
return (self.name, *self.storageClass._lookupNames())

def __getstate__(self):
    """Build the state dictionary used for pickling.

    StorageClass instances can not normally be pickled, so the name of
    the StorageClass is stored in place of the instance itself; the
    instance is looked up again by that name when un-pickling.

    Returns
    -------
    state : `dict`
        Instance state to pickle, with keys ``name``,
        ``storageClassName`` and ``dataUnits``.
    """
    return {
        "name": self.name,
        "storageClassName": self.storageClass.name,
        "dataUnits": self.dataUnits,
    }

def __setstate__(self, state):
    """Restore the instance from a pickled state dictionary.

    Consumes the dictionary produced by `__getstate__`. The StorageClass
    instance is fetched by name from StorageClassFactory, which has to
    be properly initialized before un-pickling.

    Parameters
    ----------
    state : `dict`
        Pickled instance state.
    """
    factory = StorageClassFactory()
    restoredStorageClass = factory.getStorageClass(state["storageClassName"])
    # Re-run the regular constructor so the instance is set up exactly as
    # a freshly constructed one would be.
    self.__init__(name=state["name"],
                  dataUnits=state["dataUnits"],
                  storageClass=restoredStorageClass)

def __deepcopy__(self, memo):
    """Return a deep copy of this instance.

    When ``__getstate__`` and ``__setstate__`` are defined ``deepcopy``
    would use them, which would require an initialized
    StorageClassFactory. To avoid that dependency the copy is built here
    directly from deep copies of the individual attributes.

    Parameters
    ----------
    memo : `dict`
        Memo dictionary passed through to the nested ``deepcopy`` calls.
    """
    copiedName = deepcopy(self.name, memo)
    copiedDataUnits = deepcopy(self.dataUnits, memo)
    copiedStorageClass = deepcopy(self.storageClass, memo)
    return DatasetType(name=copiedName,
                       dataUnits=copiedDataUnits,
                       storageClass=copiedStorageClass)


class DatasetRef:
"""Reference to a Dataset in a `Registry`.
Expand Down
3 changes: 3 additions & 0 deletions tests/config/basic/butler-norun.yaml
@@ -0,0 +1,3 @@
datastore: !include posixDatastore.yaml
storageClasses: !include storageClasses.yaml
composites: !include composites.yaml
14 changes: 14 additions & 0 deletions tests/test_butler.py
Expand Up @@ -303,6 +303,20 @@ class ChainedDatastoreButlerTestCase(ButlerTests, lsst.utils.tests.TestCase):
registryStr = "registry='sqlite:///:memory:'"


class ButlerConfigNoRunTestCase(lsst.utils.tests.TestCase):
    """Test case for butler config which does not have ``run``.
    """
    configFile = os.path.join(TESTDIR, "config/basic/butler-norun.yaml")

    def testPickle(self):
        """Check that a `Butler` survives a pickle round trip.
        """
        # The configuration defines no run, so one is passed explicitly.
        original = Butler(self.configFile, run="ingest")
        serialized = pickle.dumps(original)
        restored = pickle.loads(serialized)
        self.assertIsInstance(restored, Butler)
        self.assertEqual(restored.config, original.config)


class MemoryTester(lsst.utils.tests.MemoryTestCase):
    """Memory test case; adds nothing to the inherited ``MemoryTestCase``.
    """

Expand Down
18 changes: 17 additions & 1 deletion tests/test_datasets.py
Expand Up @@ -20,11 +20,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest
import pickle

import lsst.utils.tests

from lsst.daf.butler.core.datasets import DatasetType, DatasetRef
from lsst.daf.butler.core.storageClass import StorageClass
from lsst.daf.butler.core.storageClass import StorageClass, StorageClassFactory

"""Tests for datasets module.
"""
Expand Down Expand Up @@ -85,6 +86,21 @@ def testHashability(self):
unique += 1 # datasetType should always equal its copy
self.assertEqual(len(set(types)), unique) # all other combinations are unique

def testPickle(self):
    """Round-trip a `DatasetType` through pickle and compare attributes.
    """
    pickleStorageClass = StorageClass("test_pickle")
    # Un-pickling requires that storage class is registered with factory.
    StorageClassFactory().registerStorageClass(pickleStorageClass)
    original = DatasetType("test", frozenset(("camera", "visit")), pickleStorageClass)
    serialized = pickle.dumps(original)
    restored = pickle.loads(serialized)
    self.assertIsInstance(restored, DatasetType)
    self.assertEqual(original.name, restored.name)
    self.assertEqual(original.dataUnits, restored.dataUnits)
    self.assertEqual(original.storageClass, restored.storageClass)


class DatasetRefTestCase(lsst.utils.tests.TestCase):
"""Test for DatasetRef.
Expand Down

0 comments on commit 0373348

Please sign in to comment.