Skip to content

Commit

Permalink
pickle v2: improve handling for object references
Browse files Browse the repository at this point in the history
We now allow Pickle Protocol v2 __getnewargs__() to contain
references.

The unpickler uses a new strategy for handling references.
It now places Proxy objects into the referencing machinery
to break dependencies between construction arguments and
parent objects.

A final sweep is done at the end of restore() to swap proxies
for their real instances.  This allows for proper referencing
within objects returned by __getnewargs__().

Related-to: #78
Signed-off-by: David Aguilar <davvid@gmail.com>
  • Loading branch information
davvid committed Aug 20, 2014
1 parent 1bb4b5a commit d6ce6b2
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 8 deletions.
2 changes: 1 addition & 1 deletion jsonpickle/pickler.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _flatten_obj_instance(self, obj):
return handler(self).flatten(obj, data)

if has_getnewargs:
data[tags.NEWARGS] = obj.__getnewargs__()
data[tags.NEWARGS] = self._flatten(obj.__getnewargs__())

if util.is_module(obj):
if self.unpicklable:
Expand Down
76 changes: 71 additions & 5 deletions jsonpickle/unpickler.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,38 @@ def _make_backend(backend):
return backend


class _Proxy(object):
"""Proxies are dummy objects that are later replaced by real instances
The `restore()` function has to solve a tricky problem when pickling
objects with cyclical references -- the parent instance does not yet
exist.
The problem is that `__getnewargs__()`, `__getstate__()`, custom handlers,
and cyclical objects graphs are allowed to reference the yet-to-be-created
object via the referencing machinery.
In other words, objects are allowed to depend on themselves for
construction!
We solve this problem by placing dummy Proxy objects into the referencing
machinery so that we can construct the child objects before constructing
the parent. Objects are initially created with Proxy attribute values
instead of real references.
We collect all objects that contain references to proxies and run
a final sweep over them to swap in the real instance. This is done
at the very end of the top-level `restore()`.
The `instance` attribute below is replaced with the real instance
after `__new__()` has been used to construct the object and is used
when swapping proxies with real instances.
"""
def __init__(self):
self.instance = None


class Unpickler(object):

def __init__(self, backend=None, keys=False, safe=False):
Expand All @@ -48,6 +80,7 @@ def __init__(self, backend=None, keys=False, safe=False):
## Maps objects to their index in the _objs list
self._obj_to_idx = {}
self._objs = []
self._proxies = []

def reset(self):
"""Resets the object's internal state.
Expand All @@ -56,6 +89,7 @@ def reset(self):
self._namestack = []
self._obj_to_idx = {}
self._objs = []
self._proxies = []

def restore(self, obj, reset=True):
"""Restores a flattened object to its original python state.
Expand All @@ -70,7 +104,16 @@ def restore(self, obj, reset=True):
"""
if reset:
self.reset()
return self._restore(obj)
value = self._restore(obj)
if reset:
self._finalize()

return value

def _finalize(self):
"""Replace proxies with their corresponding instances"""
for (obj, attr, proxy) in self._proxies:
setattr(obj, attr, proxy.instance)

def _restore(self, obj):
if has_tag(obj, tags.ID):
Expand Down Expand Up @@ -141,6 +184,12 @@ def _loadfactory(self, obj):
def _restore_object_instance(self, obj, cls):
factory = self._loadfactory(obj)
args = getargs(obj)

# This is a placeholder proxy object which allows child objects to
# reference the parent object before it has been instantiated.
proxy = _Proxy()
self._mkref(proxy)

if args:
args = self._restore(args)
try:
Expand All @@ -158,7 +207,9 @@ def _restore_object_instance(self, obj, cls):
except TypeError: # fail gracefully
return self._mkref(obj)

self._mkref(instance) # allow references in downstream objects
proxy.instance = instance
self._swapref(proxy, instance)

if isinstance(instance, tuple):
return instance

Expand All @@ -179,6 +230,12 @@ def _restore_object_instance_variables(self, obj, instance):
instance[k] = value
else:
setattr(instance, k, value)

# This instance has an instance variable named `k` that is
# currently a proxy and must be replaced
if type(value) is _Proxy:
self._proxies.append((instance, k, value))

# step out
self._namestack.pop()

Expand Down Expand Up @@ -302,6 +359,16 @@ def _mkref(self, obj):
self._namedict[self._refname()] = obj
return obj

def _swapref(self, proxy, instance):
proxy_id = id(proxy)
instance_id = id(instance)

self._obj_to_idx[instance_id] = self._obj_to_idx[proxy_id]
del self._obj_to_idx[proxy_id]

self._objs[-1] = instance
self._namedict[self._refname()] = instance


def loadclass(module_and_name):
"""Loads the module and returns the class.
Expand All @@ -326,11 +393,10 @@ def loadclass(module_and_name):


def getargs(obj):

# let saved newargs take precedence over everything
"""Return arguments suitable for __new__()"""
# Let saved newargs take precedence over everything
if has_tag(obj, tags.NEWARGS):
return obj[tags.NEWARGS]

try:
seq_list = obj[tags.SEQ]
obj_dict = obj[tags.OBJECT]
Expand Down
29 changes: 27 additions & 2 deletions tests/jsonpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,7 @@ def test_make_refs_disabled_reference_to_list(self):
self.assertEqual(decoded.a[1][0:3], '[1,')
self.assertEqual(decoded.a[2][0][0:3], '[1,')


class PicklableNamedTuple(object):
"""
A picklable namedtuple wrapper, to demonstrate the need
Expand All @@ -601,6 +602,16 @@ def __new__(cls, propnames, vals):
instance = ntuple.__new__(ntuple, *vals)
return instance


class PickleProtocal2Thing(object):

def __init__(self, *args):
self.args = args

def __getnewargs__(self):
return self.args


class PicklingProtocol2TestCase(unittest.TestCase):

def test_pickle_newargs(self):
Expand All @@ -616,17 +627,31 @@ def test_pickle_newargs(self):
def test_validate_reconstruct_by_newargs(self):
"""
Ensure that the exemplar tuple's __getnewargs__ works
This is necessary to know whether the breakage exists
This is necessary to know whether the breakage exists
in jsonpickle or not
"""
instance = PicklableNamedTuple(('a', 'b'), (1, 2))
newinstance = PicklableNamedTuple.__new__(PicklableNamedTuple,
newinstance = PicklableNamedTuple.__new__(PicklableNamedTuple,
*(instance.__getnewargs__()))
self.assertEqual(instance, newinstance)

def test_handles_nested_objects(self):
child = PickleProtocal2Thing(None)
instance = PickleProtocal2Thing(child, child)

encoded = jsonpickle.encode(instance)
decoded = jsonpickle.decode(encoded)

self.assertEqual(PickleProtocal2Thing, decoded.__class__)
self.assertEqual(PickleProtocal2Thing, decoded.args[0].__class__)
self.assertEqual(PickleProtocal2Thing, decoded.args[1].__class__)
self.assertTrue(decoded.args[0] is decoded.args[1])


def suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(PicklingTestCase))
suite.addTest(unittest.makeSuite(PicklingProtocol2TestCase))
suite.addTest(unittest.makeSuite(JSONPickleTestCase))
suite.addTest(doctest.DocTestSuite(jsonpickle.pickler))
suite.addTest(doctest.DocTestSuite(jsonpickle.unpickler))
Expand Down

0 comments on commit d6ce6b2

Please sign in to comment.