Skip to content

Commit

Permalink
pickler: handle arbitrary bytestreams using base64
Browse files Browse the repository at this point in the history
The quoted-printable encoding seems to fall down under certain
scenarios, e.g. HDF5 data, so make our bytes transport robust by using
base64 to encode bytes for json.

Use a new "B64" tag in the data stream to allow for
backwards-compatibility.  Support for decoding the old "BYTES"
tag remains intact.  New versions of jsonpickle now emit a new
"B64" tag when bytes are encountered.

Add a new unit test with the example HDF5 data to ensure that we
will forever support it.

Closes #143
Reported-by: Jason Liang <jasonzliang@berkeley.edu>
Signed-off-by: David Aguilar <davvid@gmail.com>
  • Loading branch information
davvid committed Mar 19, 2016
1 parent 4f05e76 commit 16e7945
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 10 deletions.
5 changes: 5 additions & 0 deletions docs/source/changelog.rst
@@ -1,6 +1,11 @@
Change Log
==========

Version 0.9.4 - Unreleased
--------------------------
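* Arbitrary byte streams are now better supported: bytes are encoded
  using base64 instead of quoted-printable, and data written by older
  versions can still be decoded.
  (`#143 <https://github.com/jsonpickle/jsonpickle/issues/143>`_).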

Version 0.9.3 - March 9, 2016
-----------------------------
* UUID objects can now be serialized
Expand Down
4 changes: 2 additions & 2 deletions jsonpickle/pickler.py
Expand Up @@ -7,9 +7,9 @@
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.

import base64
import warnings
import sys
import quopri
from itertools import chain, islice

import jsonpickle.util as util
Expand Down Expand Up @@ -251,7 +251,7 @@ def _flatten_bytestring(self, obj):
return obj.decode('utf-8')
except:
pass
return {tags.BYTES: quopri.encodestring(obj).decode('utf-8')}
return {tags.B64: base64.encodestring(obj).decode('utf-8')}

def _flatten_obj_instance(self, obj):
"""Recursively flatten an instance and return a json-friendly dict
Expand Down
1 change: 1 addition & 0 deletions jsonpickle/tags.py
Expand Up @@ -12,6 +12,7 @@


BYTES = 'py/bytes'
B64 = 'py/b64'
FUNCTION = 'py/function'
ID = 'py/id'
INITARGS = 'py/initargs'
Expand Down
15 changes: 11 additions & 4 deletions jsonpickle/unpickler.py
Expand Up @@ -7,8 +7,9 @@
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.

import sys
import base64
import quopri
import sys

import jsonpickle.util as util
import jsonpickle.tags as tags
Expand Down Expand Up @@ -128,8 +129,10 @@ def _swap_proxies(self):
self._proxies = []

def _restore(self, obj):
if has_tag(obj, tags.BYTES):
restore = self._restore_bytestring
if has_tag(obj, tags.B64):
restore = self._restore_base64
elif has_tag(obj, tags.BYTES): # Backwards compatibility
restore = self._restore_quopri
elif has_tag(obj, tags.ID):
restore = self._restore_id
elif has_tag(obj, tags.REF): # Backwards compatibility
Expand Down Expand Up @@ -158,7 +161,11 @@ def _restore(self, obj):
restore = lambda x: x
return restore(obj)

def _restore_bytestring(self, obj):
def _restore_base64(self, obj):
    """Decode a ``tags.B64``-tagged dict back into a byte string.

    :param obj: dict whose ``tags.B64`` entry holds base64 text.
    :returns: the decoded bytes.
    """
    # base64.decodestring() is a deprecated alias that was removed in
    # Python 3.9.  b64decode() exists on both Python 2 and 3 and, by
    # default, skips the line breaks that encodestring() inserts, so
    # previously written payloads still decode.
    return base64.b64decode(obj[tags.B64].encode('utf-8'))

#: Kept so that payloads written by older versions, which stored bytes
#: as quoted-printable text under the BYTES tag, can still be decoded
def _restore_quopri(self, obj):
    """Decode the quoted-printable text stored under ``tags.BYTES``."""
    encoded = obj[tags.BYTES].encode('utf-8')
    return quopri.decodestring(encoded)

def _restore_iterator(self, obj):
Expand Down
26 changes: 22 additions & 4 deletions tests/object_test.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-

import base64
import collections
import decimal
import re
Expand Down Expand Up @@ -762,9 +763,10 @@ def test_bytes_unicode(self):
self.assertEqual(encoded, u1)
self.assertEqual(type(encoded), unicode)
else:
self.assertTrue(encoded == {tags.BYTES: 'foo'})
self.assertTrue(type(encoded[tags.BYTES]) is unicode)
self.assertNotEqual(encoded, u1)
b64ustr= base64.encodestring(b'foo').decode('utf-8')
self.assertEqual({tags.B64: b64ustr}, encoded)
self.assertEqual(type(encoded[tags.B64]), unicode)
decoded = self.unpickler.restore(encoded)
self.assertTrue(decoded == b1)
if PY2:
Expand All @@ -774,13 +776,29 @@ def test_bytes_unicode(self):

# bytestrings that we can't decode to UTF-8 will always be wrapped
encoded = self.pickler.flatten(b2)
self.assertTrue(encoded == {tags.BYTES: 'foo=FF'})
self.assertTrue(type(encoded[tags.BYTES]) is unicode)
self.assertNotEqual(encoded, b2)
b64ustr= base64.encodestring(b'foo\xff').decode('utf-8')
self.assertEqual({tags.B64: b64ustr}, encoded)
self.assertEqual(type(encoded[tags.B64]), unicode)
decoded = self.unpickler.restore(encoded)
self.assertEqual(decoded, b2)
self.assertTrue(type(decoded) is bytes)

def test_backcompat_bytes_quoted_printable(self):
    """Bytes written by older jsonpickle versions must still decode"""
    # older versions of jsonpickle stored quoted-printable text under
    # the BYTES tag; pair each legacy payload with the bytes it encodes
    legacy_cases = (
        ('foo', b'foo'),
        ('foo=FF', b'foo\xff'),
    )
    for payload, expect in legacy_cases:
        actual = self.unpickler.restore({tags.BYTES: unicode(payload)})
        self.assertEqual(expect, actual)

def test_nested_objects(self):
obj = ThingWithTimedeltaAttribute(99)
flattened = self.pickler.flatten(obj)
Expand Down
16 changes: 16 additions & 0 deletions tests/stdlib_test.py
Expand Up @@ -28,6 +28,22 @@ def test_known_uuid(self):
self.assertEqual(expect, actual)


class BytesTestCase(unittest.TestCase):
    """Round-trip tests for raw byte strings."""

    def test_bytestream(self):
        """An HDF5 byte stream must survive an encode/decode round trip"""
        # sample HDF5 data (note the b'\x89HDF' signature) from issue #143
        hdf5_bytes = (b'\x89HDF\r\n\x1a\n\x00\x00\x00\x00\x00\x08\x08\x00'
                      b'\x04\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00'
                      b'\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xffh'
                      b'\x848\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff'
                      b'\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00`\x00\x00'
                      b'\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00'
                      b'\x00\x88\x00\x00\x00\x00\x00\x00\x00\xa8\x02\x00'
                      b'\x00\x00\x00\x00\x00\x01\x00\x01\x00')
        roundtripped = jsonpickle.decode(jsonpickle.encode(hdf5_bytes))
        self.assertEqual(hdf5_bytes, roundtripped)


def suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(UUIDTestCase))
Expand Down

0 comments on commit 16e7945

Please sign in to comment.