Merge 904b423 into 35d7ddc
itamarst committed Apr 28, 2021
2 parents 35d7ddc + 904b423 commit 9ce0748
Showing 9 changed files with 126 additions and 43 deletions.
Empty file added newsfragments/3672.minor
Empty file.
4 changes: 2 additions & 2 deletions src/allmydata/test/__init__.py
@@ -125,5 +125,5 @@ def _listening(lp):
initialize()

from eliot import to_file
from allmydata.util.jsonbytes import BytesJSONEncoder
to_file(open("eliot.log", "wb"), encoder=BytesJSONEncoder)
from allmydata.util.jsonbytes import AnyBytesJSONEncoder
to_file(open("eliot.log", "wb"), encoder=AnyBytesJSONEncoder)
4 changes: 2 additions & 2 deletions src/allmydata/test/eliotutil.py
@@ -54,7 +54,7 @@
MonkeyPatcher,
)

from ..util.jsonbytes import BytesJSONEncoder
from ..util.jsonbytes import AnyBytesJSONEncoder


_NAME = Field.for_types(
@@ -76,7 +76,7 @@
if PY2:
_memory_logger = MemoryLogger
else:
_memory_logger = lambda: MemoryLogger(encoder=BytesJSONEncoder)
_memory_logger = lambda: MemoryLogger(encoder=AnyBytesJSONEncoder)


@attr.s
4 changes: 2 additions & 2 deletions src/allmydata/test/test_eliotutil.py
@@ -69,7 +69,7 @@
_parse_destination_description,
_EliotLogging,
)
from ..util.jsonbytes import BytesJSONEncoder
from ..util.jsonbytes import AnyBytesJSONEncoder

from .common import (
SyncTestCase,
@@ -109,7 +109,7 @@ def test_stdout(self):
reactor = object()
self.assertThat(
_parse_destination_description("file:-")(reactor),
Equals(FileDestination(stdout, encoder=BytesJSONEncoder)),
Equals(FileDestination(stdout, encoder=AnyBytesJSONEncoder)),
)


37 changes: 34 additions & 3 deletions src/allmydata/test/test_util.py
@@ -495,10 +495,10 @@ def test_convert(self):


class JSONBytes(unittest.TestCase):
"""Tests for BytesJSONEncoder."""
"""Tests for jsonbytes module."""

def test_encode_bytes(self):
"""BytesJSONEncoder can encode bytes.
"""jsonbytes.dumps() encodes bytes.
Bytes are presumed to be UTF-8 encoded.
"""
@@ -515,7 +515,7 @@ def test_encode_bytes(self):
self.assertEqual(jsonbytes.loads(encoded), expected)

def test_encode_unicode(self):
"""BytesJSONEncoder encodes Unicode string as usual."""
"""jsonbytes.dumps() encodes Unicode string as usual."""
expected = {
u"hello": [1, u"cd"],
}
@@ -529,6 +529,37 @@ def test_dumps_bytes(self):
self.assertIsInstance(encoded, bytes)
self.assertEqual(json.loads(encoded, encoding="utf-8"), x)

def test_any_bytes_unsupported_by_default(self):
"""By default non-UTF-8 bytes raise error."""
bytestring = b"abc\xff\x00"
with self.assertRaises(UnicodeDecodeError):
jsonbytes.dumps(bytestring)
with self.assertRaises(UnicodeDecodeError):
jsonbytes.dumps_bytes(bytestring)
with self.assertRaises(UnicodeDecodeError):
json.dumps(bytestring, cls=jsonbytes.UTF8BytesJSONEncoder)

def test_any_bytes(self):
"""If any_bytes is True, non-UTF-8 bytes don't break encoding."""
bytestring = b"abc\xff\xff123"
o = {bytestring: bytestring}
expected = {"abc\\xff\\xff123": "abc\\xff\\xff123"}
self.assertEqual(
json.loads(jsonbytes.dumps(o, any_bytes=True)),
expected,
)
self.assertEqual(
json.loads(json.dumps(
o, cls=jsonbytes.AnyBytesJSONEncoder)),
expected,
)
self.assertEqual(
json.loads(jsonbytes.dumps(o, any_bytes=True),
encoding="utf-8"),
expected,
)



class FakeGetVersion(object):
"""Emulate an object with a get_version."""
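
The two new tests above pin down the split in behaviour. A minimal sketch of what they assert, assuming this branch's allmydata.util.jsonbytes is importable; the escaped strings mirror the expectations in test_any_bytes rather than anything new:

import json
from allmydata.util import jsonbytes

bad = b"abc\xff\xff123"  # not valid UTF-8

# Default behaviour (and UTF8BytesJSONEncoder): strict UTF-8 decoding, so this
# raises UnicodeDecodeError, as test_any_bytes_unsupported_by_default expects.
try:
    jsonbytes.dumps(bad)
except UnicodeDecodeError:
    pass

# With any_bytes=True the undecodable bytes are backslash-quoted instead.
encoded = jsonbytes.dumps({bad: bad}, any_bytes=True)
assert json.loads(encoded) == {"abc\\xff\\xff123": "abc\\xff\\xff123"}

# dumps_bytes() takes the same flag and returns bytes on Python 3.
assert isinstance(jsonbytes.dumps_bytes({bad: bad}, any_bytes=True), bytes)
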
6 changes: 3 additions & 3 deletions src/allmydata/test/web/test_logs.py
@@ -92,7 +92,7 @@ def tearDown(self):
@inlineCallbacks
def test_one_log(self):
"""
Write a single Eliot log actin and see it streamed via websocket.
Write a single Eliot log action and see it streamed via websocket.
"""

proto = yield self.agent.open(
@@ -109,15 +109,15 @@ def got_message(msg, is_binary=False):
def do_a_thing(arguments):
pass

do_a_thing(arguments=[u"hello", b"good-day", 123, {"a": 35}, [None]])
do_a_thing(arguments=[u"hello", b"good-\xff-day", 123, {"a": 35}, [None]])

proto.transport.loseConnection()
yield proto.is_closed

self.assertEqual(len(messages), 2)
self.assertEqual(messages[0]["action_type"], "test:cli:some-exciting-action")
self.assertEqual(messages[0]["arguments"],
["hello", "good-day", 123, {"a": 35}, [None]])
["hello", "good-\\xff-day", 123, {"a": 35}, [None]])
self.assertEqual(messages[1]["action_type"], "test:cli:some-exciting-action")
self.assertEqual("started", messages[0]["action_status"])
self.assertEqual("succeeded", messages[1]["action_status"])
6 changes: 3 additions & 3 deletions src/allmydata/util/eliotutil.py
@@ -87,7 +87,7 @@
)
from twisted.application.service import Service

from .jsonbytes import BytesJSONEncoder
from .jsonbytes import AnyBytesJSONEncoder


def validateInstanceOf(t):
@@ -306,7 +306,7 @@ def get_file():
rotateLength=rotate_length,
maxRotatedFiles=max_rotated_files,
)
return lambda reactor: FileDestination(get_file(), BytesJSONEncoder)
return lambda reactor: FileDestination(get_file(), AnyBytesJSONEncoder)


_parse_destination_description = _DestinationParser().parse
@@ -333,4 +333,4 @@ def logged_f(*a, **kw):
if PY2:
capture_logging = eliot_capture_logging
else:
capture_logging = partial(eliot_capture_logging, encoder_=BytesJSONEncoder)
capture_logging = partial(eliot_capture_logging, encoder_=AnyBytesJSONEncoder)
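
Why this matters for tests on Python 3: the MemoryLogger that capture_logging builds now validates messages with AnyBytesJSONEncoder, so logging a non-UTF-8 bytes field no longer fails validation. The test below is illustrative only (the action type and field name are made up), assuming eliot's usual capture_logging behaviour of passing the in-memory logger to the wrapped test method:

import unittest

from eliot import start_action
from allmydata.util.eliotutil import capture_logging


class NonUTF8FieldExample(unittest.TestCase):
    """Illustrative sketch, not a test from this branch."""

    @capture_logging(None)
    def test_non_utf8_field(self, logger):
        # With AnyBytesJSONEncoder behind the MemoryLogger, this field is
        # backslash-quoted during validation instead of raising.
        with start_action(action_type=u"example:action", payload=b"\xff\x00"):
            pass
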
103 changes: 79 additions & 24 deletions src/allmydata/util/jsonbytes.py
@@ -14,45 +14,100 @@
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401

import json
import codecs

if PY2:
def backslashreplace_py2(ex):
"""
On Python 2 'backslashreplace' error handler doesn't work, so write our
own.
"""
return ''.join('\\x{:02x}'.format(ord(c))
for c in ex.object[ex.start:ex.end]), ex.end

codecs.register_error("backslashreplace_tahoe_py2", backslashreplace_py2)


def _bytes_to_unicode(any_bytes, obj):
"""Create a function that recursively converts bytes to unicode.
:param any_bytes: If True, also support non-UTF-8-encoded bytes.
:param obj: Object to de-byte-ify.
"""
errors = "backslashreplace" if any_bytes else "strict"
if PY2 and errors == "backslashreplace":
errors = "backslashreplace_tahoe_py2"

def doit(obj):
"""Convert any bytes objects to unicode, recursively."""
if isinstance(obj, bytes):
return obj.decode("utf-8", errors=errors)
if isinstance(obj, dict):
new_obj = {}
for k, v in obj.items():
if isinstance(k, bytes):
k = k.decode("utf-8", errors=errors)
v = doit(v)
new_obj[k] = v
return new_obj
if isinstance(obj, (list, set, tuple)):
return [doit(i) for i in obj]
return obj

return doit(obj)


class UTF8BytesJSONEncoder(json.JSONEncoder):
"""
A JSON encoder than can also encode UTF-8 encoded strings.
"""
def encode(self, o, **kwargs):
return json.JSONEncoder.encode(
self, _bytes_to_unicode(False, o), **kwargs)

def iterencode(self, o, **kwargs):
return json.JSONEncoder.iterencode(
self, _bytes_to_unicode(False, o), **kwargs)


def _bytes_to_unicode(obj):
"""Convert any bytes objects to unicode, recursively."""
if isinstance(obj, bytes):
return obj.decode("utf-8")
if isinstance(obj, dict):
new_obj = {}
for k, v in obj.items():
if isinstance(k, bytes):
k = k.decode("utf-8")
v = _bytes_to_unicode(v)
new_obj[k] = v
return new_obj
if isinstance(obj, (list, set, tuple)):
return [_bytes_to_unicode(i) for i in obj]
return obj


class BytesJSONEncoder(json.JSONEncoder):
class AnyBytesJSONEncoder(json.JSONEncoder):
"""
A JSON encoder than can also encode bytes.
A JSON encoder than can also encode bytes of any sort.
The bytes are assumed to be UTF-8 encoded Unicode strings.
Bytes are decoded to strings using UTF-8, if that fails to decode then the
bytes are quoted.
"""
def encode(self, o, **kwargs):
return json.JSONEncoder.encode(
self, _bytes_to_unicode(True, o), **kwargs)

def iterencode(self, o, **kwargs):
return json.JSONEncoder.iterencode(self, _bytes_to_unicode(o), **kwargs)
return json.JSONEncoder.iterencode(
self, _bytes_to_unicode(True, o), **kwargs)


def dumps(obj, *args, **kwargs):
"""Encode to JSON, supporting bytes as keys or values.
The bytes are assumed to be UTF-8 encoded Unicode strings.
:param bool any_bytes: If False (the default) the bytes are assumed to be
UTF-8 encoded Unicode strings. If True, non-UTF-8 bytes are quoted for
human consumption.
"""
return json.dumps(obj, cls=BytesJSONEncoder, *args, **kwargs)
any_bytes = kwargs.pop("any_bytes", False)
if any_bytes:
cls = AnyBytesJSONEncoder
else:
cls = UTF8BytesJSONEncoder
return json.dumps(obj, cls=cls, *args, **kwargs)


def dumps_bytes(obj, *args, **kwargs):
"""Encode to JSON, then encode as bytes."""
"""Encode to JSON, then encode as bytes.
:param bool any_bytes: If False (the default) the bytes are assumed to be
UTF-8 encoded Unicode strings. If True, non-UTF-8 bytes are quoted for
human consumption.
"""
result = dumps(obj, *args, **kwargs)
if PY3:
result = result.encode("utf-8")
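
Taken together, the module now splits the old BytesJSONEncoder into two encoders that can also be handed straight to json.dumps(). A rough sketch of the difference, based only on the code above (the commented results are what the backslashreplace handler should produce, not captured output):

import json
from allmydata.util.jsonbytes import UTF8BytesJSONEncoder, AnyBytesJSONEncoder

ok = {b"key": [b"value", 1]}
json.dumps(ok, cls=UTF8BytesJSONEncoder)   # '{"key": ["value", 1]}'
json.dumps(ok, cls=AnyBytesJSONEncoder)    # same result; valid UTF-8 passes through

raw = b"packet:\xfe\xff"
# UTF8BytesJSONEncoder decodes with errors="strict" and would raise
# UnicodeDecodeError here; AnyBytesJSONEncoder backslash-quotes the bad bytes.
assert json.loads(json.dumps(raw, cls=AnyBytesJSONEncoder)) == "packet:\\xfe\\xff"
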
5 changes: 1 addition & 4 deletions src/allmydata/web/logs.py
@@ -47,10 +47,7 @@ def _received_eliot_log(self, message):
"""
# probably want a try/except around here? what do we do if
# transmission fails or anything else bad happens?
encoded = json.dumps(message)
if isinstance(encoded, str):
# On Python 3 dumps() returns Unicode...
encoded = encoded.encode("utf-8")
encoded = json.dumps_bytes(message, any_bytes=True)
self.sendMessage(encoded)

def onOpen(self):
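
For context on the logs.py change: dumps_bytes() already returns bytes on Python 3, so the isinstance()/encode() dance is gone, and any_bytes=True keeps a stray non-UTF-8 byte in an Eliot message from breaking the websocket stream. A minimal sketch, assuming the json name in logs.py is the allmydata.util.jsonbytes module imported under that alias:

from allmydata.util import jsonbytes as json

message = {"action_type": "test:cli:some-exciting-action",
           "arguments": [b"good-\xff-day"]}

# One call now does both the quoting and the bytes conversion.
encoded = json.dumps_bytes(message, any_bytes=True)
assert isinstance(encoded, bytes)
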
