Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't raise exceptions on bad unicode decode. #127

Merged
merged 11 commits into from
Jul 20, 2020
4 changes: 3 additions & 1 deletion src/genpy/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ def string_serializer_generator(package, type_, name, serialize): # noqa: D401
yield '%s = str[start:end]' % (var)
else:
yield 'if python3:'
yield INDENT+"%s = str[start:end].decode('utf-8')" % (var) # If messages are python3-decode back to unicode
yield INDENT+"%s = str[start:end].decode('utf-8', 'rosmsg')" % (var) # If messages are python3-decode back to unicode
yield 'else:'
yield INDENT+'%s = str[start:end]' % (var)

Expand Down Expand Up @@ -756,6 +756,7 @@ def deserialize_fn_generator(msg_context, spec, is_numpy=False): # noqa: D401
:param is_numpy: if True, generate serializer code for numpy
datatypes instead of Python lists, ``bool``
"""
yield 'codecs.lookup_error("rosmsg").msg_type = self._type'
yield 'try:'
package = spec.package
# Instantiate embedded type classes
Expand Down Expand Up @@ -816,6 +817,7 @@ def msg_generator(msg_context, spec, search_path):

yield '# This Python file uses the following encoding: utf-8'
yield '"""autogenerated by genpy from %s.msg. Do not edit."""' % spec.full_name
yield 'import codecs'
yield 'import sys'
yield 'python3 = True if sys.hexversion > 0x03000000 else False'
yield 'import genpy\nimport struct\n'
Expand Down
21 changes: 21 additions & 0 deletions src/genpy/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
libraries for type checking and retrieving message classes by type name.
"""

import codecs
import itertools
import math
import struct
Expand Down Expand Up @@ -65,6 +66,26 @@

struct_I = struct.Struct('<I')

# Message types for which a decoding problem has already been logged.
# Used to report each message type at most once.
_warned_decoding_error = set()


class RosMsgUnicodeErrors(object):
    """
    Codec error handler that reports bad input once and then replaces it.

    Notify the user while not crashing in the face of errors attempting
    to decode non-unicode data within a ROS message.  An instance is
    registered below under the error-handler name ``'rosmsg'``, so it can
    be selected with e.g. ``data.decode('utf-8', 'rosmsg')``.
    """

    def __init__(self):
        # Name of the message type currently being decoded.  Stays None
        # until a caller assigns it; None is reported as "unknown message".
        self.msg_type = None

    def __call__(self, err):
        """
        Log the error (first time per message type) and replace the bad data.

        :param err: the exception instance handed over by the codec
        :returns: whatever ``codecs.replace_errors(err)`` returns, i.e. a
          ``(replacement, resume_position)`` tuple
        """
        # The set is only mutated, never rebound, so no ``global`` is needed.
        if self.msg_type not in _warned_decoding_error:
            _warned_decoding_error.add(self.msg_type)
            # Lazy import to avoid this cost in the non-error case.
            import logging
            logger = logging.getLogger('rosout')
            extra = "message %s" % self.msg_type if self.msg_type else "unknown message"
            logger.error("Characters replaced when decoding %s (will print only once): %s", extra, err)
        return codecs.replace_errors(err)


codecs.register_error('rosmsg', RosMsgUnicodeErrors())


def isstring(s):
"""Small helper version to check an object is a string in a way that works for both Python 2 and 3."""
Expand Down
2 changes: 1 addition & 1 deletion test/files/array/string_fixed_deser.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ for i in range(0, 2):
start = end
end += length
if python3:
val0 = str[start:end].decode('utf-8')
val0 = str[start:end].decode('utf-8', 'rosmsg')
else:
val0 = str[start:end]
data.append(val0)
2 changes: 1 addition & 1 deletion test/files/array/string_varlen_deser.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ for i in range(0, length):
start = end
end += length
if python3:
val0 = str[start:end].decode('utf-8')
val0 = str[start:end].decode('utf-8', 'rosmsg')
else:
val0 = str[start:end]
data.append(val0)
2 changes: 1 addition & 1 deletion test/test_genpy_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def test_string_serializer_generator():
start = end
end += length
if python3:
var_name = str[start:end].decode('utf-8')
var_name = str[start:end].decode('utf-8', 'rosmsg')
else:
var_name = str[start:end]"""
# string serializer and array serializer are identical
Expand Down
24 changes: 24 additions & 0 deletions test/test_genpy_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,3 +738,27 @@ def test_serialize_exception_msg(self):
self.assertEqual(str(e), "<class 'struct.error'>: 'required argument is not a float' when writing '1.0'")
except Exception:
assert False, 'This should have raised a genpy.SerializationError instead'

@unittest.skipIf(sys.hexversion < 0x03000000, "Python 3 only test")
def test_deserialize_unicode_error(self):
    """Check that undecodable string bytes are replaced and logged once per message type."""
    from genpy.msg import TestMsgArray, TestString

    # Expected text when the two invalid bytes are each replaced by U+FFFD.
    replaced_text = 'A\ufffd\ufffdB'

    # A payload holding valid UTF-8 must decode to the same text as a
    # plain bytes.decode() of the character data.
    # NOTE(review): the 4-byte prefix in these buffers reads as a
    # big-endian 4 -- confirm this matches the byte order the generated
    # deserializer uses for string lengths.
    valid_payload = b'\x00\x00\x00\x04\xF0\x9F\x92\xA9'
    valid_msg = TestString()
    self.assertEqual(valid_msg.deserialize(valid_payload).data, b'\xF0\x9F\x92\xA9'.decode())

    # A plain string message with the bytes 0xff 0xfe in its data:
    # deserializing twice must yield the replaced text both times but
    # produce a single ERROR record on the 'rosout' logger.
    string_msg = TestString()
    invalid_payload = b'\x00\x00\x00\x04\x41\xff\xfe\x42'
    with self.assertLogs('rosout', level='ERROR') as captured:
        for _ in range(2):
            self.assertEqual(string_msg.deserialize(invalid_payload).data, replaced_text)
    self.assertEqual(len(captured.output), 1)
    self.assertIn("Characters replaced when decoding message genpy/TestString (will print only once)", captured.output[0])

    # Same check for a string nested inside an array message; the log
    # line must name the outer message type.
    array_msg = TestMsgArray()
    nested_payload = b'\x00\x00\x00\x00\x00\x00\x00\x04\x41\xff\xfe\x42'
    with self.assertLogs('rosout', level='ERROR') as captured:
        for _ in range(2):
            self.assertEqual(array_msg.deserialize(nested_payload).fixed_strings[0].data, replaced_text)
    self.assertEqual(len(captured.output), 1)
    self.assertIn("Characters replaced when decoding message genpy/TestMsgArray (will print only once)", captured.output[0])