diff --git a/javaobj.py b/javaobj.py index 79da5f4..821bee7 100644 --- a/javaobj.py +++ b/javaobj.py @@ -126,6 +126,9 @@ def read_to_str(data): """ return ''.join(chr(char) for char in data) + unichr = chr + unicode = str + else: # Python 2 interpreter : str & unicode def to_str(data, encoding="UTF-8"): @@ -981,7 +984,11 @@ def do_array(self, parent=None, ident=0): else: for _ in range(size): res = self._read_value(type_char, ident) - log_debug("Native value: {0}".format(res), ident) + _res = res + # py2 + if str is not unicode and isinstance(res, unicode): + _res = res.encode('ascii', 'replace') + log_debug("Native value: {0}".format(_res), ident) array.append(res) return array @@ -1072,16 +1079,15 @@ def _read_value(self, field_type, ident, name=""): # We don't need details for arrays and objects field_type = field_type[0] + _res = None if field_type == self.TYPE_BOOLEAN: (val,) = self._readStruct(">B") res = bool(val) elif field_type == self.TYPE_BYTE: (res,) = self._readStruct(">b") elif field_type == self.TYPE_CHAR: - # TYPE_CHAR is defined by the serialization specification - # but not used in the implementation, so this is - # a hypothetical code - res = bytes(self._readStruct(">bb")).decode("utf-16-be") + _res = self._readStruct(">H")[0] + res = unichr(_res) elif field_type == self.TYPE_SHORT: (res,) = self._readStruct(">h") elif field_type == self.TYPE_INTEGER: @@ -1097,7 +1103,10 @@ def _read_value(self, field_type, ident, name=""): else: raise RuntimeError("Unknown typecode: {0}".format(field_type)) - log_debug("* {0} {1}: {2}".format(field_type, name, res), ident) + if _res is None: + _res = res + + log_debug("* {0} {1}: {2}".format(field_type, name, _res), ident) return res def _convert_char_to_type(self, type_char): @@ -1513,7 +1522,10 @@ def write_array(self, obj): else: log_debug("Write array of type %s" % type_char) for v in obj: - log_debug("Writing: %s" % v) + _v = v + if str is not unicode and isinstance(v, unicode): + _v = v.encode('ascii', 'replace') + log_debug("Writing: %s" % _v) self._write_value(type_char, v) def _write_value(self, field_type, value): @@ -1531,6 +1543,8 @@ def _write_value(self, field_type, value): self._writeStruct(">B", 1, (1 if value else 0,)) elif field_type == self.TYPE_BYTE: self._writeStruct(">b", 1, (value,)) + elif field_type == self.TYPE_CHAR: + self._writeStruct(">H", 1, (ord(value),)) elif field_type == self.TYPE_SHORT: self._writeStruct(">h", 1, (value,)) elif field_type == self.TYPE_INTEGER: diff --git a/tests/testCharArray.ser b/tests/testCharArray.ser new file mode 100644 index 0000000..1e551fc Binary files /dev/null and b/tests/testCharArray.ser differ diff --git a/tests/tests.py b/tests/tests.py index 3247cf6..cfe358f 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -280,6 +280,13 @@ def test_arrays(self): self._try_marshalling(jobj, pobj) + def test_char_array(self): + jobj = self.read_file("testCharArray.ser") + pobj = javaobj.loads(jobj) + _logger.debug(pobj) + self.assertEqual(pobj, [u'\u0000', u'\ud800', u'\u0001', u'\udc00', u'\u0002', u'\uffff', u'\u0003']) + self._try_marshalling(jobj, pobj) + def test_enums(self): jobj = self.read_file("objEnums.ser") pobj = javaobj.loads(jobj)