diff --git a/openformats/formats/json.py b/openformats/formats/json.py
index 1fcb9a59..8f60333c 100644
--- a/openformats/formats/json.py
+++ b/openformats/formats/json.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from __future__ import absolute_import
 
 import json
@@ -255,12 +257,40 @@ def _get_next_string(self):
 
     @staticmethod
     def escape(string):
-        escaped_string = string.replace('\\', r'\\').replace('"', '\\"')
-
+        """Escape `string` for use as the body of a JSON string literal.
+
+        `json.dumps` is used instead of the `unicode_escape` codec because
+        the codec produces python-only escapes (backslash-x, backslash-U)
+        that JSON parsers reject.
+        """
+        try:
+            # Serialize as a JSON string and drop the surrounding quotes;
+            # backslashes, double quotes, control and non-ascii characters
+            # all come out as valid JSON escapes
+            escaped_string = json.dumps(string)[1:-1]
+        except UnicodeError:
+            # Undecodable byte string (python 2): only escape the characters
+            # that would break the literal
+            escaped_string = string.replace('\\', r'\\').replace('"', '\\"')
+        else:
+            if isinstance(escaped_string, bytes):
+                # python 2 returns an ascii `str`, callers expect unicode
+                escaped_string = escaped_string.decode('ascii')
         return escaped_string
 
     @staticmethod
     def unescape(string):
-        unescaped_string = string.replace(r'\\', '\\').replace(r'\"', '"')
-
+        """Reverse `escape`: decode the body of a JSON string literal.
+
+        Input that is not a valid JSON string body only has its escaped
+        backslashes and double quotes unescaped.
+        """
+        try:
+            # The JSON parser resolves every escape in a single pass, so an
+            # escaped backslash followed by an escaped quote is not
+            # unescaped twice; strict=False tolerates raw control characters
+            unescaped_string = json.loads(u'"' + string + u'"', strict=False)
+        except ValueError:
+            # JSON syntax errors and unicode errors are both ValueErrors
+            unescaped_string = string.replace(r'\\', '\\').replace(r'\"', '"')
         return unescaped_string
diff --git a/openformats/tests/formats/beta_keyvaluejson/test_json.py b/openformats/tests/formats/beta_keyvaluejson/test_json.py
index 1b3874e5..4c1b91f9 100644
--- a/openformats/tests/formats/beta_keyvaluejson/test_json.py
+++ b/openformats/tests/formats/beta_keyvaluejson/test_json.py
@@ -1,3 +1,6 @@
+# -*- coding: utf-8 -*-
+
+import json
 import unittest
 
 from openformats.strings import OpenString
@@ -320,8 +323,32 @@ def test_escape_json(self):
             "a \\\\ string. with \\\"quotes\\\""
         )
 
+        self.assertEqual(self.handler.escape(u'καλημέρα'),
+                         self._escape_with_json(u'καλημέρα'))
+
+        # Latin-1 and control characters must become JSON \uXXXX escapes,
+        # not the python-only \xNN form
+        self.assertEqual(self.handler.escape(u'caf\xe9 \x01'),
+                         u'caf\\u00e9 \\u0001')
+
     def test_unescape_json(self):
         self.assertEqual(
             self.handler.unescape("a \\\\ string. with \\\"quotes\\\""),
             "a \\ string. with \"quotes\""
         )
+
+        self.assertEqual(
+            self.handler.unescape(self._escape_with_json(u'καλημέρα')),
+            u'καλημέρα'
+        )
+
+        # An escaped backslash followed by an escaped quote is unescaped once
+        self.assertEqual(self.handler.unescape('\\\\\\"'), '\\"')
+
+    @staticmethod
+    def _escape_with_json(s):
+        escaped = json.dumps(s)[1:-1]
+        if isinstance(escaped, bytes):
+            # python 2 returns an ascii `str`
+            escaped = escaped.decode('ascii')
+        return escaped