Skip to content

Commit

Permalink
Merge pull request #36 from transifex/json_escape_unescape
Browse files Browse the repository at this point in the history
Use unicode (un)escaping for the JSON format, like the json module does
  • Loading branch information
tabac committed Apr 22, 2016
2 parents 21a3f44 + 1cb6af9 commit ef6001e
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 4 deletions.
28 changes: 24 additions & 4 deletions openformats/formats/json.py
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import json
Expand Down Expand Up @@ -255,12 +257,30 @@ def _get_next_string(self):

@staticmethod
def escape(string):
    r"""Escape a raw string so it can sit between JSON double quotes.

    The escaping is delegated to ``json.dumps`` (with its default
    ``ensure_ascii=True``), so the result is exactly what the ``json``
    module would write, minus the surrounding quotes: backslashes and
    double quotes are backslash-escaped, control characters use the short
    JSON escapes or ``\u00XX``, and every non-ASCII character becomes one
    ``\uXXXX`` sequence (a surrogate pair for characters outside the BMP).

    The ``unicode_escape`` codec is deliberately not used: it produces
    Python-literal escapes such as ``\xe9`` and ``\U0001f600``, which are
    not valid inside a JSON string.

    :param string: the unescaped text.
    :return: the escaped text, pure ASCII, without surrounding quotes.
    """
    try:
        # Drop the opening and closing double quote added by json.dumps.
        escaped_string = json.dumps(string)[1:-1]
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Input that cannot be decoded (e.g. a Python 2 byte string that is
        # not valid UTF-8): fall back to escaping only the two characters
        # that would break the JSON string syntax.
        return string.replace('\\', r'\\').replace('"', '\\"')

    # On Python 2 json.dumps returns a byte string; callers expect text.
    if isinstance(escaped_string, bytes):
        escaped_string = escaped_string.decode('ascii')
    return escaped_string

@staticmethod
def unescape(string):
    r"""Reverse JSON string escaping on the body of a JSON string.

    The text is first parsed with ``json.loads`` as if it were enclosed in
    double quotes. This handles every JSON escape in a single pass, so an
    escaped backslash followed by an escaped quote (``\\\"``) correctly
    yields ``\"`` rather than being unescaped twice, ``\/`` becomes ``/``
    and ``\uD83D\uDE00``-style surrogate pairs are joined into one
    character. Non-ASCII characters already present are kept as they are.

    If the text is not a valid JSON string body (for example it contains
    a raw, unescaped double quote) a best-effort fallback is used instead
    of raising.

    :param string: the escaped text, without surrounding quotes.
    :return: the unescaped text.
    """
    try:
        # strict=False tolerates raw control characters such as literal
        # newlines inside the string.
        return json.loads(u'"' + string + u'"', strict=False)
    except ValueError:
        # Not a well-formed JSON string body; try the lenient paths below.
        pass

    try:
        # ASCII-only input: let the unicode_escape codec resolve the
        # backslash sequences. It already turns \" into ", so no further
        # quote handling must be applied afterwards.
        return string.encode('ascii').decode('unicode_escape')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Non-ASCII or otherwise undecodable input: only undo escaped
        # backslashes and quotes, in one pass so that an escaped backslash
        # is never re-read as the start of another escape.
        import re
        return re.sub(r'\\([\\"])', r'\1', string)
15 changes: 15 additions & 0 deletions openformats/tests/formats/beta_keyvaluejson/test_json.py
@@ -1,3 +1,6 @@
# -*- coding: utf-8 -*-

import json
import unittest

from openformats.strings import OpenString
Expand Down Expand Up @@ -320,8 +323,20 @@ def test_escape_json(self):
"a \\\\ string. with \\\"quotes\\\""
)

self.assertEqual(self.handler.escape(u'καλημέρα'),
self._escape_with_json(u'καλημέρα'))

def test_unescape_json(self):
    """Check that the handler's ``unescape`` undoes JSON-style escaping."""
    # Escaped backslashes and escaped double quotes are restored.
    escaped_ascii = "a \\\\ string. with \\\"quotes\\\""
    expected_ascii = "a \\ string. with \"quotes\""
    self.assertEqual(self.handler.unescape(escaped_ascii), expected_ascii)

    # Text escaped through the json module comes back as the original
    # non-ASCII string.
    greek = u'καλημέρα'
    json_escaped = self._escape_with_json(greek)
    self.assertEqual(self.handler.unescape(json_escaped), greek)

@staticmethod
def _escape_with_json(s):
    """Return ``s`` escaped by the ``json`` module, without the quotes.

    Serves as the reference value the handler's own escaping is compared
    against in these tests.

    :param s: the text to escape.
    :return: the body of the JSON string literal for ``s``, as text.
    """
    dumped = json.dumps(s)[1:-1]
    # json.dumps returns a byte string on Python 2 but text on Python 3,
    # where calling .decode() unconditionally would raise AttributeError.
    if isinstance(dumped, bytes):
        dumped = dumped.decode()
    return dumped

0 comments on commit ef6001e

Please sign in to comment.