spyoungtech · spyoungtech · May 25, 2020 · May 25, 2020 · May 25, 2020 · May 25, 2020
diff --git a/json5/dumper.py b/json5/dumper.py
@@ -224,7 +224,7 @@ def json_array_to_json(self, node):
     @to_json(Identifier)
     def identifier_to_json(self, node):
         self.process_wsc_before(node)
-        self.env.write(node.name)
+        self.env.write(node.raw_value)  # write the identifier's raw_value (previously node.name)
         self.process_wsc_after(node)
 
     @to_json(Integer)

diff --git a/json5/model.py b/json5/model.py
@@ -64,10 +64,13 @@ def __init__(self, key, value):
 
 
 class Identifier(Key):
-    def __init__(self, name):
+    def __init__(self, name, raw_value=None):
         assert isinstance(name, str)
+        if raw_value is None:
+            raw_value = name  # default: when no raw text is supplied, reuse the name itself
+        assert isinstance(raw_value, str)
         assert len(name) > 0
-        super().__init__(name=name)
+        super().__init__(name=name, raw_value=raw_value)  # both values are forwarded to the base-class initializer
 
     def __hash__(self):
         return hash(self.name)

diff --git a/json5/parser.py b/json5/parser.py
@@ -43,19 +43,26 @@ def replace_escape_literals(matchobj):
 
 @lru_cache(maxsize=1024)
 def _latin_escape_replace(s):
-    if len(s) != 4:
+    if s.startswith('\\x') and len(s) != 4:  # the length check now applies only to inputs starting with \x
         raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None)
     val = ast.literal_eval(f'"{s}"')
     if val == '\\':
         val = '\\\\'  # this is important; the subsequent regex will sub it back to \\
     return val
 
 
-def latin_escape_replace(matchobj):
+def latin_unicode_escape_replace(matchobj):  # takes a regex match and hands its whole matched text to _latin_escape_replace
     s = matchobj.group(0)
     return _latin_escape_replace(s)
 
 
+def _unicode_escape_replace(s):  # evaluates s as the body of a double-quoted Python string literal
+    return ast.literal_eval(f'"{s}"')
+
+def unicode_escape_replace(matchobj):  # takes a regex match object and returns the replacement text for it
+    s = matchobj.group(0)  # group(0) is the entire matched text
+    return _unicode_escape_replace(s)
+
 class JSONParser(Parser):
     # debugfile = 'parser.out'
     tokens = JSONLexer.tokens
@@ -182,7 +189,9 @@ def json_array(self, p):
 
     @_('NAME')
     def identifier(self, p):
-        return Identifier(name=p[0])
+        raw_value = p[0]  # value of the NAME token, kept unmodified
+        name = re.sub(r'\\u[0-9a-fA-F]{4}', unicode_escape_replace, raw_value)  # name has each \uXXXX escape substituted
+        return Identifier(name=name, raw_value=raw_value)
 
     @_('identifier',
        'string')
@@ -237,7 +246,7 @@ def double_quoted_string(self, p):
             self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
         contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
         try:
-            contents = re.sub(r'\\x[a-fA-F0-9]{0,2}', latin_escape_replace, contents)
+            contents = re.sub(r'(\\x[a-fA-F0-9]{0,2}|\\u[0-9a-fA-F]{4})', latin_unicode_escape_replace, contents)
         except JSON5DecodeError as exc:
             self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
         try:
@@ -255,7 +264,7 @@ def single_quoted_string(self, p):
             self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
         contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
         try:
-            contents = re.sub(r'\\x[a-fA-F0-9]{0,2}', latin_escape_replace, contents)
+            contents = re.sub(r'(\\x[a-fA-F0-9]{0,2}|\\u[0-9a-fA-F]{4})', latin_unicode_escape_replace, contents)
         except JSON5DecodeError as exc:
             self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
         try:

diff --git a/json5/tokenizer.py b/json5/tokenizer.py
@@ -76,7 +76,7 @@ def WHITESPACE(self, tok):
     HEXADECIMAL = r'0(x|X)[0-9a-fA-F]+'
     FLOAT = r'(\d+\.\d*)|(\d*\.\d+)'      # 23.45
     INTEGER = r'\d+'
-    NAME = r'[a-zA-Z_\$]([a-zA-Z_\d\$])*'
+    NAME = r'(?:[^\W\d]|\$|\\u[0-9a-fA-F]{4})(?:\w|\$|\\u[0-9a-fA-F]{4})*'  # a backslash is only accepted as a complete \uXXXX escape; first char may not be a decimal digit
 
     NAME['true'] = TRUE
     NAME['false'] = FALSE

diff --git a/tests/test_json5_load.py b/tests/test_json5_load.py
@@ -291,3 +291,11 @@ def test_load_latin_escape():
 
 def test_latin_escape_backslash_is_not_real_backslack():
     assert loads("""'\\x5C01'""") == "\\01"
+
+def test_escape_unicode():  # a \u03A3 escape must decode both in an unquoted key and in a string value
+    json_string = """
+    {
+        sig\\u03A3ma: "\\u03A3 is the sum of all things"
+    }
+    """
+    assert loads(json_string) == {"sig\u03A3ma": "\u03A3 is the sum of all things"}  # expected side uses Python's own \u03A3 escape
diff --git a/tests/test_json5_official_tests.py b/tests/test_json5_official_tests.py
@@ -1,4 +1,6 @@
-from json5 import loads, load, JSON5DecodeError
+from json5 import loads, load, JSON5DecodeError, dumps
+from json5.loader import ModelLoader
+from json5.dumper import ModelDumper
 import os
 import pytest
 from io import open
@@ -21,13 +23,15 @@ def test_official_files(fp):
     if not os.path.exists(tests_path):
         pytest.mark.skip("Tests repo was not present in expected location. Skipping.")
         return
-    try:
-        load(open(fp, encoding='utf-8'))
-    except JSON5DecodeError:
-        if 'todo' in fp:
-            pytest.mark.xfail("TODO files expected to fail")
-        else:
-            raise
+    load(open(fp, encoding='utf-8'))
+
+@pytest.mark.parametrize('fp', specs)
+def test_official_files_rt(fp):  # round trip: dumping the loaded model must reproduce the file text exactly
+    if not os.path.exists(tests_path):
+        pytest.skip("Tests repo was not present in expected location. Skipping.")  # pytest.mark.skip(...) only builds a marker; pytest.skip() actually stops the test
+    with open(fp, encoding='utf-8') as f:
+        json_string = f.read()
+    assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string
 
 @pytest.mark.parametrize(('input_file', 'expected'), error_specs)
 def test_official_error_specs(input_file, expected):

diff --git a/tests/test_json_helpers.py b/tests/test_json_helpers.py
@@ -1,13 +1,16 @@
 from json5.model import Identifier
 from json5.dumper import modelize
 def test_identifier_can_hash_like_string():
-    d = {Identifier('foo'): 'bar'}
+    d = {Identifier('foo', raw_value='foo'): 'bar'}  # raw_value passed explicitly; lookup below still uses the plain string
     assert d['foo'] == 'bar'
 
 def test_identifier_equals_like_string():
-    assert Identifier('foo') == 'foo'
+    assert Identifier('foo', raw_value='foo') == 'foo'  # equality with a plain string is checked with raw_value given explicitly
 
 
 def test_repr_does_not_contain_wsc():
     model = modelize({'foo': 'bar'})
     assert 'wsc' not in repr(model)
+
+def test_identifier_does_not_need_explicit_raw_value():  # omitting raw_value must fall back to the name
+    assert Identifier('foo').raw_value == 'foo'