spyoungtech · spyoungtech · May 23, 2020 · May 23, 2020 · May 23, 2020 · May 23, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -8,6 +8,7 @@ python:
 
 install:
   - pip install sly coverage coveralls
+  - git clone https://github.com/spyoungtech/json5-tests.git
 
 script:
   - coverage run -m pytest tests

diff --git a/json5/parser.py b/json5/parser.py
@@ -7,7 +7,8 @@
 from json5.tokenizer import JSONLexer, tokenize
 from json5.model import *
 from json5.utils import JSON5DecodeError
-
+import ast
+from functools import lru_cache
 
 class QuietSlyLogger(SlyLogger):
     def warning(self, *args, **kwargs):
@@ -32,12 +33,29 @@ def warning(self, *args, **kwargs):
 # class TrailingComma:
 #     pass
 
-
 def replace_escape_literals(matchobj):
+    s = matchobj.group(0)  # the whole matched escape text
+    if s.startswith('\\0') and len(s) == 3:  # backslash, '0', then exactly one more character
+        raise JSON5DecodeError("'\\0' MUST NOT be followed by a decimal digit", None)
     seq = matchobj.group(1)
     return ESCAPE_SEQUENCES.get(seq, seq)
 
 
+@lru_cache(maxsize=1024)
+def _latin_escape_replace(s):
+    """Decode one ``\\xHH`` escape; a decoded backslash is returned doubled."""
+    if len(s) != 4:
+        raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None)
+    val = chr(int(s[2:], 16))  # decode the two hex digits directly, no literal evaluation
+    # this is important; the subsequent regex will sub a doubled backslash back to one
+    return '\\\\' if val == '\\' else val
+
+
+def latin_escape_replace(matchobj):
+    """``re.sub`` callback: hand the full matched text to the cached decoder."""
+    return _latin_escape_replace(matchobj.group(0))
+
+
 class JSONParser(Parser):
     # debugfile = 'parser.out'
     tokens = JSONLexer.tokens
@@ -218,7 +236,14 @@ def double_quoted_string(self, p):
             errmsg = f"Illegal line terminator without continuation"
             self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
         contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
-        contents = re.sub(r'\\(.)', replace_escape_literals, contents)
+        try:
+            contents = re.sub(r'\\x[a-fA-F0-9]{0,2}', latin_escape_replace, contents)
+        except JSON5DecodeError as exc:
+            self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
+        try:
+            contents = re.sub(r'\\(0\d|.)', replace_escape_literals, contents)
+        except JSON5DecodeError as exc:
+            self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
         return DoubleQuotedString(contents, raw_value=raw_value)
 
     @_("SINGLE_QUOTE_STRING")
@@ -229,7 +254,14 @@ def single_quoted_string(self, p):
             errmsg = f"Illegal line terminator without continuation"
             self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
         contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
-        contents = re.sub(r'\\(.)', replace_escape_literals, contents)
+        try:
+            contents = re.sub(r'\\x[a-fA-F0-9]{0,2}', latin_escape_replace, contents)
+        except JSON5DecodeError as exc:
+            self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
+        try:
+            contents = re.sub(r'\\(0\d|.)', replace_escape_literals, contents)
+        except JSON5DecodeError as exc:
+            self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
         return SingleQuotedString(contents, raw_value=raw_value)
 
     @_('double_quoted_string',

diff --git a/json5/tokenizer.py b/json5/tokenizer.py
@@ -76,7 +76,7 @@ def WHITESPACE(self, tok):
     MINUS = r'\-'
     PLUS = r'\+'
     EXPONENT = r"(e|E)(\-|\+)?\d+"
-    HEXADECIMAL = r'0x[0-9a-fA-F]+'
+    HEXADECIMAL = r'0(x|X)[0-9a-fA-F]+'
     FLOAT = r'(\d+\.\d*)|(\d*\.\d+)'      # 23.45
     INTEGER = r'\d+'
     NAME = r'[a-zA-Z_\$]([a-zA-Z_\d\$])*'

diff --git a/tests/test_errors.py b/tests/test_errors.py
@@ -87,3 +87,24 @@ def test_empty_input_raises_error():
     with pytest.raises(JSON5DecodeError) as exc_info:
         loads("")
     assert "unexpected EOF" in str(exc_info.value)
+
+
+def test_backslash_x_without_two_hexadecimals_raises_error():
+    # `match` is a regex searched in str(exc); `\\x` in it matches a literal backslash + x.
+    with pytest.raises(JSON5DecodeError, match=r"'\\x' MUST be followed by two hexadecimal digits"):
+        loads(r"'\x1'")
+
+def test_null_escape_may_not_be_followed_by_decimal_digit():
+    # `match` is a regex searched in str(exc); `\\0` in it matches a literal backslash + 0.
+    with pytest.raises(JSON5DecodeError, match=r"'\\0' MUST NOT be followed by a decimal digit"):
+        loads(r"'\01'")
+
+def test_backslash_x_without_two_hexadecimals_raises_error_but_for_double_quotes():
+    # Same check as the single-quote variant, for a double-quoted string.
+    with pytest.raises(JSON5DecodeError, match=r"'\\x' MUST be followed by two hexadecimal digits"):
+        loads(r'"\x1"')
+
+def test_null_escape_may_not_be_followed_by_decimal_digit_but_for_double_quotes():
+    # Same check as the single-quote variant, for a double-quoted string.
+    with pytest.raises(JSON5DecodeError, match=r"'\\0' MUST NOT be followed by a decimal digit"):
+        loads(r'"\01"')
diff --git a/tests/test_json5_load.py b/tests/test_json5_load.py
@@ -283,3 +283,11 @@ def test_load_object_with_additional_comments():
     }
     """
     assert loads(json_string) == {'foo': 'bar', 'bacon': 'eggs'}
+
+
+def test_load_latin_escape():
+    # \x5C is the hexadecimal escape for a single backslash character.
+    assert loads(r'"\x5C"') == '\\'
+
+def test_latin_escape_backslash_is_not_real_backslack():
+    assert "\\01" == loads(r"'\x5C01'")  # the decoded backslash must not start a new \0 escape
diff --git a/tests/test_json5_official_tests.py b/tests/test_json5_official_tests.py
@@ -0,0 +1,41 @@
+from json5 import loads, load, JSON5DecodeError
+import os
+import pytest
+from io import open
+
+tests_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../json5-tests'))
+
+error_specs = []  # (input path, sibling .errorSpec path) pairs fed to test_official_error_specs
+specs = []  # .json5/.json paths fed to test_official_files
+
+for root, _dirs, files in os.walk(tests_path):
+    for name in files:
+        path = os.path.join(root, name)
+        if name.endswith(('.json5', '.json')):
+            specs.append(path)
+        elif name.endswith(('.txt', '.js')):  # swap only the final extension, not every occurrence
+            error_specs.append((path, os.path.join(root, os.path.splitext(name)[0] + '.errorSpec')))
+
+@pytest.mark.parametrize('fp', specs)
+def test_official_files(fp):
+    # pytest.skip()/pytest.xfail() end the test at once; calling pytest.mark.* here does nothing.
+    if not os.path.exists(tests_path):
+        pytest.skip("Tests repo was not present in expected location. Skipping.")
+    try:
+        with open(fp, encoding='utf-8') as spec_file:
+            load(spec_file)
+    except JSON5DecodeError:
+        if 'todo' not in fp:
+            raise
+        pytest.xfail("TODO files expected to fail")
+
+@pytest.mark.parametrize(('input_file', 'expected'), error_specs)
+def test_official_error_specs(input_file, expected):
+    # `expected` (the .errorSpec path) is not compared yet; only the exception type is checked.
+    if not os.path.exists(tests_path):
+        pytest.skip("Tests repo was not present in expected location. Skipping.")
+    if 'octal' in input_file:
+        pytest.xfail("Octals are dumb")
+    with pytest.raises(JSON5DecodeError):
+        with open(input_file, encoding='utf-8') as spec_file:
+            load(spec_file)