diff --git a/.travis.yml b/.travis.yml
index 1127d23..e1698a3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,6 +8,7 @@ python:
 
 install:
   - pip install sly coverage coveralls
+  - git clone https://github.com/spyoungtech/json5-tests.git
 
 script:
   - coverage run -m pytest tests
diff --git a/json5/parser.py b/json5/parser.py
index 10935d4..122c0ae 100644
--- a/json5/parser.py
+++ b/json5/parser.py
@@ -7,7 +7,7 @@
 from json5.tokenizer import JSONLexer, tokenize
 from json5.model import *
 from json5.utils import JSON5DecodeError
-
+from functools import lru_cache
 
 class QuietSlyLogger(SlyLogger):
     def warning(self, *args, **kwargs):
@@ -32,12 +32,40 @@ def warning(self, *args, **kwargs):
 # class TrailingComma:
 #     pass
 
-
 def replace_escape_literals(matchobj):
+    """Return the replacement for one simple escape; reject NUL followed by a digit."""
+    s = matchobj.group(0)
+    if s.startswith('\\0') and len(s) == 3:
+        raise JSON5DecodeError("'\\0' MUST NOT be followed by a decimal digit", None)
     seq = matchobj.group(1)
     return ESCAPE_SEQUENCES.get(seq, seq)
 
 
+@lru_cache(maxsize=1024)
+def _latin_escape_replace(s):
+    """Decode a hexadecimal escape (backslash, 'x', two hex digits) to its character."""
+    if len(s) != 4:
+        raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None)
+    return chr(int(s[2:], 16))
+
+
+def latin_escape_replace(matchobj):
+    """``re.sub`` callback that decodes the matched hexadecimal escape."""
+    return _latin_escape_replace(matchobj.group(0))
+
+
+# Escapes are decoded in one left-to-right pass so that an escaped backslash is
+# consumed before a following ``x41`` can be mistaken for a hexadecimal escape.
+ESCAPE_PATTERN = re.compile(r'\\(x[a-fA-F0-9]{0,2}|0\d|.)')
+
+
+def replace_escape(matchobj):
+    """``re.sub`` callback for ESCAPE_PATTERN: dispatch on the kind of escape."""
+    if matchobj.group(1).startswith('x'):
+        return _latin_escape_replace(matchobj.group(0))
+    return replace_escape_literals(matchobj)
+
+
 class JSONParser(Parser):
     # debugfile = 'parser.out'
     tokens = JSONLexer.tokens
@@ -218,7 +246,10 @@ def double_quoted_string(self, p):
             errmsg = f"Illegal line terminator without continuation"
             self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
         contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
-        contents = re.sub(r'\\(.)', replace_escape_literals, contents)
+        try:
+            contents = ESCAPE_PATTERN.sub(replace_escape, contents)
+        except JSON5DecodeError as exc:
+            self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
         return DoubleQuotedString(contents, raw_value=raw_value)
 
     @_("SINGLE_QUOTE_STRING")
@@ -229,7 +260,10 @@ def single_quoted_string(self, p):
             errmsg = f"Illegal line terminator without continuation"
             self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
         contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
-        contents = re.sub(r'\\(.)', replace_escape_literals, contents)
+        try:
+            contents = ESCAPE_PATTERN.sub(replace_escape, contents)
+        except JSON5DecodeError as exc:
+            self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
         return SingleQuotedString(contents, raw_value=raw_value)
 
     @_('double_quoted_string',
diff --git a/json5/tokenizer.py b/json5/tokenizer.py
index 53e1ee5..971b692 100644
--- a/json5/tokenizer.py
+++ b/json5/tokenizer.py
@@ -76,7 +76,7 @@ def WHITESPACE(self, tok):
     MINUS = r'\-'
     PLUS = r'\+'
     EXPONENT = r"(e|E)(\-|\+)?\d+"
-    HEXADECIMAL = r'0x[0-9a-fA-F]+'
+    HEXADECIMAL = r'0(x|X)[0-9a-fA-F]+'
     FLOAT = r'(\d+\.\d*)|(\d*\.\d+)'  # 23.45
     INTEGER = r'\d+'
     NAME = r'[a-zA-Z_\$]([a-zA-Z_\d\$])*'
diff --git a/tests/test_errors.py b/tests/test_errors.py
index e2d84a4..832c4b4 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -87,3 +87,27 @@ def test_empty_input_raises_error():
     with pytest.raises(JSON5DecodeError) as exc_info:
         loads("")
     assert "unexpected EOF" in str(exc_info.value)
+
+
+def test_backslash_x_without_two_hexadecimals_raises_error():
+    with pytest.raises(JSON5DecodeError) as exc_info:
+        loads(r"'\x1'")
+    assert "'\\x' MUST be followed by two hexadecimal digits" in str(exc_info.value)
+
+
+def test_null_escape_may_not_be_followed_by_decimal_digit():
+    with pytest.raises(JSON5DecodeError) as exc_info:
+        loads(r"'\01'")
+    assert "'\\0' MUST NOT be followed by a decimal digit" in str(exc_info.value)
+
+
+def test_backslash_x_without_two_hexadecimals_raises_error_but_for_double_quotes():
+    with pytest.raises(JSON5DecodeError) as exc_info:
+        loads(r'"\x1"')
+    assert "'\\x' MUST be followed by two hexadecimal digits" in str(exc_info.value)
+
+
+def test_null_escape_may_not_be_followed_by_decimal_digit_but_for_double_quotes():
+    with pytest.raises(JSON5DecodeError) as exc_info:
+        loads(r'"\01"')
+    assert "'\\0' MUST NOT be followed by a decimal digit" in str(exc_info.value)
diff --git a/tests/test_json5_load.py b/tests/test_json5_load.py
index 9d392ad..76c1ad9 100644
--- a/tests/test_json5_load.py
+++ b/tests/test_json5_load.py
@@ -283,3 +283,16 @@ def test_load_object_with_additional_comments():
     }
     """
     assert loads(json_string) == {'foo': 'bar', 'bacon': 'eggs'}
+
+
+def test_load_latin_escape():
+    json_string = r'"\x5C"'
+    assert loads(json_string) == '\\'
+
+
+def test_latin_escape_backslash_is_not_real_backslash():
+    assert loads("""'\\x5C01'""") == "\\01"
+
+
+def test_escaped_backslash_before_x_is_not_latin_escape():
+    assert loads(r"'\\x41'") == r'\x41'
diff --git a/tests/test_json5_official_tests.py b/tests/test_json5_official_tests.py
new file mode 100644
index 0000000..34d75db
--- /dev/null
+++ b/tests/test_json5_official_tests.py
@@ -0,0 +1,45 @@
+"""Run the parser against a local checkout of the json5-tests corpus, if present."""
+from json5 import loads, load, JSON5DecodeError
+import os
+import pytest
+from io import open
+
+tests_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../json5-tests'))
+
+error_specs = []
+specs = []
+
+# Collect the corpus at import time so each file becomes its own parametrized case.
+for root, dirs, files in os.walk(tests_path):
+    for f in files:
+        if f.endswith('.json5') or f.endswith('.json'):
+            specs.append(os.path.join(root, f))
+        elif f.endswith('.txt') or f.endswith('.js'):
+            error_spec = f.replace('.txt', '.errorSpec').replace('.js', '.errorSpec')
+            error_specs.append((os.path.join(root, f), os.path.join(root, error_spec)))
+
+
+@pytest.mark.parametrize('fp', specs)
+def test_official_files(fp):
+    """Each .json/.json5 file must load; paths containing 'todo' are xfailed on decode errors."""
+    if not os.path.exists(tests_path):
+        pytest.skip("Tests repo was not present in expected location. Skipping.")
+    try:
+        with open(fp, encoding='utf-8') as fh:
+            load(fh)
+    except JSON5DecodeError:
+        if 'todo' in fp:
+            pytest.xfail("TODO files expected to fail")
+        raise
+
+
+@pytest.mark.parametrize(('input_file', 'expected'), error_specs)
+def test_official_error_specs(input_file, expected):
+    """Each .txt/.js file is expected to raise JSON5DecodeError; 'octal' paths are xfailed."""
+    if not os.path.exists(tests_path):
+        pytest.skip("Tests repo was not present in expected location. Skipping.")
+    if 'octal' in input_file:
+        pytest.xfail("Octals are dumb")
+    with pytest.raises(JSON5DecodeError):
+        with open(input_file, encoding='utf-8') as fh:
+            load(fh)