Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion json5/dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def json_array_to_json(self, node):
@to_json(Identifier)
def identifier_to_json(self, node):
self.process_wsc_before(node)
self.env.write(node.name)
self.env.write(node.raw_value)
self.process_wsc_after(node)

@to_json(Integer)
Expand Down
7 changes: 5 additions & 2 deletions json5/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,13 @@ def __init__(self, key, value):


class Identifier(Key):
def __init__(self, name):
def __init__(self, name, raw_value=None):
assert isinstance(name, str)
if raw_value is None:
raw_value = name
assert isinstance(raw_value, str)
assert len(name) > 0
super().__init__(name=name)
super().__init__(name=name, raw_value=raw_value)

def __hash__(self):
return hash(self.name)
Expand Down
19 changes: 14 additions & 5 deletions json5/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,26 @@ def replace_escape_literals(matchobj):

@lru_cache(maxsize=1024)
def _latin_escape_replace(s):
if len(s) != 4:
if s.startswith('\\x') and len(s) != 4:
raise JSON5DecodeError("'\\x' MUST be followed by two hexadecimal digits", None)
val = ast.literal_eval(f'"{s}"')
if val == '\\':
val = '\\\\' # this is important; the subsequent regex will sub it back to \\
return val


def latin_escape_replace(matchobj):
def latin_unicode_escape_replace(matchobj):
s = matchobj.group(0)
return _latin_escape_replace(s)


def _unicode_escape_replace(s):
return ast.literal_eval(f'"{s}"')

def unicode_escape_replace(matchobj):
s = matchobj.group(0)
return _unicode_escape_replace(s)

class JSONParser(Parser):
# debugfile = 'parser.out'
tokens = JSONLexer.tokens
Expand Down Expand Up @@ -182,7 +189,9 @@ def json_array(self, p):

@_('NAME')
def identifier(self, p):
return Identifier(name=p[0])
raw_value = p[0]
name = re.sub(r'\\u[0-9a-fA-F]{4}', unicode_escape_replace, raw_value)
return Identifier(name=name, raw_value=raw_value)

@_('identifier',
'string')
Expand Down Expand Up @@ -237,7 +246,7 @@ def double_quoted_string(self, p):
self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
try:
contents = re.sub(r'\\x[a-fA-F0-9]{0,2}', latin_escape_replace, contents)
contents = re.sub(r'(\\x[a-fA-F0-9]{0,2}|\\u[0-9a-fA-F]{4})', latin_unicode_escape_replace, contents)
except JSON5DecodeError as exc:
self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
try:
Expand All @@ -255,7 +264,7 @@ def single_quoted_string(self, p):
self.errors.append(JSON5DecodeError(errmsg, p._slice[0]))
contents = re.sub(r'\\(\r\n|[\u000A\u000D\u2028\u2029])', '', contents)
try:
contents = re.sub(r'\\x[a-fA-F0-9]{0,2}', latin_escape_replace, contents)
contents = re.sub(r'(\\x[a-fA-F0-9]{0,2}|\\u[0-9a-fA-F]{4})', latin_unicode_escape_replace, contents)
except JSON5DecodeError as exc:
self.errors.append(JSON5DecodeError(exc.args[0], p._slice[0]))
try:
Expand Down
2 changes: 1 addition & 1 deletion json5/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def WHITESPACE(self, tok):
HEXADECIMAL = r'0(x|X)[0-9a-fA-F]+'
FLOAT = r'(\d+\.\d*)|(\d*\.\d+)' # 23.45
INTEGER = r'\d+'
NAME = r'[a-zA-Z_\$]([a-zA-Z_\d\$])*'
NAME = r'[\w_\$\\]([\w_\d\$\\])*'

NAME['true'] = TRUE
NAME['false'] = FALSE
Expand Down
8 changes: 8 additions & 0 deletions tests/test_json5_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,11 @@ def test_load_latin_escape():

def test_latin_escape_backslash_is_not_real_backslack():
assert loads("""'\\x5C01'""") == "\\01"

def test_escape_unicode():
json_string = """
{
sig\\u03A3ma: "\\u03A3 is the sum of all things"
}
"""
assert loads(json_string) == {"sig\u03A3ma": "\u03A3 is the sum of all things"}
20 changes: 12 additions & 8 deletions tests/test_json5_official_tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from json5 import loads, load, JSON5DecodeError
from json5 import loads, load, JSON5DecodeError, dumps
from json5.loader import ModelLoader
from json5.dumper import ModelDumper
import os
import pytest
from io import open
Expand All @@ -21,13 +23,15 @@ def test_official_files(fp):
if not os.path.exists(tests_path):
pytest.mark.skip("Tests repo was not present in expected location. Skipping.")
return
try:
load(open(fp, encoding='utf-8'))
except JSON5DecodeError:
if 'todo' in fp:
pytest.mark.xfail("TODO files expected to fail")
else:
raise
load(open(fp, encoding='utf-8'))

@pytest.mark.parametrize('fp', specs)
def test_official_files_rt(fp):
if not os.path.exists(tests_path):
pytest.mark.skip("Tests repo was not present in expected location. Skipping.")
with open(fp, encoding='utf-8') as f:
json_string = f.read()
assert dumps(loads(json_string, loader=ModelLoader()), dumper=ModelDumper()) == json_string

@pytest.mark.parametrize(('input_file', 'expected'), error_specs)
def test_official_error_specs(input_file, expected):
Expand Down
7 changes: 5 additions & 2 deletions tests/test_json_helpers.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from json5.model import Identifier
from json5.dumper import modelize
def test_identifier_can_hash_like_string():
d = {Identifier('foo'): 'bar'}
d = {Identifier('foo', raw_value='foo'): 'bar'}
assert d['foo'] == 'bar'

def test_identifier_equals_like_string():
assert Identifier('foo') == 'foo'
assert Identifier('foo', raw_value='foo') == 'foo'


def test_repr_does_not_contain_wsc():
model = modelize({'foo': 'bar'})
assert 'wsc' not in repr(model)

def test_identifier_does_not_need_explicit_raw_value():
assert Identifier('foo').raw_value == 'foo'