Skip to content

Commit 47efb4a

Browse files
Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.
1 parent 4e5d9ea commit 47efb4a

File tree

10 files changed

+146
-85
lines changed

10 files changed

+146
-85
lines changed

Doc/library/json.rst

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ Basic Usage
250250
will be passed to the constructor of the class.
251251

252252
If the data being deserialized is not a valid JSON document, a
253-
:exc:`ValueError` will be raised.
253+
:exc:`JSONDecodeError` will be raised.
254254

255255
.. function:: loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
256256

@@ -261,7 +261,7 @@ Basic Usage
261261
*encoding* which is ignored and deprecated.
262262

263263
If the data being deserialized is not a valid JSON document, a
264-
:exc:`ValueError` will be raised.
264+
:exc:`JSONDecodeError` will be raised.
265265

266266
Encoders and Decoders
267267
---------------------
@@ -334,13 +334,16 @@ Encoders and Decoders
334334
``'\n'``, ``'\r'`` and ``'\0'``.
335335

336336
If the data being deserialized is not a valid JSON document, a
337-
:exc:`ValueError` will be raised.
337+
:exc:`JSONDecodeError` will be raised.
338338

339339
.. method:: decode(s)
340340

341341
Return the Python representation of *s* (a :class:`str` instance
342342
containing a JSON document)
343343

344+
:exc:`JSONDecodeError` will be raised if the given JSON document is not
345+
valid.
346+
344347
.. method:: raw_decode(s)
345348

346349
Decode a JSON document from *s* (a :class:`str` beginning with a
@@ -469,6 +472,36 @@ Encoders and Decoders
469472
mysocket.write(chunk)
470473

471474

475+
Exceptions
476+
----------
477+
478+
.. exception:: JSONDecodeError(msg, doc, pos)
479+
480+
Subclass of :exc:`ValueError` with the following additional attributes:
481+
482+
.. attribute:: msg
483+
484+
The unformatted error message.
485+
486+
.. attribute:: doc
487+
488+
The JSON document being parsed.
489+
490+
.. attribute:: pos
491+
492+
The start index of *doc* where parsing failed.
493+
494+
.. attribute:: lineno
495+
496+
The line corresponding to *pos*.
497+
498+
.. attribute:: colno
499+
500+
The column corresponding to *pos*.
501+
502+
.. versionadded:: 3.5
503+
504+
472505
Standard Compliance and Interoperability
473506
----------------------------------------
474507

Doc/whatsnew/3.5.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ json
230230
of dictionaries alphabetically by key. (Contributed by Berker Peksag in
231231
:issue:`21650`.)
232232

233+
* JSON decoder now raises :exc:`json.JSONDecodeError` instead of
234+
:exc:`ValueError`. (Contributed by Serhiy Storchaka in :issue:`19361`.)
235+
233236
os
234237
--
235238

Lib/json/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,12 @@
9898
__version__ = '2.0.9'
9999
__all__ = [
100100
'dump', 'dumps', 'load', 'loads',
101-
'JSONDecoder', 'JSONEncoder',
101+
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
102102
]
103103

104104
__author__ = 'Bob Ippolito <bob@redivi.com>'
105105

106-
from .decoder import JSONDecoder
106+
from .decoder import JSONDecoder, JSONDecodeError
107107
from .encoder import JSONEncoder
108108

109109
_default_encoder = JSONEncoder(
@@ -311,7 +311,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
311311
raise TypeError('the JSON object must be str, not {!r}'.format(
312312
s.__class__.__name__))
313313
if s.startswith(u'\ufeff'):
314-
raise ValueError("Unexpected UTF-8 BOM (decode using utf-8-sig)")
314+
raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
315+
s, 0)
315316
if (cls is None and object_hook is None and
316317
parse_int is None and parse_float is None and
317318
parse_constant is None and object_pairs_hook is None and not kw):

Lib/json/decoder.py

Lines changed: 41 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
except ImportError:
99
c_scanstring = None
1010

11-
__all__ = ['JSONDecoder']
11+
__all__ = ['JSONDecoder', 'JSONDecodeError']
1212

1313
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
1414

@@ -17,32 +17,30 @@
1717
NegInf = float('-inf')
1818

1919

20-
def linecol(doc, pos):
21-
if isinstance(doc, bytes):
22-
newline = b'\n'
23-
else:
24-
newline = '\n'
25-
lineno = doc.count(newline, 0, pos) + 1
26-
if lineno == 1:
27-
colno = pos + 1
28-
else:
29-
colno = pos - doc.rindex(newline, 0, pos)
30-
return lineno, colno
31-
32-
33-
def errmsg(msg, doc, pos, end=None):
34-
# Note that this function is called from _json
35-
lineno, colno = linecol(doc, pos)
36-
if end is None:
37-
fmt = '{0}: line {1} column {2} (char {3})'
38-
return fmt.format(msg, lineno, colno, pos)
39-
#fmt = '%s: line %d column %d (char %d)'
40-
#return fmt % (msg, lineno, colno, pos)
41-
endlineno, endcolno = linecol(doc, end)
42-
fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
43-
return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
44-
#fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
45-
#return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
20+
class JSONDecodeError(ValueError):
21+
"""Subclass of ValueError with the following additional attributes:
22+
23+
msg: The unformatted error message
24+
doc: The JSON document being parsed
25+
pos: The start index of doc where parsing failed
26+
lineno: The line corresponding to pos
27+
colno: The column corresponding to pos
28+
29+
"""
30+
# Note that this exception is used from _json
31+
def __init__(self, msg, doc, pos):
32+
lineno = doc.count('\n', 0, pos) + 1
33+
colno = pos - doc.rfind('\n', 0, pos)
34+
errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
35+
ValueError.__init__(self, errmsg)
36+
self.msg = msg
37+
self.doc = doc
38+
self.pos = pos
39+
self.lineno = lineno
40+
self.colno = colno
41+
42+
def __reduce__(self):
43+
return self.__class__, (self.msg, self.doc, self.pos)
4644

4745

4846
_CONSTANTS = {
@@ -66,7 +64,7 @@ def _decode_uXXXX(s, pos):
6664
except ValueError:
6765
pass
6866
msg = "Invalid \\uXXXX escape"
69-
raise ValueError(errmsg(msg, s, pos))
67+
raise JSONDecodeError(msg, s, pos)
7068

7169
def py_scanstring(s, end, strict=True,
7270
_b=BACKSLASH, _m=STRINGCHUNK.match):
@@ -84,8 +82,7 @@ def py_scanstring(s, end, strict=True,
8482
while 1:
8583
chunk = _m(s, end)
8684
if chunk is None:
87-
raise ValueError(
88-
errmsg("Unterminated string starting at", s, begin))
85+
raise JSONDecodeError("Unterminated string starting at", s, begin)
8986
end = chunk.end()
9087
content, terminator = chunk.groups()
9188
# Content contains zero or more unescaped string characters
@@ -99,22 +96,21 @@ def py_scanstring(s, end, strict=True,
9996
if strict:
10097
#msg = "Invalid control character %r at" % (terminator,)
10198
msg = "Invalid control character {0!r} at".format(terminator)
102-
raise ValueError(errmsg(msg, s, end))
99+
raise JSONDecodeError(msg, s, end)
103100
else:
104101
_append(terminator)
105102
continue
106103
try:
107104
esc = s[end]
108105
except IndexError:
109-
raise ValueError(
110-
errmsg("Unterminated string starting at", s, begin))
106+
raise JSONDecodeError("Unterminated string starting at", s, begin)
111107
# If not a unicode escape sequence, must be in the lookup table
112108
if esc != 'u':
113109
try:
114110
char = _b[esc]
115111
except KeyError:
116112
msg = "Invalid \\escape: {0!r}".format(esc)
117-
raise ValueError(errmsg(msg, s, end))
113+
raise JSONDecodeError(msg, s, end)
118114
end += 1
119115
else:
120116
uni = _decode_uXXXX(s, end)
@@ -163,8 +159,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
163159
pairs = object_hook(pairs)
164160
return pairs, end + 1
165161
elif nextchar != '"':
166-
raise ValueError(errmsg(
167-
"Expecting property name enclosed in double quotes", s, end))
162+
raise JSONDecodeError(
163+
"Expecting property name enclosed in double quotes", s, end)
168164
end += 1
169165
while True:
170166
key, end = scanstring(s, end, strict)
@@ -174,7 +170,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
174170
if s[end:end + 1] != ':':
175171
end = _w(s, end).end()
176172
if s[end:end + 1] != ':':
177-
raise ValueError(errmsg("Expecting ':' delimiter", s, end))
173+
raise JSONDecodeError("Expecting ':' delimiter", s, end)
178174
end += 1
179175

180176
try:
@@ -188,7 +184,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
188184
try:
189185
value, end = scan_once(s, end)
190186
except StopIteration as err:
191-
raise ValueError(errmsg("Expecting value", s, err.value)) from None
187+
raise JSONDecodeError("Expecting value", s, err.value) from None
192188
pairs_append((key, value))
193189
try:
194190
nextchar = s[end]
@@ -202,13 +198,13 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
202198
if nextchar == '}':
203199
break
204200
elif nextchar != ',':
205-
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
201+
raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
206202
end = _w(s, end).end()
207203
nextchar = s[end:end + 1]
208204
end += 1
209205
if nextchar != '"':
210-
raise ValueError(errmsg(
211-
"Expecting property name enclosed in double quotes", s, end - 1))
206+
raise JSONDecodeError(
207+
"Expecting property name enclosed in double quotes", s, end - 1)
212208
if object_pairs_hook is not None:
213209
result = object_pairs_hook(pairs)
214210
return result, end
@@ -232,7 +228,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
232228
try:
233229
value, end = scan_once(s, end)
234230
except StopIteration as err:
235-
raise ValueError(errmsg("Expecting value", s, err.value)) from None
231+
raise JSONDecodeError("Expecting value", s, err.value) from None
236232
_append(value)
237233
nextchar = s[end:end + 1]
238234
if nextchar in _ws:
@@ -242,7 +238,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
242238
if nextchar == ']':
243239
break
244240
elif nextchar != ',':
245-
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
241+
raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
246242
try:
247243
if s[end] in _ws:
248244
end += 1
@@ -343,7 +339,7 @@ def decode(self, s, _w=WHITESPACE.match):
343339
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
344340
end = _w(s, end).end()
345341
if end != len(s):
346-
raise ValueError(errmsg("Extra data", s, end, len(s)))
342+
raise JSONDecodeError("Extra data", s, end)
347343
return obj
348344

349345
def raw_decode(self, s, idx=0):
@@ -358,5 +354,5 @@ def raw_decode(self, s, idx=0):
358354
try:
359355
obj, end = self.scan_once(s, idx)
360356
except StopIteration as err:
361-
raise ValueError(errmsg("Expecting value", s, err.value)) from None
357+
raise JSONDecodeError("Expecting value", s, err.value) from None
362358
return obj, end

Lib/test/test_json/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,23 @@
99
# import json with and without accelerations
1010
cjson = support.import_fresh_module('json', fresh=['_json'])
1111
pyjson = support.import_fresh_module('json', blocked=['_json'])
12+
# JSONDecodeError is cached inside the _json module
13+
cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError
1214

1315
# create two base classes that will be used by the other tests
1416
class PyTest(unittest.TestCase):
1517
json = pyjson
1618
loads = staticmethod(pyjson.loads)
1719
dumps = staticmethod(pyjson.dumps)
20+
JSONDecodeError = staticmethod(pyjson.JSONDecodeError)
1821

1922
@unittest.skipUnless(cjson, 'requires _json')
2023
class CTest(unittest.TestCase):
2124
if cjson is not None:
2225
json = cjson
2326
loads = staticmethod(cjson.loads)
2427
dumps = staticmethod(cjson.dumps)
28+
JSONDecodeError = staticmethod(cjson.JSONDecodeError)
2529

2630
# test PyTest and CTest checking if the functions come from the right module
2731
class TestPyTest(PyTest):

Lib/test/test_json/test_decode.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,12 @@ def test_keys_reuse(self):
6363
def test_extra_data(self):
6464
s = '[1, 2, 3]5'
6565
msg = 'Extra data'
66-
self.assertRaisesRegex(ValueError, msg, self.loads, s)
66+
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
6767

6868
def test_invalid_escape(self):
6969
s = '["abc\\y"]'
7070
msg = 'escape'
71-
self.assertRaisesRegex(ValueError, msg, self.loads, s)
71+
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
7272

7373
def test_invalid_input_type(self):
7474
msg = 'the JSON object must be str'
@@ -80,10 +80,10 @@ def test_invalid_input_type(self):
8080
def test_string_with_utf8_bom(self):
8181
# see #18958
8282
bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
83-
with self.assertRaises(ValueError) as cm:
83+
with self.assertRaises(self.JSONDecodeError) as cm:
8484
self.loads(bom_json)
8585
self.assertIn('BOM', str(cm.exception))
86-
with self.assertRaises(ValueError) as cm:
86+
with self.assertRaises(self.JSONDecodeError) as cm:
8787
self.json.load(StringIO(bom_json))
8888
self.assertIn('BOM', str(cm.exception))
8989
# make sure that the BOM is not detected in the middle of a string

0 commit comments

Comments
 (0)