From c9b46b8e51bbd0a2499b84a020dd7c54d52bce3d Mon Sep 17 00:00:00 2001 From: daften Date: Mon, 28 Nov 2011 00:58:33 +0100 Subject: [PATCH 01/15] Updated simplejson to latest version --- lib/simplejson/__init__.py | 264 +++++++++--- lib/simplejson/_speedups.c | 767 ++++++++++++++++++++++++--------- lib/simplejson/decoder.py | 169 +++++--- lib/simplejson/encoder.py | 164 +++++-- lib/simplejson/ordered_dict.py | 119 +++++ lib/simplejson/scanner.py | 24 +- lib/simplejson/tool.py | 39 ++ 7 files changed, 1187 insertions(+), 359 deletions(-) mode change 100644 => 100755 lib/simplejson/__init__.py mode change 100644 => 100755 lib/simplejson/_speedups.c mode change 100644 => 100755 lib/simplejson/decoder.py mode change 100644 => 100755 lib/simplejson/encoder.py create mode 100755 lib/simplejson/ordered_dict.py mode change 100644 => 100755 lib/simplejson/scanner.py create mode 100755 lib/simplejson/tool.py diff --git a/lib/simplejson/__init__.py b/lib/simplejson/__init__.py old mode 100644 new mode 100755 index d5b4d39913..ae4a39b9f9 --- a/lib/simplejson/__init__.py +++ b/lib/simplejson/__init__.py @@ -37,7 +37,7 @@ Pretty printing:: >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) { "4": 5, @@ -68,8 +68,8 @@ >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) - >>> import decimal - >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') True Specializing JSON object encoding:: @@ -97,16 +97,34 @@ $ echo '{ 1.2:3.4}' | python -m simplejson.tool Expecting property name: line 1 column 2 (char 2) """ -__version__ = '2.0.9' +__version__ = '2.2.1' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'OrderedDict', ] __author__ = 'Bob Ippolito ' -from decoder import JSONDecoder +from decimal import Decimal + +from decoder import JSONDecoder, JSONDecodeError from encoder import JSONEncoder +def _import_OrderedDict(): + import collections + try: + return collections.OrderedDict + except AttributeError: + import ordered_dict + return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): + try: + from simplejson._speedups import make_encoder + return make_encoder + except ImportError: + return None _default_encoder = JSONEncoder( skipkeys=False, @@ -117,11 +135,16 @@ separators=None, encoding='utf-8', default=None, + use_decimal=True, + namedtuple_as_object=True, + tuple_as_array=True, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). @@ -144,9 +167,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, in strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. + If *indent* is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. If ``separators`` is an ``(item_separator, dict_separator)`` tuple then it will be used instead of the default ``(', ', ': ')`` separators. @@ -157,6 +183,16 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. @@ -166,7 +202,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -174,7 +211,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) + default=default, use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: @@ -183,7 +223,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, + tuple_as_array=True, + **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types @@ -203,10 +246,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. + If ``indent`` is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. If ``separators`` is an ``(item_separator, dict_separator)`` tuple then it will be used instead of the default ``(', ', ': ')`` separators. @@ -217,6 +262,16 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. @@ -226,7 +281,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -234,28 +290,61 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, default=default, + use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, **kw).encode(obj) -_default_decoder = JSONDecoder(encoding=None, object_hook=None) +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, + **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. @@ -264,38 +353,54 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, return loads(fp.read(), encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + use_decimal=use_decimal, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. - If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. @@ -303,16 +408,59 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, """ if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and not kw): + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + import lib.simplejson.decoder as dec + import lib.simplejson.encoder as enc + import lib.simplejson.scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) diff --git a/lib/simplejson/_speedups.c b/lib/simplejson/_speedups.c old mode 100644 new mode 100755 index 23b5f4a6e6..f8b0565866 --- a/lib/simplejson/_speedups.c +++ b/lib/simplejson/_speedups.c @@ -1,8 +1,26 @@ #include "Python.h" #include "structmember.h" +#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double) +#define PyOS_string_to_double json_PyOS_string_to_double +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) { + double x; + assert(endptr == NULL); + assert(overflow_exception == NULL); + PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;) + x = PyOS_ascii_atof(s); + PyFPE_END_PROTECT(x) + return x; +} +#endif #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE) +#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) +#endif #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) typedef int Py_ssize_t; #define PY_SSIZE_T_MAX INT_MAX @@ -26,24 +44,29 @@ typedef int Py_ssize_t; #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) +#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr)) static PyTypeObject PyScannerType; static PyTypeObject PyEncoderType; +static PyTypeObject *DecimalTypePtr; typedef struct _PyScannerObject { PyObject_HEAD PyObject *encoding; PyObject *strict; PyObject *object_hook; + PyObject *pairs_hook; PyObject *parse_float; PyObject *parse_int; PyObject *parse_constant; + PyObject *memo; } PyScannerObject; static PyMemberDef scanner_members[] = { {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, @@ -60,8 +83,12 @@ typedef struct _PyEncoderObject { PyObject *item_separator; PyObject *sort_keys; PyObject *skipkeys; + PyObject *key_memo; int fast_encode; int allow_nan; + int use_decimal; + int namedtuple_as_object; + int tuple_as_array; } PyEncoderObject; static PyMemberDef encoder_members[] = { @@ -73,6 +100,7 @@ static PyMemberDef encoder_members[] = { {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, {NULL} }; @@ -114,7 +142,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi static int encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); static PyObject * -_encoded_const(PyObject *const); +_encoded_const(PyObject *obj); static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); static PyObject * @@ -125,6 +153,8 @@ static PyObject * _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); +static int +_is_namedtuple(PyObject *obj); #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) @@ -136,14 +166,20 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj); #define MAX_EXPANSION MIN_EXPANSION #endif +static int +_is_namedtuple(PyObject *obj) +{ + return PyTuple_Check(obj) && PyObject_HasAttrString(obj, "_asdict"); +} + static int _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) { /* PyObject to Py_ssize_t converter */ *size_ptr = PyInt_AsSsize_t(o); - if (*size_ptr == -1 && PyErr_Occurred()); - return 1; - return 0; + if (*size_ptr == -1 && PyErr_Occurred()) + return 0; + return 1; } static PyObject * @@ -341,21 +377,21 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) { /* Use the Python function simplejson.decoder.errmsg to raise a nice looking ValueError exception */ - static PyObject *errmsg_fn = NULL; - PyObject *pymsg; - if (errmsg_fn == NULL) { + static PyObject *JSONDecodeError = NULL; + PyObject *exc; + if (JSONDecodeError == NULL) { PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); if (decoder == NULL) return; - errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError"); Py_DECREF(decoder); - if (errmsg_fn == NULL) + if (JSONDecodeError == NULL) return; } - pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); - if (pymsg) { - PyErr_SetObject(PyExc_ValueError, pymsg); - Py_DECREF(pymsg); + exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (exc) { + PyErr_SetObject(JSONDecodeError, exc); + Py_DECREF(exc); } } @@ -422,6 +458,20 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { return tpl; } +#define APPEND_OLD_CHUNK \ + if (chunk != NULL) { \ + if (chunks == NULL) { \ + chunks = PyList_New(0); \ + if (chunks == NULL) { \ + goto bail; \ + } \ + } \ + if (PyList_Append(chunks, chunk)) { \ + goto bail; \ + } \ + Py_CLEAR(chunk); \ + } + static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) { @@ -440,18 +490,19 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s Py_ssize_t next = begin; int has_unicode = 0; char *buf = PyString_AS_STRING(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; + PyObject *chunks = NULL; + PyObject *chunk = NULL; + + if (len == end) { + raise_errmsg("Unterminated string starting at", pystr, begin); } - if (end < 0 || len <= end) { + else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; } while (1) { /* Find the end of the string or the next escape */ Py_UNICODE c = 0; - PyObject *chunk = NULL; for (next = end; next < len; next++) { c = (unsigned char)buf[next]; if (c == '"' || c == '\\') { @@ -471,7 +522,9 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } /* Pick up this chunk if it's not zero length */ if (next != end) { - PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + PyObject *strchunk; + APPEND_OLD_CHUNK + strchunk = PyString_FromStringAndSize(&buf[end], next - end); if (strchunk == NULL) { goto bail; } @@ -485,11 +538,6 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s else { chunk = strchunk; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); } next++; if (c == '"') { @@ -594,6 +642,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s if (c > 0x7f) { has_unicode = 1; } + APPEND_OLD_CHUNK if (has_unicode) { chunk = PyUnicode_FromUnicode(&c, 1); if (chunk == NULL) { @@ -607,22 +656,28 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); + } + + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = PyString_FromStringAndSize("", 0); + } + else { + APPEND_OLD_CHUNK + rval = join_list_string(chunks); + if (rval == NULL) { goto bail; } - Py_DECREF(chunk); + Py_CLEAR(chunks); } - rval = join_list_string(chunks); - if (rval == NULL) { - goto bail; - } - Py_CLEAR(chunks); *next_end_ptr = end; return rval; bail: *next_end_ptr = -1; + Py_XDECREF(chunk); Py_XDECREF(chunks); return NULL; } @@ -644,18 +699,19 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next Py_ssize_t begin = end - 1; Py_ssize_t next = begin; const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; + PyObject *chunks = NULL; + PyObject *chunk = NULL; + + if (len == end) { + raise_errmsg("Unterminated string starting at", pystr, begin); } - if (end < 0 || len <= end) { + else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; } while (1) { /* Find the end of the string or the next escape */ Py_UNICODE c = 0; - PyObject *chunk = NULL; for (next = end; next < len; next++) { c = buf[next]; if (c == '"' || c == '\\') { @@ -672,15 +728,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } /* Pick up this chunk if it's not zero length */ if (next != end) { + APPEND_OLD_CHUNK chunk = PyUnicode_FromUnicode(&buf[end], next - end); if (chunk == NULL) { goto bail; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); } next++; if (c == '"') { @@ -782,26 +834,32 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next } #endif } + APPEND_OLD_CHUNK chunk = PyUnicode_FromUnicode(&c, 1); if (chunk == NULL) { goto bail; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); } - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = PyUnicode_FromUnicode(NULL, 0); + } + else { + APPEND_OLD_CHUNK + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); } - Py_DECREF(chunks); *next_end_ptr = end; return rval; bail: *next_end_ptr = -1; + Py_XDECREF(chunk); Py_XDECREF(chunks); return NULL; } @@ -891,9 +949,11 @@ scanner_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->encoding); Py_VISIT(s->strict); Py_VISIT(s->object_hook); + Py_VISIT(s->pairs_hook); Py_VISIT(s->parse_float); Py_VISIT(s->parse_int); Py_VISIT(s->parse_constant); + Py_VISIT(s->memo); return 0; } @@ -906,9 +966,11 @@ scanner_clear(PyObject *self) Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); + Py_CLEAR(s->memo); return 0; } @@ -919,18 +981,30 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ *next_idx_ptr is a return-by-reference index to the first character after the closing curly brace. - Returns a new PyObject (usually a dict, but object_hook can change that) + Returns a new PyObject (usually a dict, but object_hook or + object_pairs_hook can change that) */ char *str = PyString_AS_STRING(pystr); Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval = PyDict_New(); + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; PyObject *key = NULL; PyObject *val = NULL; char *encoding = PyString_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); Py_ssize_t next_idx; - if (rval == NULL) - return NULL; + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } /* skip whitespace after { */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; @@ -938,6 +1012,8 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ /* only loop if the object is non-empty */ if (idx <= end_idx && str[idx] != '}') { while (idx <= end_idx) { + PyObject *memokey; + /* read key */ if (str[idx] != '"') { raise_errmsg("Expecting property name", pystr, idx); @@ -946,6 +1022,16 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); if (key == NULL) goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ @@ -962,11 +1048,24 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if (val == NULL) goto bail; - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } idx = next_idx; /* skip whitespace before } or , */ @@ -992,6 +1091,17 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ raise_errmsg("Expecting object", pystr, end_idx); goto bail; } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); @@ -1004,9 +1114,10 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ *next_idx_ptr = idx + 1; return rval; bail: + Py_XDECREF(rval); Py_XDECREF(key); Py_XDECREF(val); - Py_DECREF(rval); + Py_XDECREF(pairs); return NULL; } @@ -1021,20 +1132,34 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss */ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyDict_New(); + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; PyObject *key = NULL; + PyObject *val = NULL; int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); Py_ssize_t next_idx; - if (rval == NULL) - return NULL; + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } + /* skip whitespace after { */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* only loop if the object is non-empty */ if (idx <= end_idx && str[idx] != '}') { while (idx <= end_idx) { + PyObject *memokey; + /* read key */ if (str[idx] != '"') { raise_errmsg("Expecting property name", pystr, idx); @@ -1043,6 +1168,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); if (key == NULL) goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ @@ -1059,11 +1194,24 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss if (val == NULL) goto bail; - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } idx = next_idx; /* skip whitespace before } or , */ @@ -1091,6 +1239,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss goto bail; } + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); @@ -1103,9 +1261,10 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss *next_idx_ptr = idx + 1; return rval; bail: + Py_XDECREF(rval); Py_XDECREF(key); Py_XDECREF(val); - Py_DECREF(rval); + Py_XDECREF(pairs); return NULL; } @@ -1135,8 +1294,13 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t /* read any JSON term and de-tuplefy the (rval, idx) */ val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) + if (val == NULL) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) { + PyErr_Clear(); + raise_errmsg("Expecting object", pystr, idx); + } goto bail; + } if (PyList_Append(rval, val) == -1) goto bail; @@ -1202,8 +1366,13 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) + if (val == NULL) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) { + PyErr_Clear(); + raise_errmsg("Expecting object", pystr, idx); + } goto bail; + } if (PyList_Append(rval, val) == -1) goto bail; @@ -1350,7 +1519,12 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { - rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); + /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ + double d = PyOS_string_to_double(PyString_AS_STRING(numstr), + NULL, NULL); + if (d == -1.0 && PyErr_Occurred()) + return NULL; + rval = PyFloat_FromDouble(d); } } else { @@ -1413,7 +1587,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { is_float = 1; idx += 2; - while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ @@ -1470,68 +1644,92 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n */ char *str = PyString_AS_STRING(pystr); Py_ssize_t length = PyString_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } + if (Py_EnterRecursiveCall(" while decoding a JSON document")) + return NULL; switch (str[idx]) { case '"': /* string */ - return scanstring_str(pystr, idx + 1, + rval = scanstring_str(pystr, idx + 1, PyString_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + break; case '[': /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } + else + fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } + else + fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } + else + fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } + else + fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_str(s, pystr, idx, next_idx_ptr); + Py_LeaveRecursiveCall(); + return rval; } static PyObject * @@ -1546,67 +1744,91 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ */ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } + if (Py_EnterRecursiveCall(" while decoding a JSON document")) + return NULL; switch (str[idx]) { case '"': /* string */ - return scanstring_unicode(pystr, idx + 1, + rval = scanstring_unicode(pystr, idx + 1, PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + break; case '[': /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } + else + fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } + else + fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } + else + fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } + else + fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); + Py_LeaveRecursiveCall(); + return rval; } static PyObject * @@ -1636,6 +1858,7 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) Py_TYPE(pystr)->tp_name); return NULL; } + PyDict_Clear(s->memo); return _build_rval_index_tuple(rval, next_idx); } @@ -1648,6 +1871,7 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->encoding = NULL; s->strict = NULL; s->object_hook = NULL; + s->pairs_hook = NULL; s->parse_float = NULL; s->parse_int = NULL; s->parse_constant = NULL; @@ -1668,9 +1892,17 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) return -1; + + if (s->memo == NULL) { + s->memo = PyDict_New(); + if (s->memo == NULL) + goto bail; + } /* PyString_AS_STRING is used on encoding */ s->encoding = PyObject_GetAttrString(ctx, "encoding"); + if (s->encoding == NULL) + goto bail; if (s->encoding == Py_None) { Py_DECREF(Py_None); s->encoding = PyString_InternFromString(DEFAULT_ENCODING); @@ -1690,6 +1922,9 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); if (s->object_hook == NULL) goto bail; + s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); + if (s->pairs_hook == NULL) + goto bail; s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); if (s->parse_float == NULL) goto bail; @@ -1706,6 +1941,7 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); @@ -1772,6 +2008,7 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->item_separator = NULL; s->sort_keys = NULL; s->skipkeys = NULL; + s->key_memo = NULL; } return (PyObject *)s; } @@ -1780,18 +2017,36 @@ static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", NULL}; PyEncoderObject *s; - PyObject *allow_nan; + PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; + PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal, *namedtuple_as_object, *tuple_as_array; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, - &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOO:make_encoder", kwlist, + &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, + &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, + &namedtuple_as_object, &tuple_as_array)) return -1; + s->markers = markers; + s->defaultfn = defaultfn; + s->encoder = encoder; + s->indent = indent; + s->key_separator = key_separator; + s->item_separator = item_separator; + s->sort_keys = sort_keys; + s->skipkeys = skipkeys; + s->key_memo = key_memo; + s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); + s->allow_nan = PyObject_IsTrue(allow_nan); + s->use_decimal = PyObject_IsTrue(use_decimal); + s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); + s->tuple_as_array = PyObject_IsTrue(tuple_as_array); + Py_INCREF(s->markers); Py_INCREF(s->defaultfn); Py_INCREF(s->encoder); @@ -1800,8 +2055,7 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) Py_INCREF(s->item_separator); Py_INCREF(s->sort_keys); Py_INCREF(s->skipkeys); - s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); - s->allow_nan = PyObject_IsTrue(allow_nan); + Py_INCREF(s->key_memo); return 0; } @@ -1910,79 +2164,91 @@ static int encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) { /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; - } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; - } - } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; + int rv = -1; + if (Py_EnterRecursiveCall(" while encoding a JSON document")) + return rv; + do { + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr != NULL) + rv = _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (s->namedtuple_as_object && _is_namedtuple(obj)) { + PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL); + if (newobj != NULL) { + rv = encoder_listencode_dict(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; + else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) { + rv = encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + rv = encoder_listencode_dict(s, rval, obj, indent_level); } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { + else if (s->use_decimal && Decimal_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else { + PyObject *ident = NULL; + PyObject *newobj; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + break; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + break; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + break; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + break; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + rv = -1; + } + else if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + rv = -1; + } Py_XDECREF(ident); - return -1; } - Py_XDECREF(ident); } - return rv; - } + } while (0); + Py_LeaveRecursiveCall(); + return rv; } static int @@ -1992,18 +2258,22 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; + static PyObject *iteritems = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; - PyObject *key, *value; - Py_ssize_t pos; + PyObject *iter = NULL; + PyObject *item = NULL; + PyObject *items = NULL; + PyObject *encoded = NULL; int skipkeys; Py_ssize_t idx; - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) { open_dict = PyString_InternFromString("{"); close_dict = PyString_InternFromString("}"); empty_dict = PyString_InternFromString("{}"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) + iteritems = PyString_InternFromString("iteritems"); + if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) return -1; } if (PyDict_Size(dct) == 0) @@ -2032,21 +2302,72 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss /* TODO: DOES NOT RUN */ indent_level += 1; /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + newline_indent = '\n' + (_indent * _current_indent_level) separator = _item_separator + newline_indent buf += newline_indent */ } - /* TODO: C speedup not implemented for sort_keys */ + if (PyObject_IsTrue(s->sort_keys)) { + /* First sort the keys then replace them with (key, value) tuples. */ + Py_ssize_t i, nitems; + if (PyDict_CheckExact(dct)) + items = PyDict_Keys(dct); + else + items = PyMapping_Keys(dct); + if (items == NULL) + goto bail; + if (!PyList_Check(items)) { + PyErr_SetString(PyExc_ValueError, "keys must return list"); + goto bail; + } + if (PyList_Sort(items) < 0) + goto bail; + nitems = PyList_GET_SIZE(items); + for (i = 0; i < nitems; i++) { + PyObject *key, *value; + key = PyList_GET_ITEM(items, i); + value = PyDict_GetItem(dct, key); + item = PyTuple_Pack(2, key, value); + if (item == NULL) + goto bail; + PyList_SET_ITEM(items, i, item); + Py_DECREF(key); + } + } + else { + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + } + if (items == NULL) + goto bail; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + goto bail; - pos = 0; skipkeys = PyObject_IsTrue(s->skipkeys); idx = 0; - while (PyDict_Next(dct, &pos, &key, &value)) { - PyObject *encoded; + while ((item = PyIter_Next(iter))) { + PyObject *encoded, *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; - if (PyString_Check(key) || PyUnicode_Check(key)) { + encoded = PyDict_GetItem(s->key_memo, key); + if (encoded != NULL) { + Py_INCREF(encoded); + } + else if (PyString_Check(key) || PyUnicode_Check(key)) { Py_INCREF(key); kstr = key; } @@ -2055,22 +2376,25 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss if (kstr == NULL) goto bail; } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + kstr = _encoded_const(key); if (kstr == NULL) goto bail; } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); if (kstr == NULL) goto bail; } else if (skipkeys) { + Py_DECREF(item); continue; } else { /* TODO: include repr of key */ - PyErr_SetString(PyExc_ValueError, "keys must be a string"); + PyErr_SetString(PyExc_TypeError, "keys must be a string"); goto bail; } @@ -2079,21 +2403,28 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss goto bail; } - encoded = encoder_encode_string(s, kstr); - Py_CLEAR(kstr); - if (encoded == NULL) - goto bail; + if (encoded == NULL) { + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyDict_SetItem(s->key_memo, key, encoded)) + goto bail; + } if (PyList_Append(rval, encoded)) { - Py_DECREF(encoded); goto bail; } - Py_DECREF(encoded); + Py_CLEAR(encoded); if (PyList_Append(rval, s->key_separator)) goto bail; if (encoder_listencode_obj(s, rval, value, indent_level)) goto bail; + Py_CLEAR(item); idx += 1; } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) goto bail; @@ -2103,7 +2434,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss /* TODO: DOES NOT RUN */ indent_level -= 1; /* - yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '\n' + (_indent * _current_indent_level) */ } if (PyList_Append(rval, close_dict)) @@ -2111,6 +2442,9 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss return 0; bail: + Py_XDECREF(encoded); + Py_XDECREF(items); + Py_XDECREF(iter); Py_XDECREF(kstr); Py_XDECREF(ident); return -1; @@ -2125,10 +2459,10 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss static PyObject *close_array = NULL; static PyObject *empty_array = NULL; PyObject *ident = NULL; - PyObject *s_fast = NULL; - Py_ssize_t num_items; - PyObject **seq_items; - Py_ssize_t i; + PyObject *iter = NULL; + PyObject *obj = NULL; + int is_true; + int i = 0; if (open_array == NULL || close_array == NULL || empty_array == NULL) { open_array = PyString_InternFromString("["); @@ -2138,14 +2472,11 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss return -1; } ident = NULL; - s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); - if (s_fast == NULL) + is_true = PyObject_IsTrue(seq); + if (is_true == -1) return -1; - num_items = PySequence_Fast_GET_SIZE(s_fast); - if (num_items == 0) { - Py_DECREF(s_fast); + else if (is_true == 0) return PyList_Append(rval, empty_array); - } if (s->markers != Py_None) { int has_key; @@ -2163,27 +2494,34 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss } } - seq_items = PySequence_Fast_ITEMS(s_fast); + iter = PyObject_GetIter(seq); + if (iter == NULL) + goto bail; + if (PyList_Append(rval, open_array)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ indent_level += 1; /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + newline_indent = '\n' + (_indent * _current_indent_level) separator = _item_separator + newline_indent buf += newline_indent */ } - for (i = 0; i < num_items; i++) { - PyObject *obj = seq_items[i]; + while ((obj = PyIter_Next(iter))) { if (i) { if (PyList_Append(rval, s->item_separator)) goto bail; } if (encoder_listencode_obj(s, rval, obj, indent_level)) goto bail; + i++; + Py_CLEAR(obj); } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) goto bail; @@ -2193,17 +2531,17 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss /* TODO: DOES NOT RUN */ indent_level -= 1; /* - yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '\n' + (_indent * _current_indent_level) */ } if (PyList_Append(rval, close_array)) goto bail; - Py_DECREF(s_fast); return 0; bail: + Py_XDECREF(obj); + Py_XDECREF(iter); Py_XDECREF(ident); - Py_DECREF(s_fast); return -1; } @@ -2229,6 +2567,7 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->item_separator); Py_VISIT(s->sort_keys); Py_VISIT(s->skipkeys); + Py_VISIT(s->key_memo); return 0; } @@ -2247,6 +2586,7 @@ encoder_clear(PyObject *self) Py_CLEAR(s->item_separator); Py_CLEAR(s->sort_keys); Py_CLEAR(s->skipkeys); + Py_CLEAR(s->key_memo); return 0; } @@ -2314,13 +2654,22 @@ PyDoc_STRVAR(module_doc, void init_speedups(void) { - PyObject *m; + PyObject *m, *decimal; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) return; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) return; + + decimal = PyImport_ImportModule("decimal"); + if (decimal == NULL) + return; + DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal"); + Py_DECREF(decimal); + if (DecimalTypePtr == NULL) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); Py_INCREF((PyObject*)&PyScannerType); PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); diff --git a/lib/simplejson/decoder.py b/lib/simplejson/decoder.py old mode 100644 new mode 100755 index dd57ddeefb..0702385771 --- a/lib/simplejson/decoder.py +++ b/lib/simplejson/decoder.py @@ -5,10 +5,13 @@ import struct from lib.simplejson.scanner import make_scanner -try: - from lib.simplejson._speedups import scanstring as c_scanstring -except ImportError: - c_scanstring = None +def _import_c_scanstring(): + try: + from simplejson._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() __all__ = ['JSONDecoder'] @@ -16,6 +19,8 @@ def _floatconstants(): _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + # The struct module in Python 2.4 would get frexp() out of range here + # when an endian is specified in the format string. Fixed in Python 2.5+ if sys.byteorder != 'big': _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] nan, inf = struct.unpack('dd', _BYTES) @@ -24,6 +29,32 @@ def _floatconstants(): NaN, PosInf, NegInf = _floatconstants() +class JSONDecodeError(ValueError): + """Subclass of ValueError with the following additional properties: + + msg: The unformatted error message + doc: The JSON document being parsed + pos: The start index of doc where parsing failed + end: The end index of doc where parsing failed (may be None) + lineno: The line corresponding to pos + colno: The column corresponding to pos + endlineno: The line corresponding to end (may be None) + endcolno: The column corresponding to end (may be None) + + """ + def __init__(self, msg, doc, pos, end=None): + ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) + self.msg = msg + self.doc = doc + self.pos = pos + self.end = end + self.lineno, self.colno = linecol(doc, pos) + if end is not None: + self.endlineno, self.endcolno = linecol(doc, end) + else: + self.endlineno, self.endcolno = None, None + + def linecol(doc, pos): lineno = doc.count('\n', 0, pos) + 1 if lineno == 1: @@ -62,13 +93,14 @@ def errmsg(msg, doc, pos, end=None): DEFAULT_ENCODING = "utf-8" -def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError on attempt to decode an invalid string. If strict is False then literal control characters are allowed in the string. - + Returns a tuple of the decoded string and the index of the character in s after the end quote.""" if encoding is None: @@ -79,8 +111,8 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU while 1: chunk = _m(s, end) if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters @@ -96,22 +128,22 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU if strict: msg = "Invalid control character %r at" % (terminator,) #msg = "Invalid control character {0!r} at".format(terminator) - raise ValueError(errmsg(msg, s, end)) + raise JSONDecodeError(msg, s, end) else: _append(terminator) continue try: esc = s[end] except IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: msg = "Invalid \\escape: " + repr(esc) - raise ValueError(errmsg(msg, s, end)) + raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence @@ -119,16 +151,16 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU next_end = end + 5 if len(esc) != 4: msg = "Invalid \\uXXXX escape" - raise ValueError(errmsg(msg, s, end)) + raise JSONDecodeError(msg, s, end) uni = int(esc, 16) # Check for surrogate pair on UCS-4 systems if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: msg = "Invalid \\uXXXX\\uXXXX surrogate pair" if not s[end + 5:end + 7] == '\\u': - raise ValueError(errmsg(msg, s, end)) + raise JSONDecodeError(msg, s, end) esc2 = s[end + 7:end + 11] if len(esc2) != 4: - raise ValueError(errmsg(msg, s, end)) + raise JSONDecodeError(msg, s, end) uni2 = int(esc2, 16) uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) next_end += 6 @@ -145,8 +177,14 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - pairs = {} +def JSONObject((s, end), encoding, strict, scan_once, object_hook, + object_pairs_hook, memo=None, + _w=WHITESPACE.match, _ws=WHITESPACE_STR): + # Backwards compatibility + if memo is None: + memo = {} + memo_get = memo.setdefault + pairs = [] # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -157,19 +195,26 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE nextchar = s[end:end + 1] # Trivial empty object if nextchar == '}': + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + 1 + pairs = {} + if object_hook is not None: + pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) + raise JSONDecodeError("Expecting property name", s, end) end += 1 while True: key, end = scanstring(s, end, encoding, strict) + key = memo_get(key, key) # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) + raise JSONDecodeError("Expecting : delimiter", s, end) end += 1 @@ -184,8 +229,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE try: value, end = scan_once(s, end) except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - pairs[key] = value + raise JSONDecodeError("Expecting object", s, end) + pairs.append((key, value)) try: nextchar = s[end] @@ -199,7 +244,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE if nextchar == '}': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + raise JSONDecodeError("Expecting , delimiter", s, end - 1) try: nextchar = s[end] @@ -214,8 +259,12 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE end += 1 if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end - 1)) + raise JSONDecodeError("Expecting property name", s, end - 1) + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) if object_hook is not None: pairs = object_hook(pairs) return pairs, end @@ -234,7 +283,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): try: value, end = scan_once(s, end) except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) + raise JSONDecodeError("Expecting object", s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -244,7 +293,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end)) + raise JSONDecodeError("Expecting , delimiter", s, end) try: if s[end] in _ws: @@ -287,37 +336,54 @@ class JSONDecoder(object): """ def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True): - """``encoding`` determines the encoding used to interpret any ``str`` - objects decoded by this instance (utf-8 by default). It has no - effect when decoding ``unicode`` objects. + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """ + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as ``unicode``. + strings of other encodings should be passed in as :class:`unicode`. - ``object_hook``, if specified, will be called with the result - of every JSON object decoded and its return value will be used in - place of the given ``dict``. This can be used to provide custom + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + *strict* controls the parser's behavior when it encounters an + invalid control character in a string. The default setting of + ``True`` means that unescaped control characters are parse errors, if + ``False`` then control characters will be allowed in strings. """ self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ @@ -325,6 +391,7 @@ def __init__(self, encoding=None, object_hook=None, parse_float=None, self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring + self.memo = {} self.scan_once = make_scanner(self) def decode(self, s, _w=WHITESPACE.match): @@ -335,12 +402,12 @@ def decode(self, s, _w=WHITESPACE.match): obj, end = self.raw_decode(s, idx=_w(s, 0).end()) end = _w(s, end).end() if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) + raise JSONDecodeError("Extra data", s, end, len(s)) return obj def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning - with a JSON document) and return a 2-tuple of the Python + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. This can be used to decode a JSON document from a string that may @@ -350,5 +417,5 @@ def raw_decode(self, s, idx=0): try: obj, end = self.scan_once(s, idx) except StopIteration: - raise ValueError("No JSON object could be decoded") + raise JSONDecodeError("No JSON object could be decoded", s, idx) return obj, end diff --git a/lib/simplejson/encoder.py b/lib/simplejson/encoder.py old mode 100644 new mode 100755 index 15c35f7a1b..08a986b6e4 --- a/lib/simplejson/encoder.py +++ b/lib/simplejson/encoder.py @@ -1,17 +1,19 @@ """Implementation of JSONEncoder """ import re +from decimal import Decimal -try: - from lib.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from lib.simplejson._speedups import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None +def _import_speedups(): + try: + from simplejson import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +from lib.simplejson.decoder import PosInf + +ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -22,22 +24,24 @@ '\n': '\\n', '\r': '\\r', '\t': '\\t', + u'\u2028': '\\u2028', + u'\u2029': '\\u2029', } for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -# Assume this produces an infinity on all machines (probably not guaranteed) -INFINITY = float('1e66666') FLOAT_REPR = repr def encode_basestring(s): """Return a JSON representation of a Python string """ + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' + return u'"' + ESCAPE.sub(replace, s) + u'"' def py_encode_basestring_ascii(s): @@ -65,7 +69,8 @@ def replace(match): return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' -encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) class JSONEncoder(object): """Extensible JSON encoder for Python data structures. @@ -75,7 +80,7 @@ class JSONEncoder(object): +-------------------+---------------+ | Python | JSON | +===================+===============+ - | dict | object | + | dict, namedtuple | object | +-------------------+---------------+ | list, tuple | array | +-------------------+---------------+ @@ -100,7 +105,9 @@ class JSONEncoder(object): key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -125,10 +132,12 @@ def __init__(self, skipkeys=False, ensure_ascii=True, sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. - If indent is a non-negative integer, then JSON array - elements and object members will be pretty-printed with that - indent level. An indent level of 0 will only insert newlines. - None is the most compact representation. + If indent is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. If specified, separators should be a (item_separator, key_separator) tuple. The default is (', ', ': '). To get the most compact JSON @@ -142,6 +151,15 @@ def __init__(self, skipkeys=False, ensure_ascii=True, transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + If use_decimal is true (not the default), ``decimal.Decimal`` will + be supported directly by the encoder. For the inverse, decode JSON + with ``parse_float=decimal.Decimal``. + + If namedtuple_as_object is true (the default), tuple subclasses with + ``_asdict()`` methods will be encoded as JSON objects. + + If tuple_as_array is true (the default), tuple (and subclasses) will + be encoded as JSON arrays. """ self.skipkeys = skipkeys @@ -149,9 +167,16 @@ def __init__(self, skipkeys=False, ensure_ascii=True, self.check_circular = check_circular self.allow_nan = allow_nan self.sort_keys = sort_keys + self.use_decimal = use_decimal + self.namedtuple_as_object = namedtuple_as_object + self.tuple_as_array = tuple_as_array + if isinstance(indent, (int, long)): + indent = ' ' * indent self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' if default is not None: self.default = default self.encoding = encoding @@ -179,6 +204,7 @@ def default(self, o): def encode(self, o): """Return a JSON string representation of a Python data structure. + >>> from simplejson import JSONEncoder >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' @@ -200,7 +226,10 @@ def encode(self, o): chunks = self.iterencode(o, _one_shot=True) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) - return ''.join(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string @@ -226,9 +255,11 @@ def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): o = o.decode(_encoding) return _orig_encoder(o) - def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. + def floatstr(o, allow_nan=self.allow_nan, + _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. if o != o: text = 'NaN' @@ -247,24 +278,62 @@ def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _negi return text - if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + key_memo = {} + if (_one_shot and c_make_encoder is not None + and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) + self.skipkeys, self.allow_nan, key_memo, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) + self.skipkeys, _one_shot, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array) + try: + return _iterencode(o, 0) + finally: + key_memo.clear() + -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, +class JSONEncoderForHTML(JSONEncoder): + """An encoder that produces JSON safe to embed in HTML. + + To embed JSON content in, say, a script tag on a web page, the + characters &, < and > should be escaped. They cannot be escaped + with the usual entities (e.g. &) because they are not expanded + within