From 3c16b843681f54130ee6a022275289cadb2f2a69 Mon Sep 17 00:00:00 2001 From: tav Date: Tue, 17 May 2011 08:45:09 +0100 Subject: [PATCH] Updated to the latest git version of simplejson. --- simplejson/__init__.py | 70 ++++++--- simplejson/_speedups.c | 331 +++++++++++++++++++++++++++-------------- simplejson/decoder.py | 19 ++- simplejson/encoder.py | 45 ++++-- simplejson/scanner.py | 11 +- simplejson/tool.py | 39 +++++ 6 files changed, 360 insertions(+), 155 deletions(-) create mode 100644 simplejson/tool.py diff --git a/simplejson/__init__.py b/simplejson/__init__.py index f6d5679..dd55861 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -97,7 +97,7 @@ $ echo '{ 1.2:3.4}' | python -m simplejson.tool Expecting property name: line 1 column 2 (char 2) """ -__version__ = '2.1.0rc2' +__version__ = '2.1.7' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', @@ -106,12 +106,25 @@ __author__ = 'Bob Ippolito ' +from decimal import Decimal + from decoder import JSONDecoder, JSONDecodeError from encoder import JSONEncoder -try: - from collections import OrderedDict -except ImportError: - from ordered_dict import OrderedDict +def _import_OrderedDict(): + import collections + try: + return collections.OrderedDict + except AttributeError: + import ordered_dict + return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): + try: + from simplejson._speedups import make_encoder + return make_encoder + except ImportError: + return None _default_encoder = JSONEncoder( skipkeys=False, @@ -122,11 +135,12 @@ separators=None, encoding='utf-8', default=None, + use_decimal=False, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + encoding='utf-8', default=None, use_decimal=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). @@ -165,6 +179,9 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``False``) then decimal.Decimal + will be natively serialized to JSON with full precision. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. @@ -174,7 +191,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and not use_decimal + and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -182,7 +200,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) + default=default, use_decimal=use_decimal, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: @@ -191,7 +209,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + encoding='utf-8', default=None, use_decimal=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types @@ -227,6 +245,9 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``False``) then decimal.Decimal + will be natively serialized to JSON with full precision. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. @@ -236,7 +257,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and not use_decimal + and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -244,7 +266,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, default=default, - **kw).encode(obj) + use_decimal=use_decimal, **kw).encode(obj) _default_decoder = JSONDecoder(encoding=None, object_hook=None, @@ -252,7 +274,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. @@ -292,6 +315,9 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, can be used to raise an exception if invalid JSON numbers are encountered. + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. @@ -300,11 +326,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, - **kw) + use_decimal=use_decimal, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. @@ -344,13 +371,17 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, can be used to raise an exception if invalid JSON numbers are encountered. + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. """ if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and object_pairs_hook is None and not kw): + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder @@ -364,6 +395,10 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal return cls(encoding=encoding, **kw).decode(s) @@ -371,10 +406,7 @@ def _toggle_speedups(enabled): import simplejson.decoder as dec import simplejson.encoder as enc import simplejson.scanner as scan - try: - from simplejson._speedups import make_encoder as c_make_encoder - except ImportError: - c_make_encoder = None + c_make_encoder = _import_c_make_encoder() if enabled: dec.scanstring = dec.c_scanstring or dec.py_scanstring enc.c_make_encoder = c_make_encoder diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index 14efa70..8b34747 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -18,6 +18,9 @@ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exce #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif +#if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE) +#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) +#endif #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) typedef int Py_ssize_t; #define PY_SSIZE_T_MAX INT_MAX @@ -41,9 +44,11 @@ typedef int Py_ssize_t; #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) +#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr)) static PyTypeObject PyScannerType; static PyTypeObject PyEncoderType; +static PyTypeObject *DecimalTypePtr; typedef struct _PyScannerObject { PyObject_HEAD @@ -81,6 +86,7 @@ typedef struct _PyEncoderObject { PyObject *key_memo; int fast_encode; int allow_nan; + int use_decimal; } PyEncoderObject; static PyMemberDef encoder_members[] = { @@ -503,8 +509,9 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } /* Pick up this chunk if it's not zero length */ if (next != end) { + PyObject *strchunk; APPEND_OLD_CHUNK - PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + strchunk = PyString_FromStringAndSize(&buf[end], next - end); if (strchunk == NULL) { goto bail; } @@ -1621,68 +1628,92 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n */ char *str = PyString_AS_STRING(pystr); Py_ssize_t length = PyString_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } + if (Py_EnterRecursiveCall(" while decoding a JSON document")) + return NULL; switch (str[idx]) { case '"': /* string */ - return scanstring_str(pystr, idx + 1, + rval = scanstring_str(pystr, idx + 1, PyString_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + break; case '[': /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } + else + fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } + else + fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } + else + fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } + else + fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_str(s, pystr, idx, next_idx_ptr); + Py_LeaveRecursiveCall(); + return rval; } static PyObject * @@ -1697,67 +1728,91 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ */ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); Py_ssize_t length = PyUnicode_GET_SIZE(pystr); + PyObject *rval = NULL; + int fallthrough = 0; if (idx >= length) { PyErr_SetNone(PyExc_StopIteration); return NULL; } + if (Py_EnterRecursiveCall(" while decoding a JSON document")) + return NULL; switch (str[idx]) { case '"': /* string */ - return scanstring_unicode(pystr, idx + 1, + rval = scanstring_unicode(pystr, idx + 1, PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + break; case '[': /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } + else + fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } + else + fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } + else + fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } + else + fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); + Py_LeaveRecursiveCall(); + return rval; } static PyObject * @@ -1946,18 +2001,18 @@ static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", NULL}; + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; - PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; + PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOO:make_encoder", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOO:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, - &sort_keys, &skipkeys, &allow_nan, &key_memo)) + &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal)) return -1; s->markers = markers; @@ -1971,6 +2026,7 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) s->key_memo = key_memo; s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); s->allow_nan = PyObject_IsTrue(allow_nan); + s->use_decimal = PyObject_IsTrue(use_decimal); Py_INCREF(s->markers); Py_INCREF(s->defaultfn); @@ -2089,79 +2145,84 @@ static int encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) { /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; - } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; - } - } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; - } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; + int rv = -1; + if (Py_EnterRecursiveCall(" while encoding a JSON document")) + return rv; + do { + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr != NULL) + rv = _steal_list_append(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); + } + else if (PyList_Check(obj) || PyTuple_Check(obj)) { + rv = encoder_listencode_list(s, rval, obj, indent_level); + } + else if (PyDict_Check(obj)) { + rv = encoder_listencode_dict(s, rval, obj, indent_level); + } + else if (s->use_decimal && Decimal_Check(obj)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_list_append(rval, encoded); } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { + else { + PyObject *ident = NULL; + PyObject *newobj; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + break; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + break; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + break; + } + } + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + break; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + rv = -1; + } + else if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + rv = -1; + } Py_XDECREF(ident); - return -1; } - Py_XDECREF(ident); } - return rv; - } + } while (0); + Py_LeaveRecursiveCall(); + return rv; } static int @@ -2174,9 +2235,9 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss static PyObject *iteritems = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; - PyObject *key, *value; PyObject *iter = NULL; PyObject *item = NULL; + PyObject *items = NULL; PyObject *encoded = NULL; int skipkeys; Py_ssize_t idx; @@ -2221,22 +2282,61 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss */ } - /* TODO: C speedup not implemented for sort_keys */ + if (PyObject_IsTrue(s->sort_keys)) { + /* First sort the keys then replace them with (key, value) tuples. */ + Py_ssize_t i, nitems; + if (PyDict_CheckExact(dct)) + items = PyDict_Keys(dct); + else + items = PyMapping_Keys(dct); + if (items == NULL) + goto bail; + if (!PyList_Check(items)) { + PyErr_SetString(PyExc_ValueError, "keys must return list"); + goto bail; + } + if (PyList_Sort(items) < 0) + goto bail; + nitems = PyList_GET_SIZE(items); + for (i = 0; i < nitems; i++) { + PyObject *key, *value; + key = PyList_GET_ITEM(items, i); + value = PyDict_GetItem(dct, key); + item = PyTuple_Pack(2, key, value); + if (item == NULL) + goto bail; + PyList_SET_ITEM(items, i, item); + Py_DECREF(key); + } + } + else { + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + } + if (items == NULL) + goto bail; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + goto bail; skipkeys = PyObject_IsTrue(s->skipkeys); idx = 0; - iter = PyObject_CallMethodObjArgs(dct, iteritems, NULL); - if (iter == NULL) - goto bail; while ((item = PyIter_Next(iter))) { - - key = PyTuple_GetItem(item, 0); + PyObject *encoded, *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); if (key == NULL) goto bail; - value = PyTuple_GetItem(item, 1); + value = PyTuple_GET_ITEM(item, 1); if (value == NULL) goto bail; - + encoded = PyDict_GetItem(s->key_memo, key); if (encoded != NULL) { Py_INCREF(encoded); @@ -2250,13 +2350,15 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss if (kstr == NULL) goto bail; } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + kstr = _encoded_const(key); if (kstr == NULL) goto bail; } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); + else if (PyInt_Check(key) || PyLong_Check(key)) { + kstr = PyObject_Str(key); if (kstr == NULL) goto bail; } @@ -2266,7 +2368,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } else { /* TODO: include repr of key */ - PyErr_SetString(PyExc_ValueError, "keys must be a string"); + PyErr_SetString(PyExc_TypeError, "keys must be a string"); goto bail; } @@ -2315,7 +2417,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss bail: Py_XDECREF(encoded); - Py_XDECREF(item); + Py_XDECREF(items); Py_XDECREF(iter); Py_XDECREF(kstr); Py_XDECREF(ident); @@ -2526,13 +2628,22 @@ PyDoc_STRVAR(module_doc, void init_speedups(void) { - PyObject *m; + PyObject *m, *decimal; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) return; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) return; + + decimal = PyImport_ImportModule("decimal"); + if (decimal == NULL) + return; + DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal"); + Py_DECREF(decimal); + if (DecimalTypePtr == NULL) + return; + m = Py_InitModule3("_speedups", speedups_methods, module_doc); Py_INCREF((PyObject*)&PyScannerType); PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); diff --git a/simplejson/decoder.py b/simplejson/decoder.py index 4bf4b18..e5496d6 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -5,10 +5,13 @@ import struct from simplejson.scanner import make_scanner -try: - from simplejson._speedups import scanstring as c_scanstring -except ImportError: - c_scanstring = None +def _import_c_scanstring(): + try: + from simplejson._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() __all__ = ['JSONDecoder'] @@ -28,7 +31,7 @@ def _floatconstants(): class JSONDecodeError(ValueError): """Subclass of ValueError with the following additional properties: - + msg: The unformatted error message doc: The JSON document being parsed pos: The start index of doc where parsing failed @@ -37,7 +40,7 @@ class JSONDecodeError(ValueError): colno: The column corresponding to pos endlineno: The line corresponding to end (may be None) endcolno: The column corresponding to end (may be None) - + """ def __init__(self, msg, doc, pos, end=None): ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) @@ -47,7 +50,7 @@ def __init__(self, msg, doc, pos, end=None): self.end = end self.lineno, self.colno = linecol(doc, pos) if end is not None: - self.endlineno, self.endcolno = linecol(doc, pos) + self.endlineno, self.endcolno = linecol(doc, end) else: self.endlineno, self.endcolno = None, None @@ -194,7 +197,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, if nextchar == '}': if object_pairs_hook is not None: result = object_pairs_hook(pairs) - return result, end + return result, end + 1 pairs = {} if object_hook is not None: pairs = object_hook(pairs) diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 9a6ffab..c72bd7f 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -1,20 +1,19 @@ """Implementation of JSONEncoder """ import re +from decimal import Decimal -try: - from simplejson._speedups import encode_basestring_ascii as \ - c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from simplejson._speedups import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None +def _import_speedups(): + try: + from simplejson import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() from simplejson.decoder import PosInf -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -25,6 +24,8 @@ '\n': '\\n', '\r': '\\r', '\t': '\\t', + u'\u2028': '\\u2028', + u'\u2029': '\\u2029', } for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) @@ -104,7 +105,8 @@ class JSONEncoder(object): key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=False): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -148,6 +150,10 @@ def __init__(self, skipkeys=False, ensure_ascii=True, transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + If use_decimal is true (not the default), ``decimal.Decimal`` will + be supported directly by the encoder. For the inverse, decode JSON + with ``parse_float=decimal.Decimal``. + """ self.skipkeys = skipkeys @@ -155,11 +161,14 @@ def __init__(self, skipkeys=False, ensure_ascii=True, self.check_circular = check_circular self.allow_nan = allow_nan self.sort_keys = sort_keys + self.use_decimal = use_decimal if isinstance(indent, (int, long)): indent = ' ' * indent self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' if default is not None: self.default = default self.encoding = encoding @@ -263,16 +272,16 @@ def floatstr(o, allow_nan=self.allow_nan, key_memo = {} if (_one_shot and c_make_encoder is not None - and not self.indent and not self.sort_keys): + and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan, key_memo) + self.skipkeys, self.allow_nan, key_memo, self.use_decimal) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) + self.skipkeys, _one_shot, self.use_decimal) try: return _iterencode(o, 0) finally: @@ -308,11 +317,13 @@ def iterencode(self, o, _one_shot=False): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + _use_decimal, ## HACK: hand-optimized bytecode; turn globals into locals False=False, True=True, ValueError=ValueError, basestring=basestring, + Decimal=Decimal, dict=dict, float=float, id=id, @@ -360,6 +371,8 @@ def _iterencode_list(lst, _current_indent_level): yield buf + str(value) elif isinstance(value, float): yield buf + _floatstr(value) + elif _use_decimal and isinstance(value, Decimal): + yield buf + str(value) else: yield buf if isinstance(value, (list, tuple)): @@ -438,6 +451,8 @@ def _iterencode_dict(dct, _current_indent_level): yield str(value) elif isinstance(value, float): yield _floatstr(value) + elif _use_decimal and isinstance(value, Decimal): + yield str(value) else: if isinstance(value, (list, tuple)): chunks = _iterencode_list(value, _current_indent_level) @@ -473,6 +488,8 @@ def _iterencode(o, _current_indent_level): elif isinstance(o, dict): for chunk in _iterencode_dict(o, _current_indent_level): yield chunk + elif _use_decimal and isinstance(o, Decimal): + yield str(o) else: if markers is not None: markerid = id(o) diff --git a/simplejson/scanner.py b/simplejson/scanner.py index c462b61..54593a3 100644 --- a/simplejson/scanner.py +++ b/simplejson/scanner.py @@ -1,10 +1,13 @@ """JSON token scanner """ import re -try: - from simplejson._speedups import make_scanner as c_make_scanner -except ImportError: - c_make_scanner = None +def _import_c_make_scanner(): + try: + from simplejson._speedups import make_scanner + return make_scanner + except ImportError: + return None +c_make_scanner = _import_c_make_scanner() __all__ = ['make_scanner'] diff --git a/simplejson/tool.py b/simplejson/tool.py new file mode 100644 index 0000000..73370db --- /dev/null +++ b/simplejson/tool.py @@ -0,0 +1,39 @@ +r"""Command-line tool to validate and pretty-print JSON + +Usage:: + + $ echo '{"json":"obj"}' | python -m simplejson.tool + { + "json": "obj" + } + $ echo '{ 1.2:3.4}' | python -m simplejson.tool + Expecting property name: line 1 column 2 (char 2) + +""" +import sys +import simplejson as json + +def main(): + if len(sys.argv) == 1: + infile = sys.stdin + outfile = sys.stdout + elif len(sys.argv) == 2: + infile = open(sys.argv[1], 'rb') + outfile = sys.stdout + elif len(sys.argv) == 3: + infile = open(sys.argv[1], 'rb') + outfile = open(sys.argv[2], 'wb') + else: + raise SystemExit(sys.argv[0] + " [infile [outfile]]") + try: + obj = json.load(infile, + object_pairs_hook=json.OrderedDict, + use_decimal=True) + except ValueError, e: + raise SystemExit(e) + json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True) + outfile.write('\n') + + +if __name__ == '__main__': + main()