Skip to content

Commit

Permalink
encoding memoization
Browse files (browse the repository at this point in the history)
git-svn-id: http://simplejson.googlecode.com/svn/trunk@201 a4795897-2c25-0410-b006-0d3caba88fa1
  • Loading branch information
etrepum committed Dec 27, 2009
1 parent 6cccfee commit c5042a1
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 15 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
Version 2.1.0 released XXXX-XX-XX

* Memoization of object keys during encoding (when using speedups)
* Encoder changed to use PyIter_Next for list iteration to avoid
potential threading issues
* Encoder changed to use iteritems rather than PyDict_Next in order to
support dict subclasses that have a well-defined ordering
http://bugs.python.org/issue6105
Expand Down
41 changes: 28 additions & 13 deletions simplejson/_speedups.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ typedef struct _PyEncoderObject {
PyObject *item_separator;
PyObject *sort_keys;
PyObject *skipkeys;
PyObject *key_memo;
int fast_encode;
int allow_nan;
} PyEncoderObject;
Expand All @@ -90,6 +91,7 @@ static PyMemberDef encoder_members[] = {
{"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
{"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
{"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
{"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
{NULL}
};

Expand Down Expand Up @@ -1864,6 +1866,7 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
s->item_separator = NULL;
s->sort_keys = NULL;
s->skipkeys = NULL;
s->key_memo = NULL;
}
return (PyObject *)s;
}
Expand All @@ -1872,18 +1875,18 @@ static int
encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
/* initialize Encoder object */
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", NULL};

PyEncoderObject *s;
PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;

assert(PyEncoder_Check(self));
s = (PyEncoderObject *)self;

if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOO:make_encoder", kwlist,
&markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
&sort_keys, &skipkeys, &allow_nan))
&sort_keys, &skipkeys, &allow_nan, &key_memo))
return -1;

s->markers = markers;
Expand All @@ -1894,6 +1897,7 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
s->item_separator = item_separator;
s->sort_keys = sort_keys;
s->skipkeys = skipkeys;
s->key_memo = key_memo;
s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
s->allow_nan = PyObject_IsTrue(allow_nan);

Expand All @@ -1905,6 +1909,7 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
Py_INCREF(s->item_separator);
Py_INCREF(s->sort_keys);
Py_INCREF(s->skipkeys);
Py_INCREF(s->key_memo);
return 0;
}

Expand Down Expand Up @@ -2101,6 +2106,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
PyObject *key, *value;
PyObject *iter = NULL;
PyObject *item = NULL;
PyObject *encoded = NULL;
int skipkeys;
Py_ssize_t idx;

Expand Down Expand Up @@ -2152,16 +2158,19 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
if (iter == NULL)
goto bail;
while ((item = PyIter_Next(iter))) {
PyObject *encoded;

key = PyTuple_GetItem(item, 0);
if (key == NULL)
goto bail;
value = PyTuple_GetItem(item, 1);
if (value == NULL)
goto bail;

if (PyString_Check(key) || PyUnicode_Check(key)) {

encoded = PyDict_GetItem(s->key_memo, key);
if (encoded != NULL) {
Py_INCREF(encoded);
}
else if (PyString_Check(key) || PyUnicode_Check(key)) {
Py_INCREF(key);
kstr = key;
}
Expand Down Expand Up @@ -2195,15 +2204,18 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
goto bail;
}

encoded = encoder_encode_string(s, kstr);
Py_CLEAR(kstr);
if (encoded == NULL)
goto bail;
if (encoded == NULL) {
encoded = encoder_encode_string(s, kstr);
Py_CLEAR(kstr);
if (encoded == NULL)
goto bail;
if (PyDict_SetItem(s->key_memo, key, encoded))
goto bail;
}
if (PyList_Append(rval, encoded)) {
Py_DECREF(encoded);
goto bail;
}
Py_DECREF(encoded);
Py_CLEAR(encoded);
if (PyList_Append(rval, s->key_separator))
goto bail;
if (encoder_listencode_obj(s, rval, value, indent_level))
Expand Down Expand Up @@ -2231,6 +2243,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
return 0;

bail:
Py_XDECREF(encoded);
Py_XDECREF(item);
Py_XDECREF(iter);
Py_XDECREF(kstr);
Expand Down Expand Up @@ -2355,6 +2368,7 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg)
Py_VISIT(s->item_separator);
Py_VISIT(s->sort_keys);
Py_VISIT(s->skipkeys);
Py_VISIT(s->key_memo);
return 0;
}

Expand All @@ -2373,6 +2387,7 @@ encoder_clear(PyObject *self)
Py_CLEAR(s->item_separator);
Py_CLEAR(s->sort_keys);
Py_CLEAR(s->skipkeys);
Py_CLEAR(s->key_memo);
return 0;
}

Expand Down
8 changes: 6 additions & 2 deletions simplejson/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,18 +261,22 @@ def floatstr(o, allow_nan=self.allow_nan,
return text


key_memo = {}
if (_one_shot and c_make_encoder is not None
and not self.indent and not self.sort_keys):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan)
self.skipkeys, self.allow_nan, key_memo)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot)
return _iterencode(o, 0)
try:
return _iterencode(o, 0)
finally:
key_memo.clear()

def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
Expand Down

0 comments on commit c5042a1

Please sign in to comment.