Skip to content

Commit dc3eaa8

Browse files
committed
Issue #23206: Make json.dumps(..., ensure_ascii=False) as fast as the default case of ensure_ascii=True. Patch by Naoki Inada.
1 parent 2cae11e commit dc3eaa8

File tree

5 files changed

+142
-7
lines changed

5 files changed

+142
-7
lines changed

Lib/json/encoder.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
from _json import encode_basestring_ascii as c_encode_basestring_ascii
77
except ImportError:
88
c_encode_basestring_ascii = None
9+
try:
10+
from _json import encode_basestring as c_encode_basestring
11+
except ImportError:
12+
c_encode_basestring = None
913
try:
1014
from _json import make_encoder as c_make_encoder
1115
except ImportError:
@@ -30,7 +34,7 @@
3034
INFINITY = float('inf')
3135
FLOAT_REPR = repr
3236

33-
def encode_basestring(s):
37+
def py_encode_basestring(s):
3438
"""Return a JSON representation of a Python string
3539
3640
"""
@@ -39,6 +43,9 @@ def replace(match):
3943
return '"' + ESCAPE.sub(replace, s) + '"'
4044

4145

46+
encode_basestring = (c_encode_basestring or py_encode_basestring)
47+
48+
4249
def py_encode_basestring_ascii(s):
4350
"""Return an ASCII-only JSON representation of a Python string
4451

Lib/test/test_json/test_encode_basestring_ascii.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111
(' s p a c e d ', '" s p a c e d "'),
1212
('\U0001d120', '"\\ud834\\udd20"'),
1313
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
14-
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
15-
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
16-
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
1714
("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
1815
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
1916
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ Ali Ikinci
626626
Aaron Iles
627627
Lars Immisch
628628
Bobby Impollonia
629+
Naoki Inada
629630
Meador Inge
630631
Peter Ingebretson
631632
Tony Ingraldi

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ Core and Builtins
203203
Library
204204
-------
205205

206+
- Issue #23206: Make ``json.dumps(..., ensure_ascii=False)`` as fast as the
207+
default case of ``ensure_ascii=True``. Patch by Naoki Inada.
208+
206209
- Issue #23185: Add math.inf and math.nan constants.
207210

208211
- Issue #23186: Add ssl.SSLObject.shared_ciphers() and

Modules/_json.c

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ typedef struct _PyEncoderObject {
4747
PyObject *item_separator;
4848
PyObject *sort_keys;
4949
PyObject *skipkeys;
50-
int fast_encode;
50+
PyCFunction fast_encode;
5151
int allow_nan;
5252
} PyEncoderObject;
5353

@@ -218,6 +218,97 @@ ascii_escape_unicode(PyObject *pystr)
218218
return rval;
219219
}
220220

221+
static PyObject *
escape_unicode(PyObject *pystr)
{
    /* Take a PyUnicode pystr and return a new escaped PyUnicode.

       The result is the input wrapped in double quotes.  Backslash, double
       quote and the control characters \b \f \n \r \t get two-character
       escapes, any other character <= 0x1f becomes a six-character \u00XX
       escape, and everything else is copied through unchanged.

       Returns NULL on failure (string not ready, output too large, or
       allocation failure). */
    Py_ssize_t i;
    Py_ssize_t input_chars;
    Py_ssize_t output_size;
    Py_ssize_t chars;
    PyObject *rval;
    void *input;
    int kind;
    Py_UCS4 maxchar;

    if (PyUnicode_READY(pystr) == -1)
        return NULL;

    maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
    input_chars = PyUnicode_GET_LENGTH(pystr);
    input = PyUnicode_DATA(pystr);
    kind = PyUnicode_KIND(pystr);

    /* Compute the output size; start at 2 for the surrounding quotes. */
    for (i = 0, output_size = 2; i < input_chars; i++) {
        Py_UCS4 c = PyUnicode_READ(kind, input, i);
        Py_ssize_t d;
        switch (c) {
        case '\\': case '"': case '\b': case '\f':
        case '\n': case '\r': case '\t':
            d = 2;
            break;
        default:
            if (c <= 0x1f)
                d = 6;
            else
                d = 1;
        }
        /* Each input character can expand up to 6x, so the running total
           may exceed Py_ssize_t for huge inputs.  Check before adding:
           signed overflow is undefined behaviour and would lead to an
           undersized buffer being written past its end below. */
        if (output_size > PY_SSIZE_T_MAX - d) {
            PyErr_SetString(PyExc_OverflowError,
                            "string is too long to escape");
            return NULL;
        }
        output_size += d;
    }

    rval = PyUnicode_New(output_size, maxchar);
    if (rval == NULL)
        return NULL;

    /* rval was created with the input's maxchar, so the same kind is used
       both to read the input and to pick the output pointer type below. */
    kind = PyUnicode_KIND(rval);

#define ENCODE_OUTPUT do { \
        chars = 0; \
        output[chars++] = '"'; \
        for (i = 0; i < input_chars; i++) { \
            Py_UCS4 c = PyUnicode_READ(kind, input, i); \
            switch (c) { \
            case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
            case '"':  output[chars++] = '\\'; output[chars++] = c; break; \
            case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
            case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
            case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
            case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
            case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
            default: \
                if (c <= 0x1f) { \
                    output[chars++] = '\\'; \
                    output[chars++] = 'u'; \
                    output[chars++] = '0'; \
                    output[chars++] = '0'; \
                    output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
                    output[chars++] = Py_hexdigits[(c     ) & 0xf]; \
                } else { \
                    output[chars++] = c; \
                } \
            } \
        } \
        output[chars++] = '"'; \
    } while (0)

    /* Expand the same loop once per storage width so that `output` has the
       correctly sized element type in each branch. */
    if (kind == PyUnicode_1BYTE_KIND) {
        Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
        ENCODE_OUTPUT;
    } else if (kind == PyUnicode_2BYTE_KIND) {
        Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
        ENCODE_OUTPUT;
    } else {
        Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
        assert(kind == PyUnicode_4BYTE_KIND);
        ENCODE_OUTPUT;
    }
#undef ENCODE_OUTPUT

#ifdef Py_DEBUG
    assert(_PyUnicode_CheckConsistency(rval, 1));
#endif
    return rval;
}
311+
221312
static void
222313
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
223314
{
@@ -530,6 +621,31 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
530621
return rval;
531622
}
532623

624+
625+
PyDoc_STRVAR(pydoc_encode_basestring,
626+
"encode_basestring(string) -> string\n"
627+
"\n"
628+
"Return a JSON representation of a Python string"
629+
);
630+
631+
static PyObject *
py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
{
    /* METH_O entry point: return a JSON representation of a Python string.
       Anything that is not a str is rejected with TypeError; a str is
       handed to escape_unicode() and its result is returned as-is. */
    if (!PyUnicode_Check(pystr)) {
        PyErr_Format(PyExc_TypeError,
                     "first argument must be a string, not %.80s",
                     Py_TYPE(pystr)->tp_name);
        return NULL;
    }
    return escape_unicode(pystr);
}
648+
533649
static void
534650
scanner_dealloc(PyObject *self)
535651
{
@@ -1223,7 +1339,14 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
12231339
s->item_separator = item_separator;
12241340
s->sort_keys = sort_keys;
12251341
s->skipkeys = skipkeys;
1226-
s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1342+
s->fast_encode = NULL;
1343+
if (PyCFunction_Check(s->encoder)) {
1344+
PyCFunction f = PyCFunction_GetFunction(s->encoder);
1345+
if (f == (PyCFunction)py_encode_basestring_ascii ||
1346+
f == (PyCFunction)py_encode_basestring) {
1347+
s->fast_encode = f;
1348+
}
1349+
}
12271350
s->allow_nan = PyObject_IsTrue(allow_nan);
12281351

12291352
Py_INCREF(s->markers);
@@ -1372,7 +1495,7 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
13721495
{
13731496
/* Return the JSON representation of a string */
13741497
if (s->fast_encode)
1375-
return py_encode_basestring_ascii(NULL, obj);
1498+
return s->fast_encode(NULL, obj);
13761499
else
13771500
return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
13781501
}
@@ -1840,6 +1963,10 @@ static PyMethodDef speedups_methods[] = {
18401963
(PyCFunction)py_encode_basestring_ascii,
18411964
METH_O,
18421965
pydoc_encode_basestring_ascii},
1966+
{"encode_basestring",
1967+
(PyCFunction)py_encode_basestring,
1968+
METH_O,
1969+
pydoc_encode_basestring},
18431970
{"scanstring",
18441971
(PyCFunction)py_scanstring,
18451972
METH_VARARGS,

0 commit comments

Comments
 (0)