@@ -47,7 +47,7 @@ typedef struct _PyEncoderObject {
4747 PyObject * item_separator ;
4848 PyObject * sort_keys ;
4949 PyObject * skipkeys ;
50- int fast_encode ;
50+ PyCFunction fast_encode ;
5151 int allow_nan ;
5252} PyEncoderObject ;
5353
@@ -218,6 +218,97 @@ ascii_escape_unicode(PyObject *pystr)
218218 return rval ;
219219}
220220
221+ static PyObject *
222+ escape_unicode (PyObject * pystr )
223+ {
224+ /* Take a PyUnicode pystr and return a new escaped PyUnicode */
225+ Py_ssize_t i ;
226+ Py_ssize_t input_chars ;
227+ Py_ssize_t output_size ;
228+ Py_ssize_t chars ;
229+ PyObject * rval ;
230+ void * input ;
231+ int kind ;
232+ Py_UCS4 maxchar ;
233+
234+ if (PyUnicode_READY (pystr ) == -1 )
235+ return NULL ;
236+
237+ maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
238+ input_chars = PyUnicode_GET_LENGTH (pystr );
239+ input = PyUnicode_DATA (pystr );
240+ kind = PyUnicode_KIND (pystr );
241+
242+ /* Compute the output size */
243+ for (i = 0 , output_size = 2 ; i < input_chars ; i ++ ) {
244+ Py_UCS4 c = PyUnicode_READ (kind , input , i );
245+ switch (c ) {
246+ case '\\' : case '"' : case '\b' : case '\f' :
247+ case '\n' : case '\r' : case '\t' :
248+ output_size += 2 ;
249+ break ;
250+ default :
251+ if (c <= 0x1f )
252+ output_size += 6 ;
253+ else
254+ output_size ++ ;
255+ }
256+ }
257+
258+ rval = PyUnicode_New (output_size , maxchar );
259+ if (rval == NULL )
260+ return NULL ;
261+
262+ kind = PyUnicode_KIND (rval );
263+
264+ #define ENCODE_OUTPUT do { \
265+ chars = 0; \
266+ output[chars++] = '"'; \
267+ for (i = 0; i < input_chars; i++) { \
268+ Py_UCS4 c = PyUnicode_READ(kind, input, i); \
269+ switch (c) { \
270+ case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
271+ case '"': output[chars++] = '\\'; output[chars++] = c; break; \
272+ case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
273+ case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
274+ case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
275+ case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
276+ case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
277+ default: \
278+ if (c <= 0x1f) { \
279+ output[chars++] = '\\'; \
280+ output[chars++] = 'u'; \
281+ output[chars++] = '0'; \
282+ output[chars++] = '0'; \
283+ output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
284+ output[chars++] = Py_hexdigits[(c ) & 0xf]; \
285+ } else { \
286+ output[chars++] = c; \
287+ } \
288+ } \
289+ } \
290+ output[chars++] = '"'; \
291+ } while (0)
292+
293+ if (kind == PyUnicode_1BYTE_KIND ) {
294+ Py_UCS1 * output = PyUnicode_1BYTE_DATA (rval );
295+ ENCODE_OUTPUT ;
296+ } else if (kind == PyUnicode_2BYTE_KIND ) {
297+ Py_UCS2 * output = PyUnicode_2BYTE_DATA (rval );
298+ ENCODE_OUTPUT ;
299+ } else {
300+ Py_UCS4 * output = PyUnicode_4BYTE_DATA (rval );
301+ assert (kind == PyUnicode_4BYTE_KIND );
302+ ENCODE_OUTPUT ;
303+ }
304+ #undef ENCODE_OUTPUT
305+
306+ #ifdef Py_DEBUG
307+ assert (_PyUnicode_CheckConsistency (rval , 1 ));
308+ #endif
309+ return rval ;
310+ }
311+
221312static void
222313raise_errmsg (char * msg , PyObject * s , Py_ssize_t end )
223314{
@@ -530,6 +621,31 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
530621 return rval ;
531622}
532623
624+
625+ PyDoc_STRVAR (pydoc_encode_basestring ,
626+ "encode_basestring(string) -> string\n"
627+ "\n"
628+ "Return a JSON representation of a Python string"
629+ );
630+
631+ static PyObject *
632+ py_encode_basestring (PyObject * self UNUSED , PyObject * pystr )
633+ {
634+ PyObject * rval ;
635+ /* Return a JSON representation of a Python string */
636+ /* METH_O */
637+ if (PyUnicode_Check (pystr )) {
638+ rval = escape_unicode (pystr );
639+ }
640+ else {
641+ PyErr_Format (PyExc_TypeError ,
642+ "first argument must be a string, not %.80s" ,
643+ Py_TYPE (pystr )-> tp_name );
644+ return NULL ;
645+ }
646+ return rval ;
647+ }
648+
533649static void
534650scanner_dealloc (PyObject * self )
535651{
@@ -1223,7 +1339,14 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
12231339 s -> item_separator = item_separator ;
12241340 s -> sort_keys = sort_keys ;
12251341 s -> skipkeys = skipkeys ;
1226- s -> fast_encode = (PyCFunction_Check (s -> encoder ) && PyCFunction_GetFunction (s -> encoder ) == (PyCFunction )py_encode_basestring_ascii );
1342+ s -> fast_encode = NULL ;
1343+ if (PyCFunction_Check (s -> encoder )) {
1344+ PyCFunction f = PyCFunction_GetFunction (s -> encoder );
1345+ if (f == (PyCFunction )py_encode_basestring_ascii ||
1346+ f == (PyCFunction )py_encode_basestring ) {
1347+ s -> fast_encode = f ;
1348+ }
1349+ }
12271350 s -> allow_nan = PyObject_IsTrue (allow_nan );
12281351
12291352 Py_INCREF (s -> markers );
@@ -1372,7 +1495,7 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
13721495{
13731496 /* Return the JSON representation of a string */
13741497 if (s -> fast_encode )
1375- return py_encode_basestring_ascii (NULL , obj );
1498+ return s -> fast_encode (NULL , obj );
13761499 else
13771500 return PyObject_CallFunctionObjArgs (s -> encoder , obj , NULL );
13781501}
@@ -1840,6 +1963,10 @@ static PyMethodDef speedups_methods[] = {
18401963 (PyCFunction )py_encode_basestring_ascii ,
18411964 METH_O ,
18421965 pydoc_encode_basestring_ascii },
1966+ {"encode_basestring" ,
1967+ (PyCFunction )py_encode_basestring ,
1968+ METH_O ,
1969+ pydoc_encode_basestring },
18431970 {"scanstring" ,
18441971 (PyCFunction )py_scanstring ,
18451972 METH_VARARGS ,
0 commit comments