Skip to content

Commit f6ac96b

Browse files
committed
Improve and simplify UTF-8 validation in JSON
1 parent 68a0639 commit f6ac96b

File tree

1 file changed

+16
-50
lines changed

1 file changed

+16
-50
lines changed

ext/json/json_encoder.c

Lines changed: 16 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -246,33 +246,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso
246246
}
247247
/* }}} */
248248

249-
static int php_json_valid_utf8(char utf8[], size_t len) /* {{{ */
250-
{
251-
size_t pos = 0, us;
252-
int status;
253-
254-
while (pos < len) {
255-
us = (unsigned char)utf8[pos];
256-
if (us < 0x80) {
257-
pos++;
258-
} else {
259-
us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status);
260-
if (status != SUCCESS) {
261-
return 0;
262-
}
263-
}
264-
}
265-
return 1;
266-
}
267-
/* }}} */
268-
269249
static int php_json_escape_string(
270250
smart_str *buf, char *s, size_t len,
271251
int options, php_json_encoder *encoder) /* {{{ */
272252
{
273253
int status;
274254
unsigned int us;
275-
size_t pos, checkpoint;
255+
size_t prev_pos, pos, checkpoint;
276256

277257
if (len == 0) {
278258
smart_str_appendl(buf, "\"\"", 2);
@@ -295,18 +275,6 @@ static int php_json_escape_string(
295275
}
296276

297277
}
298-
299-
if (options & PHP_JSON_UNESCAPED_UNICODE) {
300-
/* validate UTF-8 string first */
301-
if (!php_json_valid_utf8(s, len)) {
302-
encoder->error_code = PHP_JSON_ERROR_UTF8;
303-
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
304-
smart_str_appendl(buf, "null", 4);
305-
}
306-
return FAILURE;
307-
}
308-
}
309-
310278
pos = 0;
311279
checkpoint = buf->s ? ZSTR_LEN(buf->s) : 0;
312280

@@ -315,27 +283,27 @@ static int php_json_escape_string(
315283
smart_str_appendc(buf, '"');
316284

317285
do {
318-
us = (unsigned char)s[pos];
319-
if (us >= 0x80 && (!(options & PHP_JSON_UNESCAPED_UNICODE) || us == 0xE2)) {
320-
/* UTF-8 character */
321-
us = php_next_utf8_char((const unsigned char *)s, len, &pos, &status);
322-
if (status != SUCCESS) {
323-
if (buf->s) {
324-
ZSTR_LEN(buf->s) = checkpoint;
325-
}
326-
encoder->error_code = PHP_JSON_ERROR_UTF8;
327-
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
328-
smart_str_appendl(buf, "null", 4);
329-
}
330-
return FAILURE;
286+
prev_pos = pos;
287+
us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
288+
/* check whether UTF8 character is correct */
289+
if (status != SUCCESS) {
290+
if (buf->s) {
291+
ZSTR_LEN(buf->s) = checkpoint;
331292
}
293+
encoder->error_code = PHP_JSON_ERROR_UTF8;
294+
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
295+
smart_str_appendl(buf, "null", 4);
296+
}
297+
return FAILURE;
298+
}
299+
if (us >= 0x80 && (!(options & PHP_JSON_UNESCAPED_UNICODE) || (unsigned char)s[prev_pos] == 0xE2)) {
332300
/* Escape U+2028/U+2029 line terminators, UNLESS both
333301
JSON_UNESCAPED_UNICODE and
334302
JSON_UNESCAPED_LINE_TERMINATORS were provided */
335303
if ((options & PHP_JSON_UNESCAPED_UNICODE)
336304
&& ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
337305
|| us < 0x2028 || us > 0x2029)) {
338-
smart_str_appendl(buf, &s[pos - 3], 3);
306+
smart_str_appendl(buf, &s[prev_pos], 3);
339307
continue;
340308
}
341309
/* From http://en.wikipedia.org/wiki/UTF16 */
@@ -357,8 +325,6 @@ static int php_json_escape_string(
357325
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
358326
smart_str_appendc(buf, digits[(us & 0xf)]);
359327
} else {
360-
pos++;
361-
362328
switch (us) {
363329
case '"':
364330
if (options & PHP_JSON_HEX_QUOT) {
@@ -434,7 +400,7 @@ static int php_json_escape_string(
434400

435401
default:
436402
if (us >= ' ') {
437-
smart_str_appendc(buf, (unsigned char) us);
403+
smart_str_appendl(buf, s + prev_pos, pos - prev_pos);
438404
} else {
439405
smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1);
440406
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);

0 commit comments

Comments
 (0)