diff --git a/NEWS b/NEWS index 0f1d118c49722..5ada8e57e7baf 100644 --- a/NEWS +++ b/NEWS @@ -75,6 +75,10 @@ . Fixed bug #65933 (Cannot specify config lines longer than 1024 bytes). (Chris Wright) . Implement request #67106 (Split main fpm config). (Elan Ruusamäe, Remi) +- Json: + . Replace non-free JSON parser with a parser from Jsond extension, fixes #63520 + (JSON extension includes a problematic license statement). (Jakub Zelenka) + - LiteSpeed: . Updated LiteSpeed SAPI code from V5.5 to V6.6. (George Wang) diff --git a/UPGRADING b/UPGRADING index 2d02a38565752..0d3e51d57467f 100644 --- a/UPGRADING +++ b/UPGRADING @@ -107,6 +107,10 @@ PHP X.Y UPGRADE NOTES instead. . Removed set_magic_quotes_runtime() and its alias magic_quotes_runtime(). +- Json: + . Rejected RFC 7159 incompatible number formats in json_decode string - + top level (07, 0xff, .1, -.1) and all levels ([1.], [1.e1]) + - Stream: . Removed set_socket_blocking() in favor of its alias stream_set_blocking(). diff --git a/ext/json/CREDITS b/ext/json/CREDITS index 9bd7f44f21e71..a9a0dc70c4fdf 100644 --- a/ext/json/CREDITS +++ b/ext/json/CREDITS @@ -1,2 +1,2 @@ JSON -Omar Kilani, Scott MacVicar +Jakub Zelenka, Omar Kilani, Scott MacVicar diff --git a/ext/json/JSON_parser.c b/ext/json/JSON_parser.c deleted file mode 100644 index 4c17eb273ce50..0000000000000 --- a/ext/json/JSON_parser.c +++ /dev/null @@ -1,755 +0,0 @@ -/* JSON_parser.c */ - -/* 2005-12-30 */ - -/* -Copyright (c) 2005 JSON.org - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all 
-copies or substantial portions of the Software. - -The Software shall be used for Good, not Evil. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -#include -#include "JSON_parser.h" - -/* Windows defines IN for documentation */ -#undef IN - -#define true 1 -#define false 0 -#define __ -1 /* the universal error code */ - -/* - Characters are mapped into these 31 character classes. This allows for - a significant reduction in the size of the state transition table. -*/ - -enum classes { - C_SPACE, /* space */ - C_WHITE, /* other whitespace */ - C_LCURB, /* { */ - C_RCURB, /* } */ - C_LSQRB, /* [ */ - C_RSQRB, /* ] */ - C_COLON, /* : */ - C_COMMA, /* , */ - C_QUOTE, /* " */ - C_BACKS, /* \ */ - C_SLASH, /* / */ - C_PLUS, /* + */ - C_MINUS, /* - */ - C_POINT, /* . */ - C_ZERO , /* 0 */ - C_DIGIT, /* 123456789 */ - C_LOW_A, /* a */ - C_LOW_B, /* b */ - C_LOW_C, /* c */ - C_LOW_D, /* d */ - C_LOW_E, /* e */ - C_LOW_F, /* f */ - C_LOW_L, /* l */ - C_LOW_N, /* n */ - C_LOW_R, /* r */ - C_LOW_S, /* s */ - C_LOW_T, /* t */ - C_LOW_U, /* u */ - C_ABCDF, /* ABCDF */ - C_E, /* E */ - C_ETC, /* everything else */ - NR_CLASSES -}; - -static const int ascii_class[128] = { -/* - This array maps the 128 ASCII characters into character classes. - The remaining Unicode characters should be mapped to C_ETC. - Non-whitespace control characters are errors. 
-*/ - __, __, __, __, __, __, __, __, - __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __, - __, __, __, __, __, __, __, __, - __, __, __, __, __, __, __, __, - - C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH, - C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, - C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - - C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC, - - C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC, - C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC -}; - - -/* - The state codes. -*/ -enum states { - GO, /* start */ - OK, /* ok */ - OB, /* object */ - KE, /* key */ - CO, /* colon */ - VA, /* value */ - AR, /* array */ - ST, /* string */ - ES, /* escape */ - U1, /* u1 */ - U2, /* u2 */ - U3, /* u3 */ - U4, /* u4 */ - MI, /* minus */ - ZE, /* zero */ - IN, /* integer */ - FR, /* fraction */ - E1, /* e */ - E2, /* ex */ - E3, /* exp */ - T1, /* tr */ - T2, /* tru */ - T3, /* true */ - F1, /* fa */ - F2, /* fal */ - F3, /* fals */ - F4, /* false */ - N1, /* nu */ - N2, /* nul */ - N3, /* null */ - NR_STATES -}; - - -static const int state_transition_table[NR_STATES][NR_CLASSES] = { -/* - The state transition table takes the current state and the current symbol, - and returns either a new state or an action. An action is represented as a - negative number. A JSON text is accepted if at the end of the text the - state is OK and if the mode is MODE_DONE. - - white 1-9 ABCDF etc - space | { } [ ] : , " \ / + - . 
0 | a b c d e f l n r s t u | E |*/ -/*start GO*/ {GO,GO,-6,__,-5,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*object OB*/ {OB,OB,__,-9,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*key KE*/ {KE,KE,__,__,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*value VA*/ {VA,VA,-6,__,-5,__,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__}, -/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__}, -/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,ES,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST}, -/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__}, -/*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__}, -/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__}, -/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__}, -/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ST,ST,ST,ST,ST,ST,ST,ST,__,__,__,__,__,__,ST,ST,__}, -/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IN,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,__,__,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, -/*int IN*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,IN,IN,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, -/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, -/*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*ex 
E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__}, -/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__}, -/*true T3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__}, -/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__}, -/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__}, -/*false F4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__}, -/*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__}, -/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__}, -/*null N3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__}, -}; - - -/* - These modes can be pushed on the stack. -*/ -enum modes { - MODE_ARRAY, - MODE_DONE, - MODE_KEY, - MODE_OBJECT, -}; - - -/* - Push a mode onto the stack. Return false if there is overflow. -*/ -static int -push(JSON_parser jp, int mode) -{ - jp->top += 1; - if (jp->top >= jp->depth) { - jp->error_code = PHP_JSON_ERROR_DEPTH; - return false; - } - jp->stack[jp->top] = mode; - return true; -} - - -/* - Pop the stack, assuring that the current mode matches the expectation. - Return false if there is underflow or if the modes mismatch. 
-*/ -static int -pop(JSON_parser jp, int mode) -{ - if (jp->top < 0 || jp->stack[jp->top] != mode) { - jp->error_code = PHP_JSON_ERROR_STATE_MISMATCH; - return false; - } - jp->top -= 1; - return true; -} - -/* - new_JSON_checker starts the checking process by constructing a JSON_checker - object. It takes a depth parameter that restricts the level of maximum - nesting. - - To continue the process, call JSON_checker_char for each character in the - JSON text, and then call JSON_checker_done to obtain the final result. - These functions are fully reentrant. - - The JSON_checker object will be deleted by JSON_checker_done. - JSON_checker_char will delete the JSON_checker object if it sees an error. -*/ -JSON_parser -new_JSON_parser(int depth) -{ - JSON_parser jp = (JSON_parser)emalloc(sizeof(struct JSON_parser_struct)); - jp->state = GO; - jp->depth = depth; - jp->top = -1; - jp->error_code = PHP_JSON_ERROR_NONE; - jp->stack = (int*)ecalloc(depth, sizeof(int)); - if (depth > JSON_PARSER_DEFAULT_DEPTH) { - jp->the_zstack = (zval *) safe_emalloc(depth, sizeof(zval), 0); - } else { - jp->the_zstack = &jp->the_static_zstack[0]; - } - push(jp, MODE_DONE); - return jp; -} - -/* - Delete the JSON_parser object. 
-*/ -int -free_JSON_parser(JSON_parser jp) -{ - efree((void*)jp->stack); - if (jp->the_zstack != &jp->the_static_zstack[0]) { - efree(jp->the_zstack); - } - efree((void*)jp); - return false; -} - -static int dehexchar(char c) -{ - if (c >= '0' && c <= '9') - { - return c - '0'; - } - else if (c >= 'A' && c <= 'F') - { - return c - ('A' - 10); - } - else if (c >= 'a' && c <= 'f') - { - return c - ('a' - 10); - } - else - { - return -1; - } -} - - -static void json_create_zval(zval *z, smart_str *buf, int type, int options) -{ - if (type == IS_LONG) - { - zend_bool bigint = 0; - - if (buf->s->val[0] == '-') { - buf->s->len--; - } - - if (buf->s->len >= MAX_LENGTH_OF_LONG - 1) { - if (buf->s->len == MAX_LENGTH_OF_LONG - 1) { - int cmp = strcmp(buf->s->val + (buf->s->val[0] == '-'), long_min_digits); - - if (!(cmp < 0 || (cmp == 0 && buf->s->val[0] == '-'))) { - bigint = 1; - } - } else { - bigint = 1; - } - } - - if (bigint) { - /* value too large to represent as a long */ - if (options & PHP_JSON_BIGINT_AS_STRING) { - if (buf->s->val[0] == '-') { - /* Restore last char consumed above */ - buf->s->len++; - } - goto use_string; - } else { - goto use_double; - } - } - - ZVAL_LONG(z, ZEND_STRTOL(buf->s->val, NULL, 10)); - } - else if (type == IS_DOUBLE) - { -use_double: - ZVAL_DOUBLE(z, zend_strtod(buf->s->val, NULL)); - } - else if (type == IS_STRING) - { -use_string: - if (buf->s) { - ZVAL_STRINGL(z, buf->s->val, buf->s->len); - } else { - ZVAL_EMPTY_STRING(z); - } - } - else if (type == IS_FALSE) { - ZVAL_FALSE(z); - } - else if (type == IS_TRUE) { - ZVAL_TRUE(z); - } - else /* type == IS_NULL) || type unknown */ - { - ZVAL_NULL(z); - } -} - - -static void utf16_to_utf8(smart_str *buf, unsigned short utf16) -{ - if (utf16 < 0x80) - { - smart_str_appendc(buf, (unsigned char) utf16); - } - else if (utf16 < 0x800) - { - smart_str_appendc(buf, 0xc0 | (utf16 >> 6)); - smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); - } - else if ((utf16 & 0xfc00) == 0xdc00 - && buf->s->len 
>= 3 - && ((unsigned char) buf->s->val[buf->s->len - 3]) == 0xed - && ((unsigned char) buf->s->val[buf->s->len - 2] & 0xf0) == 0xa0 - && ((unsigned char) buf->s->val[buf->s->len - 1] & 0xc0) == 0x80) - { - /* found surrogate pair */ - zend_ulong utf32; - - utf32 = (((buf->s->val[buf->s->len - 2] & 0xf) << 16) - | ((buf->s->val[buf->s->len - 1] & 0x3f) << 10) - | (utf16 & 0x3ff)) + 0x10000; - buf->s->len -= 3; - - smart_str_appendc(buf, (unsigned char) (0xf0 | (utf32 >> 18))); - smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f)); - smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f)); - smart_str_appendc(buf, 0x80 | (utf32 & 0x3f)); - } - else - { - smart_str_appendc(buf, 0xe0 | (utf16 >> 12)); - smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f)); - smart_str_appendc(buf, 0x80 | (utf16 & 0x3f)); - } -} - -static inline void add_assoc_or_property(int assoc, zval *target, smart_str *key, zval *zv) -{ - zend_bool empty_key = !key->s || key->s->len == 0; - if (!assoc) { - add_property_zval_ex(target, empty_key ? "_empty_" : key->s->val, empty_key ? sizeof("_empty_")-1 : key->s->len, zv); - if (Z_REFCOUNTED_P(zv)) Z_DELREF_P(zv); - } else { - add_assoc_zval_ex(target, empty_key ? "" : key->s->val, empty_key ? 
0 : key->s->len, zv); - } - if (key->s) { - key->s->len = 0; - } -} - -static void attach_zval(JSON_parser jp, int up, int cur, smart_str *key, int assoc) -{ - zval *root = &jp->the_zstack[up]; - zval *child = &jp->the_zstack[cur]; - int up_mode = jp->stack[up]; - - if (up_mode == MODE_ARRAY) - { - add_next_index_zval(root, child); - } - else if (up_mode == MODE_OBJECT) - { - add_assoc_or_property(assoc, root, key, child); - } -} - - -#define FREE_BUFFERS() smart_str_free(&buf); smart_str_free(&key); -#define SWAP_BUFFERS(from, to) do { \ - zend_string *t1 = from.s; \ - int t2 = from.a; \ - from.s = to.s; \ - from.a = to.a; \ - to.s = t1; \ - to.a = t2; \ - if (from.s) { from.s->len = 0; } \ - } while(0); -#define JSON_RESET_TYPE() type = -1; - -/* - The JSON_parser takes a UTF-16 encoded string and determines if it is a - syntactically correct JSON text. Along the way, it creates a PHP variable. - - It is implemented as a Pushdown Automaton; that means it is a finite state - machine with a stack. -*/ -int -parse_JSON_ex(JSON_parser jp, zval *z, unsigned short utf16_json[], int length, int options) -{ - int next_char; /* the next character */ - int next_class; /* the next character class */ - int next_state; /* the next state */ - int the_index; - int assoc = options & PHP_JSON_OBJECT_AS_ARRAY; - - smart_str buf = {0}; - smart_str key = {0}; - - unsigned short utf16 = 0; - int type; - - JSON_RESET_TYPE(); - - for (the_index = 0; the_index < length; the_index += 1) { - next_char = utf16_json[the_index]; - if (next_char >= 128) { - next_class = C_ETC; - } else { - next_class = ascii_class[next_char]; - if (next_class <= __) { - jp->error_code = PHP_JSON_ERROR_CTRL_CHAR; - FREE_BUFFERS(); - return false; - } - } -/* - Get the next state from the transition table. 
-*/ - next_state = state_transition_table[jp->state][next_class]; - if (next_state >= 0) { -/* - Change the state and iterate -*/ - if (type == IS_STRING) { - if (next_state == ST && jp->state != U4) { - if (jp->state != ES) { - utf16_to_utf8(&buf, next_char); - } else { - switch (next_char) { - case 'b': - smart_str_appendc(&buf, '\b'); - break; - case 't': - smart_str_appendc(&buf, '\t'); - break; - case 'n': - smart_str_appendc(&buf, '\n'); - break; - case 'f': - smart_str_appendc(&buf, '\f'); - break; - case 'r': - smart_str_appendc(&buf, '\r'); - break; - default: - utf16_to_utf8(&buf, next_char); - break; - } - } - } else if (next_state == U2) { - utf16 = dehexchar(next_char) << 12; - } else if (next_state == U3) { - utf16 += dehexchar(next_char) << 8; - } else if (next_state == U4) { - utf16 += dehexchar(next_char) << 4; - } else if (next_state == ST && jp->state == U4) { - utf16 += dehexchar(next_char); - utf16_to_utf8(&buf, utf16); - } - } else if (type < IS_LONG && (next_class == C_DIGIT || next_class == C_ZERO)) { - type = IS_LONG; - smart_str_appendc(&buf, next_char); - } else if (type == IS_LONG && next_state == E1) { - type = IS_DOUBLE; - smart_str_appendc(&buf, next_char); - } else if (type < IS_DOUBLE && next_class == C_POINT) { - type = IS_DOUBLE; - smart_str_appendc(&buf, next_char); - } else if (type < IS_STRING && next_class == C_QUOTE) { - type = IS_STRING; - } else if (type < IS_FALSE && (jp->state == F4 && next_state == OK)) { - type = IS_FALSE; - } else if (type < IS_TRUE && (jp->state == T3 && next_state == OK)) { - type = IS_TRUE; - } else if (type < IS_NULL && jp->state == N3 && next_state == OK) { - type = IS_NULL; - } else if (type != IS_STRING && next_class > C_WHITE) { - utf16_to_utf8(&buf, next_char); - } - jp->state = next_state; - } else { -/* - Perform one of the predefined actions. 
-*/ - switch (next_state) { -/* empty } */ - case -9: - if (!pop(jp, MODE_KEY)) { - FREE_BUFFERS(); - return false; - } - jp->state = OK; - break; -/* } */ - case -8: - if (type != -1 && jp->stack[jp->top] == MODE_OBJECT) - { - zval mval; - smart_str_0(&buf); - - json_create_zval(&mval, &buf, type, options); - - add_assoc_or_property(assoc, &jp->the_zstack[jp->top], &key, &mval); - - if (buf.s) { buf.s->len = 0; } - JSON_RESET_TYPE(); - } - - - if (!pop(jp, MODE_OBJECT)) { - FREE_BUFFERS(); - return false; - } - jp->state = OK; - break; -/* ] */ - case -7: - { - if (type != -1 && jp->stack[jp->top] == MODE_ARRAY) - { - zval mval; - smart_str_0(&buf); - - json_create_zval(&mval, &buf, type, options); - add_next_index_zval(&jp->the_zstack[jp->top], &mval); - if (buf.s) { buf.s->len = 0; } - JSON_RESET_TYPE(); - } - - if (!pop(jp, MODE_ARRAY)) { - FREE_BUFFERS(); - return false; - } - jp->state = OK; - } - break; -/* { */ - case -6: - if (!push(jp, MODE_KEY)) { - FREE_BUFFERS(); - return false; - } - - jp->state = OB; - if (jp->top > 0) { - zval *obj = &jp->the_zstack[jp->top]; - - if (!assoc) { - object_init(obj); - } else { - array_init(obj); - } - - if (jp->top == 1) { - ZVAL_COPY_VALUE(z, obj); - } - - if (jp->top > 1) { - attach_zval(jp, jp->top - 1, jp->top, &key, assoc); - } - - JSON_RESET_TYPE(); - } - - break; -/* [ */ - case -5: - if (!push(jp, MODE_ARRAY)) { - FREE_BUFFERS(); - return false; - } - jp->state = AR; - - if (jp->top > 0) { - zval *arr = &jp->the_zstack[jp->top]; - - array_init(arr); - - if (jp->top == 1) { - ZVAL_COPY_VALUE(z, arr); - } - - if (jp->top > 1) { - attach_zval(jp, jp->top - 1, jp->top, &key, assoc); - } - - JSON_RESET_TYPE(); - } - - break; - -/* " */ - case -4: - switch (jp->stack[jp->top]) { - case MODE_KEY: - jp->state = CO; - smart_str_0(&buf); - SWAP_BUFFERS(buf, key); - JSON_RESET_TYPE(); - break; - case MODE_ARRAY: - case MODE_OBJECT: - jp->state = OK; - break; - case MODE_DONE: - if (type == IS_STRING) { - if (buf.s) { - 
smart_str_0(&buf); - ZVAL_STRINGL(z, buf.s->val, buf.s->len); - } else { - ZVAL_EMPTY_STRING(z); - } - jp->state = OK; - break; - } - /* fall through if not IS_STRING */ - default: - FREE_BUFFERS(); - jp->error_code = PHP_JSON_ERROR_SYNTAX; - return false; - } - break; -/* , */ - case -3: - { - zval mval; - - if (type != -1 && - (jp->stack[jp->top] == MODE_OBJECT || - jp->stack[jp->top] == MODE_ARRAY)) - { - smart_str_0(&buf); - json_create_zval(&mval, &buf, type, options); - } - - switch (jp->stack[jp->top]) { - case MODE_OBJECT: - if (pop(jp, MODE_OBJECT) && push(jp, MODE_KEY)) { - if (type != -1) { - add_assoc_or_property(assoc, &jp->the_zstack[jp->top], &key, &mval); - } - jp->state = KE; - } - break; - case MODE_ARRAY: - if (type != -1) { - add_next_index_zval(&jp->the_zstack[jp->top], &mval); - } - jp->state = VA; - break; - default: - FREE_BUFFERS(); - jp->error_code = PHP_JSON_ERROR_SYNTAX; - return false; - } - if (buf.s) { buf.s->len = 0; } - JSON_RESET_TYPE(); - } - break; -/* : */ - case -2: - if (pop(jp, MODE_KEY) && push(jp, MODE_OBJECT)) { - jp->state = VA; - break; - } -/* - syntax error -*/ - default: - { - jp->error_code = PHP_JSON_ERROR_SYNTAX; - FREE_BUFFERS(); - return false; - } - } - } - } - - FREE_BUFFERS(); - if (jp->state == OK && pop(jp, MODE_DONE)) { - return true; - } - - jp->error_code = PHP_JSON_ERROR_SYNTAX; - return false; -} - - -/* - * Local variables: - * tab-width: 4 - * c-basic-offset: 4 - * End: - * vim600: noet sw=4 ts=4 - * vim<600: noet sw=4 ts=4 - */ diff --git a/ext/json/JSON_parser.h b/ext/json/JSON_parser.h deleted file mode 100644 index 718b9197b674d..0000000000000 --- a/ext/json/JSON_parser.h +++ /dev/null @@ -1,43 +0,0 @@ -/* JSON_parser.h */ - -#ifndef JSON_PARSER_H -#define JSON_PARSER_H - -#include "php.h" -#include "php_json.h" -#include "zend_smart_str.h" - -#define JSON_PARSER_DEFAULT_DEPTH 512 - -typedef struct JSON_parser_struct { - int state; - int depth; - int top; - int error_code; - int* stack; - zval 
*the_zstack; - zval the_static_zstack[JSON_PARSER_DEFAULT_DEPTH]; -} * JSON_parser; - -enum error_codes { - PHP_JSON_ERROR_NONE = 0, - PHP_JSON_ERROR_DEPTH, - PHP_JSON_ERROR_STATE_MISMATCH, - PHP_JSON_ERROR_CTRL_CHAR, - PHP_JSON_ERROR_SYNTAX, - PHP_JSON_ERROR_UTF8, - PHP_JSON_ERROR_RECURSION, - PHP_JSON_ERROR_INF_OR_NAN, - PHP_JSON_ERROR_UNSUPPORTED_TYPE -}; - -extern JSON_parser new_JSON_parser(int depth); -extern int parse_JSON_ex(JSON_parser jp, zval *z, unsigned short utf16_json[], int length, int options); -extern int free_JSON_parser(JSON_parser jp); - -static inline int parse_JSON(JSON_parser jp, zval *z, unsigned short utf16_json[], int length, int assoc) -{ - return parse_JSON_ex(jp, z, utf16_json, length, assoc ? PHP_JSON_OBJECT_AS_ARRAY : 0); -} - -#endif diff --git a/ext/json/Makefile.frag b/ext/json/Makefile.frag new file mode 100644 index 0000000000000..cb26acabc70af --- /dev/null +++ b/ext/json/Makefile.frag @@ -0,0 +1,5 @@ +$(srcdir)/json_scanner.c: $(srcdir)/json_scanner.re + $(RE2C) -t $(srcdir)/php_json_scanner_defs.h --no-generation-date -bci -o $@ $(srcdir)/json_scanner.re + +$(srcdir)/json_parser.tab.c: $(srcdir)/json_parser.y + $(YACC) --defines -l $(srcdir)/json_parser.y -o $@ diff --git a/ext/json/config.m4 b/ext/json/config.m4 index 9c83a5d893694..fb87a939928dd 100644 --- a/ext/json/config.m4 +++ b/ext/json/config.m4 @@ -9,7 +9,13 @@ if test "$PHP_JSON" != "no"; then AC_DEFINE([HAVE_JSON],1 ,[whether to enable JavaScript Object Serialization support]) AC_HEADER_STDC - PHP_NEW_EXTENSION(json, json.c utf8_decode.c JSON_parser.c, $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1) +PHP_NEW_EXTENSION(json, + json.c \ + json_encoder.c \ + json_parser.tab.c \ + json_scanner.c, + $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1) PHP_INSTALL_HEADERS([ext/json], [php_json.h]) + PHP_ADD_MAKEFILE_FRAGMENT() PHP_SUBST(JSON_SHARED_LIBADD) fi diff --git a/ext/json/config.w32 b/ext/json/config.w32 index 60ccf91630d8b..996b86829dee2 100644 --- 
a/ext/json/config.w32 +++ b/ext/json/config.w32 @@ -5,7 +5,7 @@ ARG_ENABLE("json", "JavaScript Object Serialization support", "yes"); if (PHP_JSON != "no") { EXTENSION('json', 'json.c', PHP_JSON_SHARED, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"); - ADD_SOURCES(configure_module_dirname, "JSON_parser.c utf8_decode.c", "json"); + ADD_SOURCES(configure_module_dirname, "json_encoder.c json_parser.tab.c json_scanner.c", "json"); PHP_INSTALL_HEADERS("ext/json/", "php_json.h"); } diff --git a/ext/json/json.c b/ext/json/json.c index adeffe154c424..0f8fdfb132382 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -27,8 +27,9 @@ #include "ext/standard/info.h" #include "ext/standard/html.h" #include "zend_smart_str.h" -#include "JSON_parser.h" #include "php_json.h" +#include "php_json_encoder.h" +#include "php_json_parser.h" #include #include @@ -45,8 +46,6 @@ static PHP_FUNCTION(json_decode); static PHP_FUNCTION(json_last_error); static PHP_FUNCTION(json_last_error_msg); -static const char digits[] = "0123456789abcdef"; - PHP_JSON_API zend_class_entry *php_json_serializable_ce; ZEND_DECLARE_MODULE_GLOBALS(json) @@ -182,613 +181,22 @@ static PHP_MINFO_FUNCTION(json) } /* }}} */ -static void json_escape_string(smart_str *buf, char *s, size_t len, int options); - -static int json_determine_array_type(zval *val) /* {{{ */ -{ - int i; - HashTable *myht = HASH_OF(val); - - i = myht ? 
zend_hash_num_elements(myht) : 0; - if (i > 0) { - zend_string *key; - zend_ulong index, idx; - - idx = 0; - ZEND_HASH_FOREACH_KEY(myht, index, key) { - if (key) { - return PHP_JSON_OUTPUT_OBJECT; - } else { - if (index != idx) { - return PHP_JSON_OUTPUT_OBJECT; - } - } - idx++; - } ZEND_HASH_FOREACH_END(); - } - - return PHP_JSON_OUTPUT_ARRAY; -} -/* }}} */ - -/* {{{ Pretty printing support functions */ - -static inline void json_pretty_print_char(smart_str *buf, int options, char c) /* {{{ */ -{ - if (options & PHP_JSON_PRETTY_PRINT) { - smart_str_appendc(buf, c); - } -} -/* }}} */ - -static inline void json_pretty_print_indent(smart_str *buf, int options) /* {{{ */ -{ - int i; - - if (options & PHP_JSON_PRETTY_PRINT) { - for (i = 0; i < JSON_G(encoder_depth); ++i) { - smart_str_appendl(buf, " ", 4); - } - } -} -/* }}} */ - -/* }}} */ - -static void json_encode_array(smart_str *buf, zval *val, int options) /* {{{ */ -{ - int i, r, need_comma = 0; - HashTable *myht; - - if (Z_TYPE_P(val) == IS_ARRAY) { - myht = HASH_OF(val); - r = (options & PHP_JSON_FORCE_OBJECT) ? PHP_JSON_OUTPUT_OBJECT : json_determine_array_type(val); - } else { - myht = Z_OBJPROP_P(val); - r = PHP_JSON_OUTPUT_OBJECT; - } - - if (myht && ZEND_HASH_GET_APPLY_COUNT(myht) > 1) { - JSON_G(error_code) = PHP_JSON_ERROR_RECURSION; - smart_str_appendl(buf, "null", 4); - return; - } - - if (r == PHP_JSON_OUTPUT_ARRAY) { - smart_str_appendc(buf, '['); - } else { - smart_str_appendc(buf, '{'); - } - - ++JSON_G(encoder_depth); - - i = myht ? 
zend_hash_num_elements(myht) : 0; - - if (i > 0) { - zend_string *key; - zval *data; - zend_ulong index; - HashTable *tmp_ht; - - ZEND_HASH_FOREACH_KEY_VAL_IND(myht, index, key, data) { - ZVAL_DEREF(data); - tmp_ht = HASH_OF(data); - if (tmp_ht && ZEND_HASH_APPLY_PROTECTION(tmp_ht)) { - ZEND_HASH_INC_APPLY_COUNT(tmp_ht); - } - - if (r == PHP_JSON_OUTPUT_ARRAY) { - if (need_comma) { - smart_str_appendc(buf, ','); - } else { - need_comma = 1; - } - - json_pretty_print_char(buf, options, '\n'); - json_pretty_print_indent(buf, options); - php_json_encode(buf, data, options); - } else if (r == PHP_JSON_OUTPUT_OBJECT) { - if (key) { - if (key->val[0] == '\0' && Z_TYPE_P(val) == IS_OBJECT) { - /* Skip protected and private members. */ - if (tmp_ht && ZEND_HASH_APPLY_PROTECTION(tmp_ht)) { - ZEND_HASH_DEC_APPLY_COUNT(tmp_ht); - } - continue; - } - - if (need_comma) { - smart_str_appendc(buf, ','); - } else { - need_comma = 1; - } - - json_pretty_print_char(buf, options, '\n'); - json_pretty_print_indent(buf, options); - - json_escape_string(buf, key->val, key->len, options & ~PHP_JSON_NUMERIC_CHECK); - smart_str_appendc(buf, ':'); - - json_pretty_print_char(buf, options, ' '); - - php_json_encode(buf, data, options); - } else { - if (need_comma) { - smart_str_appendc(buf, ','); - } else { - need_comma = 1; - } - - json_pretty_print_char(buf, options, '\n'); - json_pretty_print_indent(buf, options); - - smart_str_appendc(buf, '"'); - smart_str_append_long(buf, (zend_long) index); - smart_str_appendc(buf, '"'); - smart_str_appendc(buf, ':'); - - json_pretty_print_char(buf, options, ' '); - - php_json_encode(buf, data, options); - } - } - - if (tmp_ht && ZEND_HASH_APPLY_PROTECTION(tmp_ht)) { - ZEND_HASH_DEC_APPLY_COUNT(tmp_ht); - } - } ZEND_HASH_FOREACH_END(); - } - - if (JSON_G(encoder_depth) > JSON_G(encode_max_depth)) { - JSON_G(error_code) = PHP_JSON_ERROR_DEPTH; - } - --JSON_G(encoder_depth); - - /* Only keep closing bracket on same line for empty arrays/objects */ - if 
(need_comma) { - json_pretty_print_char(buf, options, '\n'); - json_pretty_print_indent(buf, options); - } - - if (r == PHP_JSON_OUTPUT_ARRAY) { - smart_str_appendc(buf, ']'); - } else { - smart_str_appendc(buf, '}'); - } -} -/* }}} */ - -static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len) /* {{{ */ -{ - size_t pos = 0, us; - int j, status; - - if (utf16) { - /* really convert the utf8 string */ - for (j=0 ; pos < len ; j++) { - us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); - if (status != SUCCESS) { - return -1; - } - /* From http://en.wikipedia.org/wiki/UTF16 */ - if (us >= 0x10000) { - us -= 0x10000; - utf16[j++] = (unsigned short)((us >> 10) | 0xd800); - utf16[j] = (unsigned short)((us & 0x3ff) | 0xdc00); - } else { - utf16[j] = (unsigned short)us; - } - } - } else { - /* Only check if utf8 string is valid, and compute utf16 length */ - for (j=0 ; pos < len ; j++) { - us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); - if (status != SUCCESS) { - return -1; - } - if (us >= 0x10000) { - j++; - } - } - } - return j; -} -/* }}} */ - -static void json_escape_string(smart_str *buf, char *s, size_t len, int options) /* {{{ */ -{ - int status; - unsigned int us, next_us = 0; - size_t pos, checkpoint; - - if (len == 0) { - smart_str_appendl(buf, "\"\"", 2); - return; - } - - if (options & PHP_JSON_NUMERIC_CHECK) { - double d; - int type; - zend_long p; - - if ((type = is_numeric_string(s, len, &p, &d, 0)) != 0) { - if (type == IS_LONG) { - smart_str_append_long(buf, p); - } else if (type == IS_DOUBLE) { - if (!zend_isinf(d) && !zend_isnan(d)) { - char num[NUM_BUF_SIZE]; - int l; - - php_gcvt(d, EG(precision), '.', 'e', (char *)num); - l = strlen(num); - if (options & PHP_JSON_PRESERVE_ZERO_FRACTION && strchr(num, '.') == NULL && l < NUM_BUF_SIZE - 2) { - num[l++] = '.'; - num[l++] = '0'; - num[l] = '\0'; - } - smart_str_appendl(buf, num, l); - } else { - JSON_G(error_code) = 
PHP_JSON_ERROR_INF_OR_NAN; - smart_str_appendc(buf, '0'); - } - } - return; - } - - } - - if (options & PHP_JSON_UNESCAPED_UNICODE) { - /* validate UTF-8 string first */ - if (json_utf8_to_utf16(NULL, s, len) < 0) { - JSON_G(error_code) = PHP_JSON_ERROR_UTF8; - smart_str_appendl(buf, "null", 4); - return; - } - } - - pos = 0; - checkpoint = buf->s ? buf->s->len : 0; - - /* pre-allocate for string length plus 2 quotes */ - smart_str_alloc(buf, len+2, 0); - smart_str_appendc(buf, '"'); - - do { - if (UNEXPECTED(next_us)) { - us = next_us; - next_us = 0; - } else { - us = (unsigned char)s[pos]; - if (!(options & PHP_JSON_UNESCAPED_UNICODE) && us >= 0x80) { - /* UTF-8 character */ - us = php_next_utf8_char((const unsigned char *)s, len, &pos, &status); - if (status != SUCCESS) { - if (buf->s) { - buf->s->len = checkpoint; - } - JSON_G(error_code) = PHP_JSON_ERROR_UTF8; - smart_str_appendl(buf, "null", 4); - return; - } - /* From http://en.wikipedia.org/wiki/UTF16 */ - if (us >= 0x10000) { - us -= 0x10000; - next_us = (unsigned short)((us & 0x3ff) | 0xdc00); - us = (unsigned short)((us >> 10) | 0xd800); - } - } else { - pos++; - } - } - - switch (us) { - case '"': - if (options & PHP_JSON_HEX_QUOT) { - smart_str_appendl(buf, "\\u0022", 6); - } else { - smart_str_appendl(buf, "\\\"", 2); - } - break; - - case '\\': - smart_str_appendl(buf, "\\\\", 2); - break; - - case '/': - if (options & PHP_JSON_UNESCAPED_SLASHES) { - smart_str_appendc(buf, '/'); - } else { - smart_str_appendl(buf, "\\/", 2); - } - break; - - case '\b': - smart_str_appendl(buf, "\\b", 2); - break; - - case '\f': - smart_str_appendl(buf, "\\f", 2); - break; - - case '\n': - smart_str_appendl(buf, "\\n", 2); - break; - - case '\r': - smart_str_appendl(buf, "\\r", 2); - break; - - case '\t': - smart_str_appendl(buf, "\\t", 2); - break; - - case '<': - if (options & PHP_JSON_HEX_TAG) { - smart_str_appendl(buf, "\\u003C", 6); - } else { - smart_str_appendc(buf, '<'); - } - break; - - case '>': - if 
(options & PHP_JSON_HEX_TAG) { - smart_str_appendl(buf, "\\u003E", 6); - } else { - smart_str_appendc(buf, '>'); - } - break; - - case '&': - if (options & PHP_JSON_HEX_AMP) { - smart_str_appendl(buf, "\\u0026", 6); - } else { - smart_str_appendc(buf, '&'); - } - break; - - case '\'': - if (options & PHP_JSON_HEX_APOS) { - smart_str_appendl(buf, "\\u0027", 6); - } else { - smart_str_appendc(buf, '\''); - } - break; - - default: - if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) { - smart_str_appendc(buf, (unsigned char) us); - } else { - smart_str_appendl(buf, "\\u", 2); - smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); - smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); - smart_str_appendc(buf, digits[(us & 0xf)]); - } - break; - } - } while (pos < len || next_us); - - smart_str_appendc(buf, '"'); -} -/* }}} */ - -static void json_encode_serializable_object(smart_str *buf, zval *val, int options) /* {{{ */ -{ - zend_class_entry *ce = Z_OBJCE_P(val); - zval retval, fname; - HashTable* myht; - - if (Z_TYPE_P(val) == IS_ARRAY) { - myht = HASH_OF(val); - } else { - myht = Z_OBJPROP_P(val); - } - - if (myht && ZEND_HASH_GET_APPLY_COUNT(myht) > 1) { - JSON_G(error_code) = PHP_JSON_ERROR_RECURSION; - smart_str_appendl(buf, "null", 4); - return; - } - - ZVAL_STRING(&fname, "jsonSerialize"); - - if (FAILURE == call_user_function_ex(EG(function_table), val, &fname, &retval, 0, NULL, 1, NULL) || Z_TYPE(retval) == IS_UNDEF) { - zend_throw_exception_ex(NULL, 0, "Failed calling %s::jsonSerialize()", ce->name->val); - smart_str_appendl(buf, "null", sizeof("null") - 1); - zval_ptr_dtor(&fname); - return; - } - - if (EG(exception)) { - /* Error already raised */ - zval_ptr_dtor(&retval); - zval_ptr_dtor(&fname); - smart_str_appendl(buf, "null", sizeof("null") - 1); - return; - } - - if ((Z_TYPE(retval) == IS_OBJECT) && - (Z_OBJ(retval) == Z_OBJ_P(val))) { - /* Handle the case where 
jsonSerialize does: return $this; by going straight to encode array */ - json_encode_array(buf, &retval, options); - } else { - /* All other types, encode as normal */ - php_json_encode(buf, &retval, options); - } - - zval_ptr_dtor(&retval); - zval_ptr_dtor(&fname); -} -/* }}} */ - PHP_JSON_API void php_json_encode(smart_str *buf, zval *val, int options) /* {{{ */ { -again: - switch (Z_TYPE_P(val)) - { - case IS_NULL: - smart_str_appendl(buf, "null", 4); - break; - - case IS_TRUE: - smart_str_appendl(buf, "true", 4); - break; - case IS_FALSE: - smart_str_appendl(buf, "false", 5); - break; - - case IS_LONG: - smart_str_append_long(buf, Z_LVAL_P(val)); - break; - - case IS_DOUBLE: - { - char num[NUM_BUF_SIZE]; - int len; - double dbl = Z_DVAL_P(val); - - if (!zend_isinf(dbl) && !zend_isnan(dbl)) { - php_gcvt(dbl, EG(precision), '.', 'e', (char *)num); - len = strlen(num); - if (options & PHP_JSON_PRESERVE_ZERO_FRACTION && strchr(num, '.') == NULL && len < NUM_BUF_SIZE - 2) { - num[len++] = '.'; - num[len++] = '0'; - num[len] = '\0'; - } - smart_str_appendl(buf, num, len); - } else { - JSON_G(error_code) = PHP_JSON_ERROR_INF_OR_NAN; - smart_str_appendc(buf, '0'); - } - } - break; - - case IS_STRING: - json_escape_string(buf, Z_STRVAL_P(val), Z_STRLEN_P(val), options); - break; - - case IS_OBJECT: - if (instanceof_function(Z_OBJCE_P(val), php_json_serializable_ce)) { - json_encode_serializable_object(buf, val, options); - break; - } - /* fallthrough -- Non-serializable object */ - case IS_ARRAY: - json_encode_array(buf, val, options); - break; - - case IS_REFERENCE: - val = Z_REFVAL_P(val); - goto again; - - default: - JSON_G(error_code) = PHP_JSON_ERROR_UNSUPPORTED_TYPE; - smart_str_appendl(buf, "null", 4); - break; - } - - return; + php_json_encode_zval(buf, val, options); } /* }}} */ PHP_JSON_API void php_json_decode_ex(zval *return_value, char *str, size_t str_len, zend_long options, zend_long depth) /* {{{ */ { - size_t utf16_len; - unsigned short *utf16; - 
JSON_parser jp; - - utf16 = (unsigned short *) safe_emalloc((str_len+1), sizeof(unsigned short), 1); - - utf16_len = json_utf8_to_utf16(utf16, str, str_len); - if (utf16_len <= 0) { - if (utf16) { - efree(utf16); - } - JSON_G(error_code) = PHP_JSON_ERROR_UTF8; - RETURN_NULL(); - } + php_json_parser parser; - if (depth <= 0) { - php_error_docref(NULL, E_WARNING, "Depth must be greater than zero"); - efree(utf16); - RETURN_NULL(); - } + php_json_parser_init(&parser, return_value, str, str_len, options, depth); - jp = new_JSON_parser(depth); - if (!parse_JSON_ex(jp, return_value, utf16, utf16_len, options)) { - double d; - int type, overflow_info; - zend_long p; - char *trim = str; - int trim_len = str_len; - - zval_dtor(return_value); - - /* Increment trimmed string pointer to strip leading whitespace */ - /* JSON RFC says to consider as whitespace: space, tab, LF or CR */ - while (trim_len && (*trim == ' ' || *trim == '\t' || *trim == '\n' || *trim == '\r')) { - trim++; - trim_len--; - } - - /* Decrement trimmed string length to strip trailing whitespace */ - while (trim_len && (trim[trim_len - 1] == ' ' || trim[trim_len - 1] == '\t' || trim[trim_len - 1] == '\n' || trim[trim_len - 1] == '\r')) { - trim_len--; - } - - RETVAL_NULL(); - if (trim_len == 4) { - if (!strncmp(trim, "null", trim_len)) { - /* We need to explicitly clear the error because its an actual NULL and not an error */ - jp->error_code = PHP_JSON_ERROR_NONE; - RETVAL_NULL(); - } else if (!strncmp(trim, "true", trim_len)) { - RETVAL_BOOL(1); - } - } else if (trim_len == 5 && !strncmp(trim, "false", trim_len)) { - RETVAL_BOOL(0); - } - - if ((type = is_numeric_string_ex(trim, trim_len, &p, &d, 0, &overflow_info)) != 0) { - if (type == IS_LONG) { - RETVAL_LONG(p); - } else if (type == IS_DOUBLE) { - if (options & PHP_JSON_BIGINT_AS_STRING && overflow_info) { - /* Within an object or array, a numeric literal is assumed - * to be an integer if and only if it's entirely made up of - * digits (exponent 
notation will result in the number - * being treated as a double). We'll match that behaviour - * here. */ - int i; - zend_bool is_float = 0; - - for (i = (trim[0] == '-' ? 1 : 0); i < trim_len; i++) { - /* Not using isdigit() because it's locale specific, - * but we expect JSON input to always be UTF-8. */ - if (trim[i] < '0' || trim[i] > '9') { - is_float = 1; - break; - } - } - - if (is_float) { - RETVAL_DOUBLE(d); - } else { - RETVAL_STRINGL(trim, trim_len); - } - } else { - RETVAL_DOUBLE(d); - } - } - } - - if (Z_TYPE_P(return_value) != IS_NULL) { - jp->error_code = PHP_JSON_ERROR_NONE; - } + if (php_json_yyparse(&parser)) { + JSON_G(error_code) = php_json_parser_error_code(&parser); + RETURN_NULL(); } - efree(utf16); - JSON_G(error_code) = jp->error_code; - free_JSON_parser(jp); } /* }}} */ @@ -799,7 +207,7 @@ static PHP_FUNCTION(json_encode) zval *parameter; smart_str buf = {0}; zend_long options = 0; - zend_long depth = JSON_PARSER_DEFAULT_DEPTH; + zend_long depth = PHP_JSON_PARSER_DEFAULT_DEPTH; if (zend_parse_parameters(ZEND_NUM_ARGS(), "z|ll", ¶meter, &options, &depth) == FAILURE) { return; @@ -828,7 +236,7 @@ static PHP_FUNCTION(json_decode) char *str; size_t str_len; zend_bool assoc = 0; /* return JS objects as PHP objects by default */ - zend_long depth = JSON_PARSER_DEFAULT_DEPTH; + zend_long depth = PHP_JSON_PARSER_DEFAULT_DEPTH; zend_long options = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|bll", &str, &str_len, &assoc, &depth, &options) == FAILURE) { diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c new file mode 100644 index 0000000000000..f219b50263f16 --- /dev/null +++ b/ext/json/json_encoder.c @@ -0,0 +1,549 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 7 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2015 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is 
subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Omar Kilani | + | Jakub Zelenka | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#include "php_ini.h" +#include "ext/standard/info.h" +#include "ext/standard/html.h" +#include "zend_smart_str.h" +#include "php_json.h" +#include + +/* double limits */ +#include +#if defined(DBL_MANT_DIG) && defined(DBL_MIN_EXP) +#define PHP_JSON_DOUBLE_MAX_LENGTH (3 + DBL_MANT_DIG - DBL_MIN_EXP) +#else +#define PHP_JSON_DOUBLE_MAX_LENGTH 1080 +#endif + +ZEND_DECLARE_MODULE_GLOBALS(json) + +static const char digits[] = "0123456789abcdef"; + +static void php_json_escape_string(smart_str *buf, char *s, size_t len, int options); + +static int php_json_determine_array_type(zval *val) /* {{{ */ +{ + int i; + HashTable *myht = HASH_OF(val); + + i = myht ? 
zend_hash_num_elements(myht) : 0; + if (i > 0) { + zend_string *key; + zend_ulong index, idx; + + idx = 0; + ZEND_HASH_FOREACH_KEY(myht, index, key) { + if (key) { + return PHP_JSON_OUTPUT_OBJECT; + } else { + if (index != idx) { + return PHP_JSON_OUTPUT_OBJECT; + } + } + idx++; + } ZEND_HASH_FOREACH_END(); + } + + return PHP_JSON_OUTPUT_ARRAY; +} +/* }}} */ + +/* {{{ Pretty printing support functions */ + +static inline void php_json_pretty_print_char(smart_str *buf, int options, char c) /* {{{ */ +{ + if (options & PHP_JSON_PRETTY_PRINT) { + smart_str_appendc(buf, c); + } +} +/* }}} */ + +static inline void php_json_pretty_print_indent(smart_str *buf, int options) /* {{{ */ +{ + int i; + + if (options & PHP_JSON_PRETTY_PRINT) { + for (i = 0; i < JSON_G(encoder_depth); ++i) { + smart_str_appendl(buf, " ", 4); + } + } +} +/* }}} */ + +/* }}} */ + +static inline void php_json_encode_double(smart_str *buf, double d, int options) /* {{{ */ +{ + if (!zend_isinf(d) && !zend_isnan(d)) { + size_t len; + char num[PHP_JSON_DOUBLE_MAX_LENGTH]; + php_gcvt(d, EG(precision), '.', 'e', &num[0]); + len = strlen(num); + if (options & PHP_JSON_PRESERVE_ZERO_FRACTION && strchr(num, '.') == NULL && len < PHP_JSON_DOUBLE_MAX_LENGTH - 2) { + num[len++] = '.'; + num[len++] = '0'; + num[len] = '\0'; + } + smart_str_appendl(buf, num, len); + } else { + JSON_G(error_code) = PHP_JSON_ERROR_INF_OR_NAN; + smart_str_appendc(buf, '0'); + } +} +/* }}} */ + +static void php_json_encode_array(smart_str *buf, zval *val, int options) /* {{{ */ +{ + int i, r, need_comma = 0; + HashTable *myht; + + if (Z_TYPE_P(val) == IS_ARRAY) { + myht = HASH_OF(val); + r = (options & PHP_JSON_FORCE_OBJECT) ? 
PHP_JSON_OUTPUT_OBJECT : php_json_determine_array_type(val); + } else { + myht = Z_OBJPROP_P(val); + r = PHP_JSON_OUTPUT_OBJECT; + } + + if (myht && ZEND_HASH_GET_APPLY_COUNT(myht) > 1) { + JSON_G(error_code) = PHP_JSON_ERROR_RECURSION; + smart_str_appendl(buf, "null", 4); + return; + } + + if (r == PHP_JSON_OUTPUT_ARRAY) { + smart_str_appendc(buf, '['); + } else { + smart_str_appendc(buf, '{'); + } + + ++JSON_G(encoder_depth); + + i = myht ? zend_hash_num_elements(myht) : 0; + + if (i > 0) { + zend_string *key; + zval *data; + zend_ulong index; + HashTable *tmp_ht; + + ZEND_HASH_FOREACH_KEY_VAL_IND(myht, index, key, data) { + ZVAL_DEREF(data); + tmp_ht = HASH_OF(data); + if (tmp_ht && ZEND_HASH_APPLY_PROTECTION(tmp_ht)) { + ZEND_HASH_INC_APPLY_COUNT(tmp_ht); + } + + if (r == PHP_JSON_OUTPUT_ARRAY) { + if (need_comma) { + smart_str_appendc(buf, ','); + } else { + need_comma = 1; + } + + php_json_pretty_print_char(buf, options, '\n'); + php_json_pretty_print_indent(buf, options); + php_json_encode(buf, data, options); + } else if (r == PHP_JSON_OUTPUT_OBJECT) { + if (key) { + if (key->val[0] == '\0' && Z_TYPE_P(val) == IS_OBJECT) { + /* Skip protected and private members. 
*/ + if (tmp_ht && ZEND_HASH_APPLY_PROTECTION(tmp_ht)) { + ZEND_HASH_DEC_APPLY_COUNT(tmp_ht); + } + continue; + } + + if (need_comma) { + smart_str_appendc(buf, ','); + } else { + need_comma = 1; + } + + php_json_pretty_print_char(buf, options, '\n'); + php_json_pretty_print_indent(buf, options); + + php_json_escape_string(buf, key->val, key->len, options & ~PHP_JSON_NUMERIC_CHECK); + smart_str_appendc(buf, ':'); + + php_json_pretty_print_char(buf, options, ' '); + + php_json_encode(buf, data, options); + } else { + if (need_comma) { + smart_str_appendc(buf, ','); + } else { + need_comma = 1; + } + + php_json_pretty_print_char(buf, options, '\n'); + php_json_pretty_print_indent(buf, options); + + smart_str_appendc(buf, '"'); + smart_str_append_long(buf, (zend_long) index); + smart_str_appendc(buf, '"'); + smart_str_appendc(buf, ':'); + + php_json_pretty_print_char(buf, options, ' '); + + php_json_encode(buf, data, options); + } + } + + if (tmp_ht && ZEND_HASH_APPLY_PROTECTION(tmp_ht)) { + ZEND_HASH_DEC_APPLY_COUNT(tmp_ht); + } + } ZEND_HASH_FOREACH_END(); + } + + if (JSON_G(encoder_depth) > JSON_G(encode_max_depth)) { + JSON_G(error_code) = PHP_JSON_ERROR_DEPTH; + } + --JSON_G(encoder_depth); + + /* Only keep closing bracket on same line for empty arrays/objects */ + if (need_comma) { + php_json_pretty_print_char(buf, options, '\n'); + php_json_pretty_print_indent(buf, options); + } + + if (r == PHP_JSON_OUTPUT_ARRAY) { + smart_str_appendc(buf, ']'); + } else { + smart_str_appendc(buf, '}'); + } +} +/* }}} */ + +static int php_json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len) /* {{{ */ +{ + size_t pos = 0, us; + int j, status; + + if (utf16) { + /* really convert the utf8 string */ + for (j=0 ; pos < len ; j++) { + us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); + if (status != SUCCESS) { + return -1; + } + /* From http://en.wikipedia.org/wiki/UTF16 */ + if (us >= 0x10000) { + us -= 0x10000; + utf16[j++] = (unsigned 
short)((us >> 10) | 0xd800); + utf16[j] = (unsigned short)((us & 0x3ff) | 0xdc00); + } else { + utf16[j] = (unsigned short)us; + } + } + } else { + /* Only check if utf8 string is valid, and compute utf16 length */ + for (j=0 ; pos < len ; j++) { + us = php_next_utf8_char((const unsigned char *)utf8, len, &pos, &status); + if (status != SUCCESS) { + return -1; + } + if (us >= 0x10000) { + j++; + } + } + } + return j; +} +/* }}} */ + +static void php_json_escape_string(smart_str *buf, char *s, size_t len, int options) /* {{{ */ +{ + int status; + unsigned int us, next_us = 0; + size_t pos, checkpoint; + + if (len == 0) { + smart_str_appendl(buf, "\"\"", 2); + return; + } + + if (options & PHP_JSON_NUMERIC_CHECK) { + double d; + int type; + zend_long p; + + if ((type = is_numeric_string(s, len, &p, &d, 0)) != 0) { + if (type == IS_LONG) { + smart_str_append_long(buf, p); + } else if (type == IS_DOUBLE) { + php_json_encode_double(buf, d, options); + } + return; + } + + } + + if (options & PHP_JSON_UNESCAPED_UNICODE) { + /* validate UTF-8 string first */ + if (php_json_utf8_to_utf16(NULL, s, len) < 0) { + JSON_G(error_code) = PHP_JSON_ERROR_UTF8; + smart_str_appendl(buf, "null", 4); + return; + } + } + + pos = 0; + checkpoint = buf->s ? 
buf->s->len : 0; + + /* pre-allocate for string length plus 2 quotes */ + smart_str_alloc(buf, len+2, 0); + smart_str_appendc(buf, '"'); + + do { + if (UNEXPECTED(next_us)) { + us = next_us; + next_us = 0; + } else { + us = (unsigned char)s[pos]; + if (!(options & PHP_JSON_UNESCAPED_UNICODE) && us >= 0x80) { + /* UTF-8 character */ + us = php_next_utf8_char((const unsigned char *)s, len, &pos, &status); + if (status != SUCCESS) { + if (buf->s) { + buf->s->len = checkpoint; + } + JSON_G(error_code) = PHP_JSON_ERROR_UTF8; + smart_str_appendl(buf, "null", 4); + return; + } + /* From http://en.wikipedia.org/wiki/UTF16 */ + if (us >= 0x10000) { + us -= 0x10000; + next_us = (unsigned short)((us & 0x3ff) | 0xdc00); + us = (unsigned short)((us >> 10) | 0xd800); + } + } else { + pos++; + } + } + + switch (us) { + case '"': + if (options & PHP_JSON_HEX_QUOT) { + smart_str_appendl(buf, "\\u0022", 6); + } else { + smart_str_appendl(buf, "\\\"", 2); + } + break; + + case '\\': + smart_str_appendl(buf, "\\\\", 2); + break; + + case '/': + if (options & PHP_JSON_UNESCAPED_SLASHES) { + smart_str_appendc(buf, '/'); + } else { + smart_str_appendl(buf, "\\/", 2); + } + break; + + case '\b': + smart_str_appendl(buf, "\\b", 2); + break; + + case '\f': + smart_str_appendl(buf, "\\f", 2); + break; + + case '\n': + smart_str_appendl(buf, "\\n", 2); + break; + + case '\r': + smart_str_appendl(buf, "\\r", 2); + break; + + case '\t': + smart_str_appendl(buf, "\\t", 2); + break; + + case '<': + if (options & PHP_JSON_HEX_TAG) { + smart_str_appendl(buf, "\\u003C", 6); + } else { + smart_str_appendc(buf, '<'); + } + break; + + case '>': + if (options & PHP_JSON_HEX_TAG) { + smart_str_appendl(buf, "\\u003E", 6); + } else { + smart_str_appendc(buf, '>'); + } + break; + + case '&': + if (options & PHP_JSON_HEX_AMP) { + smart_str_appendl(buf, "\\u0026", 6); + } else { + smart_str_appendc(buf, '&'); + } + break; + + case '\'': + if (options & PHP_JSON_HEX_APOS) { + smart_str_appendl(buf, "\\u0027", 
6); + } else { + smart_str_appendc(buf, '\''); + } + break; + + default: + if (us >= ' ' && ((options & PHP_JSON_UNESCAPED_UNICODE) || (us & 127) == us)) { + smart_str_appendc(buf, (unsigned char) us); + } else { + smart_str_appendl(buf, "\\u", 2); + smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); + smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); + smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); + smart_str_appendc(buf, digits[(us & 0xf)]); + } + break; + } + } while (pos < len || next_us); + + smart_str_appendc(buf, '"'); +} +/* }}} */ + +static void php_json_encode_serializable_object(smart_str *buf, zval *val, int options) /* {{{ */ +{ + zend_class_entry *ce = Z_OBJCE_P(val); + zval retval, fname; + HashTable* myht; + + if (Z_TYPE_P(val) == IS_ARRAY) { + myht = HASH_OF(val); + } else { + myht = Z_OBJPROP_P(val); + } + + if (myht && ZEND_HASH_GET_APPLY_COUNT(myht) > 1) { + JSON_G(error_code) = PHP_JSON_ERROR_RECURSION; + smart_str_appendl(buf, "null", 4); + return; + } + + ZVAL_STRING(&fname, "jsonSerialize"); + + if (FAILURE == call_user_function_ex(EG(function_table), val, &fname, &retval, 0, NULL, 1, NULL) || Z_TYPE(retval) == IS_UNDEF) { + zend_throw_exception_ex(NULL, 0, "Failed calling %s::jsonSerialize()", ce->name->val); + smart_str_appendl(buf, "null", sizeof("null") - 1); + zval_ptr_dtor(&fname); + return; + } + + if (EG(exception)) { + /* Error already raised */ + zval_ptr_dtor(&retval); + zval_ptr_dtor(&fname); + smart_str_appendl(buf, "null", sizeof("null") - 1); + return; + } + + if ((Z_TYPE(retval) == IS_OBJECT) && + (Z_OBJ(retval) == Z_OBJ_P(val))) { + /* Handle the case where jsonSerialize does: return $this; by going straight to encode array */ + php_json_encode_array(buf, &retval, options); + } else { + /* All other types, encode as normal */ + php_json_encode(buf, &retval, options); + } + + zval_ptr_dtor(&retval); + zval_ptr_dtor(&fname); +} +/* }}} */ + +void php_json_encode_zval(smart_str *buf, zval *val, int options) /* {{{ */ +{ 
+again: + switch (Z_TYPE_P(val)) + { + case IS_NULL: + smart_str_appendl(buf, "null", 4); + break; + + case IS_TRUE: + smart_str_appendl(buf, "true", 4); + break; + case IS_FALSE: + smart_str_appendl(buf, "false", 5); + break; + + case IS_LONG: + smart_str_append_long(buf, Z_LVAL_P(val)); + break; + + case IS_DOUBLE: + php_json_encode_double(buf, Z_DVAL_P(val), options); + break; + + case IS_STRING: + php_json_escape_string(buf, Z_STRVAL_P(val), Z_STRLEN_P(val), options); + break; + + case IS_OBJECT: + if (instanceof_function(Z_OBJCE_P(val), php_json_serializable_ce)) { + php_json_encode_serializable_object(buf, val, options); + break; + } + /* fallthrough -- Non-serializable object */ + case IS_ARRAY: + php_json_encode_array(buf, val, options); + break; + + case IS_REFERENCE: + val = Z_REFVAL_P(val); + goto again; + + default: + JSON_G(error_code) = PHP_JSON_ERROR_UNSUPPORTED_TYPE; + smart_str_appendl(buf, "null", 4); + break; + } + + return; +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/json/json_parser.y b/ext/json/json_parser.y new file mode 100644 index 0000000000000..72f2890f6f1ac --- /dev/null +++ b/ext/json/json_parser.y @@ -0,0 +1,227 @@ +%code top { +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2015 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | 
license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Jakub Zelenka | + +----------------------------------------------------------------------+ +*/ + +#include "php.h" +#include "php_json.h" +#include "php_json_parser.h" + +#define YYDEBUG 0 + +#if YYDEBUG +int json_yydebug = 1; +#endif + +#define PHP_JSON_USE(uv) ((void) (uv)) +#define PHP_JSON_USE_1(uvr, uv1) PHP_JSON_USE(uvr); PHP_JSON_USE(uv1) +#define PHP_JSON_USE_2(uvr, uv1, uv2) PHP_JSON_USE(uvr); PHP_JSON_USE(uv1); PHP_JSON_USE(uv2) + +} + +%pure-parser +%name-prefix "php_json_yy" +%lex-param { php_json_parser *parser } +%parse-param { php_json_parser *parser } + +%union { + zval value; + struct { + zval key; + zval val; + } pair; +} + + +%token PHP_JSON_T_NUL +%token PHP_JSON_T_TRUE +%token PHP_JSON_T_FALSE +%token PHP_JSON_T_INT +%token PHP_JSON_T_DOUBLE +%token PHP_JSON_T_STRING +%token PHP_JSON_T_ESTRING +%token PHP_JSON_T_EOI +%token PHP_JSON_T_ERROR + +%type start object key value array errlex +%type members member elements element +%type pair + +%destructor { zval_dtor(&$$); } +%destructor { zend_hash_destroy($$); FREE_HASHTABLE($$); } +%destructor { zval_dtor(&$$.key); zval_dtor(&$$.val); } + +%code { +int php_json_yylex(union YYSTYPE *value, php_json_parser *parser); +void php_json_yyerror(php_json_parser *parser, char const *msg); +void php_json_parser_object_init(php_json_parser *parser, zval *object); +void php_json_parser_object_update(php_json_parser *parser, zval *object, zval *zkey, zval *zvalue); +void php_json_parser_array_init(zval *object); +void php_json_parser_array_append(zval *array, zval *zvalue); + +#define PHP_JSON_DEPTH_DEC --parser->depth +#define PHP_JSON_DEPTH_INC \ + if (parser->max_depth && parser->depth >= parser->max_depth) { \ + parser->scanner.errcode = PHP_JSON_ERROR_DEPTH; \ + YYERROR; \ + } \ + ++parser->depth +} + +%% /* Rules */ + +start: + value PHP_JSON_T_EOI { $$ = $1; 
ZVAL_COPY_VALUE(parser->return_value, &$1); PHP_JSON_USE($2); YYACCEPT; } + | value errlex { PHP_JSON_USE_2($$, $1, $2); } +; + +object: + '{' { PHP_JSON_DEPTH_INC; } members object_end { PHP_JSON_DEPTH_DEC; $$ = $3; } +; + +object_end: + '}' + | ']' { parser->scanner.errcode = PHP_JSON_ERROR_STATE_MISMATCH; YYERROR; } +; + +members: + /* empty */ { php_json_parser_object_init(parser, &$$); } + | member +; + +member: + pair { php_json_parser_object_init(parser, &$$); php_json_parser_object_update(parser, &$$, &$1.key, &$1.val); } + | member ',' pair { php_json_parser_object_update(parser, &$1, &$3.key, &$3.val); $$ = $1; } + | member errlex { PHP_JSON_USE_2($$, $1, $2); } +; + +pair: + key ':' value { $$.key = $1; $$.val = $3; } + | key errlex { PHP_JSON_USE_2($$, $1, $2); } +; + +array: + '[' { PHP_JSON_DEPTH_INC; } elements array_end { PHP_JSON_DEPTH_DEC; $$ = $3; } +; + +array_end: + ']' + | '}' { parser->scanner.errcode = PHP_JSON_ERROR_STATE_MISMATCH; YYERROR; } +; + +elements: + /* empty */ { php_json_parser_array_init(&$$); } + | element +; + +element: + value { php_json_parser_array_init(&$$); php_json_parser_array_append(&$$, &$1); } + | element ',' value { php_json_parser_array_append(&$1, &$3); $$ = $1; } + | element errlex { PHP_JSON_USE_2($$, $1, $2); } +; + +key: + PHP_JSON_T_STRING + | PHP_JSON_T_ESTRING +; + +value: + object + | array + | PHP_JSON_T_STRING + | PHP_JSON_T_ESTRING + | PHP_JSON_T_INT + | PHP_JSON_T_DOUBLE + | PHP_JSON_T_NUL + | PHP_JSON_T_TRUE + | PHP_JSON_T_FALSE + | errlex +; + +errlex: + PHP_JSON_T_ERROR { PHP_JSON_USE_1($$, $1); YYERROR; } +; + +%% /* Functions */ + +void php_json_parser_init(php_json_parser *parser, zval *return_value, char *str, int str_len, long options, long max_depth) +{ + memset(parser, 0, sizeof(php_json_parser)); + php_json_scanner_init(&parser->scanner, str, str_len, options); + parser->depth = 1; + parser->max_depth = max_depth; + parser->return_value = return_value; +} + +php_json_error_code 
php_json_parser_error_code(php_json_parser *parser) +{ + return parser->scanner.errcode; +} + +void php_json_parser_object_init(php_json_parser *parser, zval *object) +{ + if (parser->scanner.options & PHP_JSON_OBJECT_AS_ARRAY) { + array_init(object); + } else { + object_init(object); + } +} + +void php_json_parser_object_update(php_json_parser *parser, zval *object, zval *zkey, zval *zvalue) +{ + char *key = Z_STRVAL_P(zkey); + int key_len = Z_STRLEN_P(zkey); + + if (parser->scanner.options & PHP_JSON_OBJECT_AS_ARRAY) { + add_assoc_zval_ex(object, key, key_len, zvalue); + } else { + if (key_len == 0) { + key = "_empty_"; + key_len = sizeof("_empty_") - 1; + } + add_property_zval_ex(object, key, key_len, zvalue TSRMLS_CC); + + if (Z_REFCOUNTED_P(zvalue)) { + Z_DELREF_P(zvalue); + } + } + zval_dtor(zkey); +} + +void php_json_parser_array_init(zval *array) +{ + array_init(array); +} + +void php_json_parser_array_append(zval *array, zval *zvalue) +{ + add_next_index_zval(array, zvalue); +} + +int php_json_yylex(union YYSTYPE *value, php_json_parser *parser) +{ + int token = php_json_scan(&parser->scanner); + value->value = parser->scanner.value; + return token; +} + +void php_json_yyerror(php_json_parser *parser, char const *msg) +{ + if (!parser->scanner.errcode) { + parser->scanner.errcode = PHP_JSON_ERROR_SYNTAX; + } +} diff --git a/ext/json/json_scanner.c b/ext/json/json_scanner.c new file mode 100644 index 0000000000000..6cd4d12fd65b9 --- /dev/null +++ b/ext/json/json_scanner.c @@ -0,0 +1,1155 @@ +/* Generated by re2c 0.13.6 */ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2015 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | 
available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Jakub Zelenka | + +----------------------------------------------------------------------+ +*/ + +#include "php.h" +#include "php_json_scanner.h" +#include "php_json_scanner_defs.h" +#include "php_json_parser.h" +#include "json_parser.tab.h" + +#define YYCTYPE php_json_ctype +#define YYCURSOR s->cursor +#define YYLIMIT s->limit +#define YYMARKER s->marker +#define YYCTXMARKER s->ctxmarker + +#define YYGETCONDITION() s->state +#define YYSETCONDITION(yystate) s->state = yystate + +#define YYFILL(n) + +#define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition) +#define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition + +#define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0) +#define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5) +#define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11) + +#define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1) + + +static void php_json_scanner_copy_string(php_json_scanner *s, int esc_size) +{ + size_t len = s->cursor - s->str_start - esc_size - 1; + if (len) { + memcpy(s->pstr, s->str_start, len); + s->pstr += len; + } +} + +static int php_json_hex_to_int(char code) +{ + if (code >= '0' && code <= '9') { + return code - '0'; + } else if (code >= 'A' && code <= 'F') { + return code - ('A' - 10); + } else if (code >= 'a' && code <= 'f') { + return code - ('a' - 10); + } else { + /* this should never happened (just to suppress compiler warning) */ + return -1; + } +} + +static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start) +{ + int i, code = 0; + php_json_ctype 
*pc = s->cursor - start; + for (i = 0; i < size; i++) { + code |= php_json_hex_to_int(*(pc--)) << (i * 4); + } + return code; +} + +static int php_json_ucs2_to_int(php_json_scanner *s, int size) +{ + return php_json_ucs2_to_int_ex(s, size, 1); +} + +void php_json_scanner_init(php_json_scanner *s, char *str, int str_len, long options) +{ + s->cursor = (php_json_ctype *) str; + s->limit = (php_json_ctype *) str + str_len; + s->options = options; + PHP_JSON_CONDITION_SET(JS); +} + +int php_json_scan(php_json_scanner *s) +{ + ZVAL_NULL(&s->value); + +std: + s->token = s->cursor; + + + { + YYCTYPE yych; + unsigned int yyaccept = 0; + if (YYGETCONDITION() < 1) { + goto yyc_JS; + } else { + if (YYGETCONDITION() < 2) { + goto yyc_STR_P1; + } else { + goto yyc_STR_P2; + } + } +/* *********************************** */ +yyc_JS: + { + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 128, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *YYCURSOR; + if (yych <= '9') { + if (yych <= ' ') { + if (yych <= '\n') { + if (yych <= 0x00) goto yy3; + if (yych <= 0x08) goto yy5; + if (yych <= '\t') goto yy7; + goto yy9; + } else { + if (yych == '\r') goto yy10; + if (yych <= 0x1F) goto yy5; + goto yy7; + 
} + } else { + if (yych <= ',') { + if (yych == '"') goto yy11; + if (yych <= '+') goto yy5; + goto yy13; + } else { + if (yych <= '-') goto yy15; + if (yych <= '/') goto yy5; + if (yych <= '0') goto yy16; + goto yy18; + } + } + } else { + if (yych <= 'm') { + if (yych <= '\\') { + if (yych <= ':') goto yy19; + if (yych == '[') goto yy21; + goto yy5; + } else { + if (yych <= ']') goto yy23; + if (yych == 'f') goto yy25; + goto yy5; + } + } else { + if (yych <= 'z') { + if (yych <= 'n') goto yy26; + if (yych == 't') goto yy27; + goto yy5; + } else { + if (yych <= '{') goto yy28; + if (yych == '}') goto yy30; + goto yy5; + } + } + } +yy3: + ++YYCURSOR; + { + if (s->limit < s->cursor) { + return PHP_JSON_T_EOI; + } else { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + } +yy5: + ++YYCURSOR; +yy6: + { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } +yy7: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy58; +yy8: + { goto std; } +yy9: + yych = *++YYCURSOR; + goto yy8; +yy10: + yych = *++YYCURSOR; + if (yych == '\n') goto yy59; + goto yy58; +yy11: + ++YYCURSOR; + { + s->str_start = s->cursor; + s->str_esc = 0; + PHP_JSON_CONDITION_SET(STR_P1); + PHP_JSON_CONDITION_GOTO(STR_P1); + } +yy13: + ++YYCURSOR; + { return ','; } +yy15: + yych = *++YYCURSOR; + if (yych <= '/') goto yy6; + if (yych <= '0') goto yy56; + if (yych <= '9') goto yy46; + goto yy6; +yy16: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 'D') { + if (yych == '.') goto yy48; + } else { + if (yych <= 'E') goto yy49; + if (yych == 'e') goto yy49; + } +yy17: + { + zend_bool bigint = 0, negative = s->token[0] == '-'; + size_t digits = (size_t) (s->cursor - s->token - negative); + if (digits >= PHP_JSON_INT_MAX_LENGTH) { + if (digits == PHP_JSON_INT_MAX_LENGTH) { + int cmp = strncmp((char *) (s->token + negative), LONG_MIN_DIGITS, PHP_JSON_INT_MAX_LENGTH); + if (!(cmp < 0 || (cmp == 0 && negative))) { + bigint = 1; + } + } else { + bigint = 1; + } + } + if 
(!bigint) { + ZVAL_LONG(&s->value, strtol((char *) s->token, NULL, 10)); + return PHP_JSON_T_INT; + } else if (s->options & PHP_JSON_BIGINT_AS_STRING) { + ZVAL_STRINGL(&s->value, (char *) s->token, s->cursor - s->token); + return PHP_JSON_T_STRING; + } else { + ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); + return PHP_JSON_T_DOUBLE; + } + } +yy18: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + goto yy47; +yy19: + ++YYCURSOR; + { return ':'; } +yy21: + ++YYCURSOR; + { return '['; } +yy23: + ++YYCURSOR; + { return ']'; } +yy25: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy41; + goto yy6; +yy26: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy37; + goto yy6; +yy27: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych == 'r') goto yy32; + goto yy6; +yy28: + ++YYCURSOR; + { return '{'; } +yy30: + ++YYCURSOR; + { return '}'; } +yy32: + yych = *++YYCURSOR; + if (yych == 'u') goto yy34; +yy33: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy17; + } else { + goto yy6; + } + } else { + goto yy53; + } +yy34: + yych = *++YYCURSOR; + if (yych != 'e') goto yy33; + ++YYCURSOR; + { + ZVAL_TRUE(&s->value); + return PHP_JSON_T_TRUE; + } +yy37: + yych = *++YYCURSOR; + if (yych != 'l') goto yy33; + yych = *++YYCURSOR; + if (yych != 'l') goto yy33; + ++YYCURSOR; + { + ZVAL_NULL(&s->value); + return PHP_JSON_T_NUL; + } +yy41: + yych = *++YYCURSOR; + if (yych != 'l') goto yy33; + yych = *++YYCURSOR; + if (yych != 's') goto yy33; + yych = *++YYCURSOR; + if (yych != 'e') goto yy33; + ++YYCURSOR; + { + ZVAL_FALSE(&s->value); + return PHP_JSON_T_FALSE; + } +yy46: + yyaccept = 0; + YYMARKER = ++YYCURSOR; + yych = *YYCURSOR; +yy47: + if (yybm[0+yych] & 64) { + goto yy46; + } + if (yych <= 'D') { + if (yych != '.') goto yy17; + } else { + if (yych <= 'E') goto yy49; + if (yych == 'e') goto yy49; + goto yy17; + } +yy48: + yych = *++YYCURSOR; + if (yych <= '/') goto yy33; + 
if (yych <= '9') goto yy54; + goto yy33; +yy49: + yych = *++YYCURSOR; + if (yych <= ',') { + if (yych != '+') goto yy33; + } else { + if (yych <= '-') goto yy50; + if (yych <= '/') goto yy33; + if (yych <= '9') goto yy51; + goto yy33; + } +yy50: + yych = *++YYCURSOR; + if (yych <= '/') goto yy33; + if (yych >= ':') goto yy33; +yy51: + ++YYCURSOR; + yych = *YYCURSOR; + if (yych <= '/') goto yy53; + if (yych <= '9') goto yy51; +yy53: + { + ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); + return PHP_JSON_T_DOUBLE; + } +yy54: + yyaccept = 2; + YYMARKER = ++YYCURSOR; + yych = *YYCURSOR; + if (yych <= 'D') { + if (yych <= '/') goto yy53; + if (yych <= '9') goto yy54; + goto yy53; + } else { + if (yych <= 'E') goto yy49; + if (yych == 'e') goto yy49; + goto yy53; + } +yy56: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 'D') { + if (yych == '.') goto yy48; + goto yy17; + } else { + if (yych <= 'E') goto yy49; + if (yych == 'e') goto yy49; + goto yy17; + } +yy57: + ++YYCURSOR; + yych = *YYCURSOR; +yy58: + if (yybm[0+yych] & 128) { + goto yy57; + } + goto yy8; +yy59: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy8; + } +/* *********************************** */ +yyc_STR_P1: + yych = *YYCURSOR; + if (yych <= 0xDF) { + if (yych <= '[') { + if (yych <= 0x1F) goto yy62; + if (yych == '"') goto yy66; + goto yy64; + } else { + if (yych <= '\\') goto yy68; + if (yych <= 0x7F) goto yy64; + if (yych <= 0xC1) goto yy70; + goto yy72; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) goto yy73; + if (yych <= 0xEC) goto yy74; + if (yych <= 0xED) goto yy75; + goto yy76; + } else { + if (yych <= 0xF0) goto yy77; + if (yych <= 0xF3) goto yy78; + if (yych <= 0xF4) goto yy79; + goto yy70; + } + } +yy62: + ++YYCURSOR; + { + s->errcode = PHP_JSON_ERROR_CTRL_CHAR; + return PHP_JSON_T_ERROR; + } +yy64: + ++YYCURSOR; +yy65: + { PHP_JSON_CONDITION_GOTO(STR_P1); } +yy66: + ++YYCURSOR; + { + zend_string *str; + size_t len = s->cursor - s->str_start - s->str_esc - 
1; + if (len == 0) { + PHP_JSON_CONDITION_SET(JS); + ZVAL_EMPTY_STRING(&s->value); + return PHP_JSON_T_ESTRING; + } + str = zend_string_alloc(len, 0); + str->val[len] = '\0'; + ZVAL_STR(&s->value, str); + if (s->str_esc) { + s->pstr = (php_json_ctype *) Z_STRVAL(s->value); + s->cursor = s->str_start; + PHP_JSON_CONDITION_SET(STR_P2); + PHP_JSON_CONDITION_GOTO(STR_P2); + } else { + memcpy(Z_STRVAL(s->value), s->str_start, len); + PHP_JSON_CONDITION_SET(JS); + return PHP_JSON_T_STRING; + } + } +yy68: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 'e') { + if (yych <= '/') { + if (yych == '"') goto yy92; + if (yych >= '/') goto yy92; + } else { + if (yych <= '\\') { + if (yych >= '\\') goto yy92; + } else { + if (yych == 'b') goto yy92; + } + } + } else { + if (yych <= 'q') { + if (yych <= 'f') goto yy92; + if (yych == 'n') goto yy92; + } else { + if (yych <= 's') { + if (yych <= 'r') goto yy92; + } else { + if (yych <= 't') goto yy92; + if (yych <= 'u') goto yy94; + } + } + } +yy69: + { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } +yy70: + ++YYCURSOR; +yy71: + { + s->errcode = PHP_JSON_ERROR_UTF8; + return PHP_JSON_T_ERROR; + } +yy72: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy71; + if (yych <= 0xBF) goto yy83; + goto yy71; +yy73: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 0x9F) goto yy71; + if (yych <= 0xBF) goto yy91; + goto yy71; +yy74: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 0x7F) goto yy71; + if (yych <= 0xBF) goto yy90; + goto yy71; +yy75: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 0x7F) goto yy71; + if (yych <= 0x9F) goto yy89; + goto yy71; +yy76: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 0x7F) goto yy71; + if (yych <= 0xBF) goto yy88; + goto yy71; +yy77: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 0x8F) goto yy71; + if (yych <= 0xBF) goto yy86; + goto yy71; +yy78: + yyaccept = 1; + yych = *(YYMARKER = 
++YYCURSOR); + if (yych <= 0x7F) goto yy71; + if (yych <= 0xBF) goto yy84; + goto yy71; +yy79: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych <= 0x7F) goto yy71; + if (yych >= 0x90) goto yy71; + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych <= 0xBF) goto yy82; +yy81: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy69; + } else { + goto yy71; + } + } else { + goto yy101; + } +yy82: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych >= 0xC0) goto yy81; +yy83: + yych = *++YYCURSOR; + goto yy65; +yy84: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych >= 0xC0) goto yy81; + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych <= 0xBF) goto yy83; + goto yy81; +yy86: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych >= 0xC0) goto yy81; + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych <= 0xBF) goto yy83; + goto yy81; +yy88: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych <= 0xBF) goto yy83; + goto yy81; +yy89: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych <= 0xBF) goto yy83; + goto yy81; +yy90: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych <= 0xBF) goto yy83; + goto yy81; +yy91: + yych = *++YYCURSOR; + if (yych <= 0x7F) goto yy81; + if (yych <= 0xBF) goto yy83; + goto yy81; +yy92: + ++YYCURSOR; + { + s->str_esc++; + PHP_JSON_CONDITION_GOTO(STR_P1); + } +yy94: + yych = *++YYCURSOR; + if (yych <= 'D') { + if (yych <= '9') { + if (yych <= '/') goto yy81; + if (yych >= '1') goto yy97; + } else { + if (yych <= '@') goto yy81; + if (yych <= 'C') goto yy97; + goto yy96; + } + } else { + if (yych <= 'c') { + if (yych <= 'F') goto yy97; + if (yych <= '`') goto yy81; + goto yy97; + } else { + if (yych <= 'd') goto yy96; + if (yych <= 'f') goto yy97; + goto yy81; + } + } + yych = *++YYCURSOR; + if (yych <= '9') { + if (yych <= '/') goto yy81; + if (yych <= '0') goto yy112; + if (yych <= '7') goto yy113; + 
goto yy98; + } else { + if (yych <= 'F') { + if (yych <= '@') goto yy81; + goto yy98; + } else { + if (yych <= '`') goto yy81; + if (yych <= 'f') goto yy98; + goto yy81; + } + } +yy96: + yych = *++YYCURSOR; + if (yych <= 'B') { + if (yych <= '7') { + if (yych <= '/') goto yy81; + goto yy98; + } else { + if (yych <= '9') goto yy102; + if (yych <= '@') goto yy81; + goto yy102; + } + } else { + if (yych <= '`') { + if (yych <= 'F') goto yy98; + goto yy81; + } else { + if (yych <= 'b') goto yy102; + if (yych <= 'f') goto yy98; + goto yy81; + } + } +yy97: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy98; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy98: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy99; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy99: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy100; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy100: + ++YYCURSOR; +yy101: + { + s->str_esc += 3; + PHP_JSON_CONDITION_GOTO(STR_P1); + } +yy102: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy103; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy103: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy104; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy104: + yyaccept = 2; + yych = *(YYMARKER = ++YYCURSOR); + if (yych != '\\') goto yy101; + yych = *++YYCURSOR; + if (yych != 'u') goto yy81; + yych = *++YYCURSOR; + if (yych == 'D') goto yy107; + if (yych != 'd') goto yy81; +yy107: + yych = *++YYCURSOR; + if (yych <= 'B') 
goto yy81; + if (yych <= 'F') goto yy108; + if (yych <= 'b') goto yy81; + if (yych >= 'g') goto yy81; +yy108: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy109; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy109: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy110; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy110: + ++YYCURSOR; + { + s->str_esc += 8; + PHP_JSON_CONDITION_GOTO(STR_P1); + } +yy112: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych <= '7') goto yy117; + if (yych <= '9') goto yy114; + goto yy81; + } else { + if (yych <= 'F') goto yy114; + if (yych <= '`') goto yy81; + if (yych <= 'f') goto yy114; + goto yy81; + } +yy113: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy114; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy114: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy115; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy115: + ++YYCURSOR; + { + s->str_esc += 4; + PHP_JSON_CONDITION_GOTO(STR_P1); + } +yy117: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy81; + if (yych >= ':') goto yy81; + } else { + if (yych <= 'F') goto yy118; + if (yych <= '`') goto yy81; + if (yych >= 'g') goto yy81; + } +yy118: + ++YYCURSOR; + { + s->str_esc += 5; + PHP_JSON_CONDITION_GOTO(STR_P1); + } +/* *********************************** */ +yyc_STR_P2: + yych = *YYCURSOR; + if (yych == '"') goto yy124; + if (yych == '\\') goto yy126; + ++YYCURSOR; + { PHP_JSON_CONDITION_GOTO(STR_P2); } +yy124: + ++YYCURSOR; + YYSETCONDITION(yycJS); + { + PHP_JSON_SCANNER_COPY_ESC(); + return 
PHP_JSON_T_STRING; + } +yy126: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy128; +yy127: + { + char esc; + PHP_JSON_SCANNER_COPY_ESC(); + switch (*s->cursor) { + case 'b': + esc = '\b'; + break; + case 'f': + esc = '\f'; + break; + case 'n': + esc = '\n'; + break; + case 'r': + esc = '\r'; + break; + case 't': + esc = '\t'; + break; + case '\\': + case '/': + case '"': + esc = *s->cursor; + break; + default: + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + *(s->pstr++) = esc; + ++YYCURSOR; + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } +yy128: + yych = *++YYCURSOR; + if (yych <= 'D') { + if (yych <= '9') { + if (yych <= '/') goto yy129; + if (yych <= '0') goto yy130; + goto yy132; + } else { + if (yych <= '@') goto yy129; + if (yych <= 'C') goto yy132; + goto yy131; + } + } else { + if (yych <= 'c') { + if (yych <= 'F') goto yy132; + if (yych >= 'a') goto yy132; + } else { + if (yych <= 'd') goto yy131; + if (yych <= 'f') goto yy132; + } + } +yy129: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy127; + } else { + goto yy136; + } +yy130: + yych = *++YYCURSOR; + if (yych <= '9') { + if (yych <= '/') goto yy129; + if (yych <= '0') goto yy147; + if (yych <= '7') goto yy148; + goto yy133; + } else { + if (yych <= 'F') { + if (yych <= '@') goto yy129; + goto yy133; + } else { + if (yych <= '`') goto yy129; + if (yych <= 'f') goto yy133; + goto yy129; + } + } +yy131: + yych = *++YYCURSOR; + if (yych <= 'B') { + if (yych <= '7') { + if (yych <= '/') goto yy129; + goto yy133; + } else { + if (yych <= '9') goto yy137; + if (yych <= '@') goto yy129; + goto yy137; + } + } else { + if (yych <= '`') { + if (yych <= 'F') goto yy133; + goto yy129; + } else { + if (yych <= 'b') goto yy137; + if (yych <= 'f') goto yy133; + goto yy129; + } + } +yy132: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy133; + if 
(yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy133: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy134; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy134: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy135; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy135: + ++YYCURSOR; +yy136: + { + int utf16 = php_json_ucs2_to_int(s, 4); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) (0xe0 | (utf16 >> 12)); + *(s->pstr++) = (char) (0x80 | ((utf16 >> 6) & 0x3f)); + *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } +yy137: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy138; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy138: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy139; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy139: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if (yych != '\\') goto yy136; + yych = *++YYCURSOR; + if (yych != 'u') goto yy129; + yych = *++YYCURSOR; + if (yych == 'D') goto yy142; + if (yych != 'd') goto yy129; +yy142: + yych = *++YYCURSOR; + if (yych <= 'B') goto yy129; + if (yych <= 'F') goto yy143; + if (yych <= 'b') goto yy129; + if (yych >= 'g') goto yy129; +yy143: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy144; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy144: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { 
+ if (yych <= 'F') goto yy145; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy145: + ++YYCURSOR; + { + int utf32, utf16_hi, utf16_lo; + utf16_hi = php_json_ucs2_to_int(s, 4); + utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7); + utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000; + PHP_JSON_SCANNER_COPY_UTF_SP(); + *(s->pstr++) = (char) (0xf0 | (utf32 >> 18)); + *(s->pstr++) = (char) (0x80 | ((utf32 >> 12) & 0x3f)); + *(s->pstr++) = (char) (0x80 | ((utf32 >> 6) & 0x3f)); + *(s->pstr++) = (char) (0x80 | (utf32 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } +yy147: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych <= '7') goto yy152; + if (yych <= '9') goto yy149; + goto yy129; + } else { + if (yych <= 'F') goto yy149; + if (yych <= '`') goto yy129; + if (yych <= 'f') goto yy149; + goto yy129; + } +yy148: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy149; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy149: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy150; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy150: + ++YYCURSOR; + { + int utf16 = php_json_ucs2_to_int(s, 3); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) (0xc0 | (utf16 >> 6)); + *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } +yy152: + yych = *++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy129; + if (yych >= ':') goto yy129; + } else { + if (yych <= 'F') goto yy153; + if (yych <= '`') goto yy129; + if (yych >= 'g') goto yy129; + } +yy153: + ++YYCURSOR; + { + int utf16 = php_json_ucs2_to_int(s, 2); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) utf16; + s->str_start = s->cursor; + 
PHP_JSON_CONDITION_GOTO(STR_P2); + } + } + + +} + diff --git a/ext/json/json_scanner.re b/ext/json/json_scanner.re new file mode 100644 index 0000000000000..28743e5453cc0 --- /dev/null +++ b/ext/json/json_scanner.re @@ -0,0 +1,343 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2015 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Jakub Zelenka | + +----------------------------------------------------------------------+ +*/ + +#include "php.h" +#include "php_json_scanner.h" +#include "php_json_scanner_defs.h" +#include "php_json_parser.h" +#include "json_parser.tab.h" + +#define YYCTYPE php_json_ctype +#define YYCURSOR s->cursor +#define YYLIMIT s->limit +#define YYMARKER s->marker +#define YYCTXMARKER s->ctxmarker + +#define YYGETCONDITION() s->state +#define YYSETCONDITION(yystate) s->state = yystate + +#define YYFILL(n) + +#define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition) +#define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition + +#define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0) +#define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5) +#define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11) + +#define PHP_JSON_INT_MAX_LENGTH (MAX_LENGTH_OF_LONG - 1) + + +static 
void php_json_scanner_copy_string(php_json_scanner *s, int esc_size) /* copies the bytes starting at s->str_start to s->pstr and advances s->pstr */ +{ + size_t len = s->cursor - s->str_start - esc_size - 1; /* length of the run starting at str_start, excluding the (esc_size plus 1) bytes that end at the cursor */ + if (len) { + memcpy(s->pstr, s->str_start, len); + s->pstr += len; + } +} + +static int php_json_hex_to_int(char code) /* numeric value of one hexadecimal digit character, -1 for anything else */ +{ + if (code >= '0' && code <= '9') { + return code - '0'; + } else if (code >= 'A' && code <= 'F') { + return code - ('A' - 10); + } else if (code >= 'a' && code <= 'f') { + return code - ('a' - 10); + } else { + /* this should never happen (just to suppress compiler warning) */ + return -1; + } +} + +static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start) /* combines size hex digits, the last of which sits start bytes before the cursor, into one int. NOTE(review): the UTF16_4 rule passes start = 7 to read the first escape of a surrogate pair and stores it in utf16_lo, while utf16_hi receives the second escape; the names look swapped although the arithmetic is consistent */ +{ + int i, code = 0; + php_json_ctype *pc = s->cursor - start; /* least significant digit */ + for (i = 0; i < size; i++) { + code |= php_json_hex_to_int(*(pc--)) << (i * 4); /* walk backwards; the i-th digit visited is shifted left by 4*i bits */ + } + return code; +} + +static int php_json_ucs2_to_int(php_json_scanner *s, int size) /* same as the _ex variant with start = 1: the last digit is the byte immediately before the cursor */ +{ + return php_json_ucs2_to_int_ex(s, size, 1); +} + +void php_json_scanner_init(php_json_scanner *s, char *str, int str_len, long options) /* points the scanner at str, records options and selects the JS condition */ +{ + s->cursor = (php_json_ctype *) str; + s->limit = (php_json_ctype *) str + str_len; /* one past the last input byte */ + s->options = options; + PHP_JSON_CONDITION_SET(JS); /* NOTE(review): yyfill is disabled and end of input is recognised by the "\000" EOI rule, so str is presumably NUL-terminated at str[str_len]; confirm with callers */ +} + +int php_json_scan(php_json_scanner *s) +{ + ZVAL_NULL(&s->value); + +std: + s->token = s->cursor; + +/*!re2c + re2c:indent:top = 1; + re2c:yyfill:enable = 0; + + DIGIT = [0-9] ; + DIGITNZ = [1-9] ; + UINT = "0" | ( DIGITNZ DIGIT* ) ; + INT = "-"? UINT ; + HEX = DIGIT | [a-fA-F] ; + HEXNZ = DIGITNZ | [a-fA-F] ; + HEX7 = [0-7] ; + HEXC = DIGIT | [a-cA-C] ; + FLOAT = INT "." DIGIT+ ; + EXP = ( INT | FLOAT ) [eE] [+-]? DIGIT+ ; + NL = "\r"? 
"\n" ; + WS = [ \t\r]+ ; + EOI = "\000"; + CTRL = [\x00-\x1F] ; + UTF8T = [\x80-\xBF] ; + UTF8_1 = [\x00-\x7F] ; + UTF8_2 = [\xC2-\xDF] UTF8T ; + UTF8_3A = "\xE0" [\xA0-\xBF] UTF8T ; + UTF8_3B = [\xE1-\xEC] UTF8T{2} ; + UTF8_3C = "\xED" [\x80-\x9F] UTF8T ; + UTF8_3D = [\xEE-\xEF] UTF8T{2} ; + UTF8_3 = UTF8_3A | UTF8_3B | UTF8_3C | UTF8_3D ; + UTF8_4A = "\xF0"[\x90-\xBF] UTF8T{2} ; + UTF8_4B = [\xF1-\xF3] UTF8T{3} ; + UTF8_4C = "\xF4" [\x80-\x8F] UTF8T{2} ; + UTF8_4 = UTF8_4A | UTF8_4B | UTF8_4C ; + UTF8 = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 ; + ANY = [^] ; + ESCPREF = "\\" ; + ESCSYM = ( "\"" | "\\" | "/" | [bfnrt] ) ; + ESC = ESCPREF ESCSYM ; + UTFSYM = "u" ; + UTFPREF = ESCPREF UTFSYM ; + UCS2 = UTFPREF HEX{4} ; + UTF16_1 = UTFPREF "00" HEX7 HEX ; + UTF16_2 = UTFPREF "0" HEX7 HEX{2} ; + UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ; + UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ; + + "{" { return '{'; } + "}" { return '}'; } + "[" { return '['; } + "]" { return ']'; } + ":" { return ':'; } + "," { return ','; } + "null" { + ZVAL_NULL(&s->value); + return PHP_JSON_T_NUL; + } + "true" { + ZVAL_TRUE(&s->value); + return PHP_JSON_T_TRUE; + } + "false" { + ZVAL_FALSE(&s->value); + return PHP_JSON_T_FALSE; + } + INT { + zend_bool bigint = 0, negative = s->token[0] == '-'; + size_t digits = (size_t) (s->cursor - s->token - negative); + if (digits >= PHP_JSON_INT_MAX_LENGTH) { + if (digits == PHP_JSON_INT_MAX_LENGTH) { + int cmp = strncmp((char *) (s->token + negative), LONG_MIN_DIGITS, PHP_JSON_INT_MAX_LENGTH); + if (!(cmp < 0 || (cmp == 0 && negative))) { + bigint = 1; + } + } else { + bigint = 1; + } + } + if (!bigint) { + ZVAL_LONG(&s->value, strtol((char *) s->token, NULL, 10)); + return PHP_JSON_T_INT; + } else if (s->options & PHP_JSON_BIGINT_AS_STRING) { + ZVAL_STRINGL(&s->value, (char *) s->token, s->cursor - s->token); + return PHP_JSON_T_STRING; + } else { + ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, 
NULL)); + return PHP_JSON_T_DOUBLE; + } + } + FLOAT|EXP { + ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); + return PHP_JSON_T_DOUBLE; + } + NL|WS { goto std; } + EOI { + if (s->limit < s->cursor) { + return PHP_JSON_T_EOI; + } else { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + } + ["] { + s->str_start = s->cursor; + s->str_esc = 0; + PHP_JSON_CONDITION_SET(STR_P1); + PHP_JSON_CONDITION_GOTO(STR_P1); + } + + CTRL { + s->errcode = PHP_JSON_ERROR_CTRL_CHAR; + return PHP_JSON_T_ERROR; + } + UTF16_1 { + s->str_esc += 5; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + UTF16_2 { + s->str_esc += 4; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + UTF16_4 { + s->str_esc += 8; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + UCS2 { + s->str_esc += 3; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + ESC { + s->str_esc++; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + ESCPREF { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + ["] { + zend_string *str; + size_t len = s->cursor - s->str_start - s->str_esc - 1; + if (len == 0) { + PHP_JSON_CONDITION_SET(JS); + ZVAL_EMPTY_STRING(&s->value); + return PHP_JSON_T_ESTRING; + } + str = zend_string_alloc(len, 0); + str->val[len] = '\0'; + ZVAL_STR(&s->value, str); + if (s->str_esc) { + s->pstr = (php_json_ctype *) Z_STRVAL(s->value); + s->cursor = s->str_start; + PHP_JSON_CONDITION_SET(STR_P2); + PHP_JSON_CONDITION_GOTO(STR_P2); + } else { + memcpy(Z_STRVAL(s->value), s->str_start, len); + PHP_JSON_CONDITION_SET(JS); + return PHP_JSON_T_STRING; + } + } + UTF8 { PHP_JSON_CONDITION_GOTO(STR_P1); } + ANY { + s->errcode = PHP_JSON_ERROR_UTF8; + return PHP_JSON_T_ERROR; + } + + UTF16_1 { + int utf16 = php_json_ucs2_to_int(s, 2); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) utf16; + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + UTF16_2 { + int utf16 = php_json_ucs2_to_int(s, 3); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) (0xc0 | (utf16 >> 6)); + *(s->pstr++) = (char) (0x80 
| (utf16 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + UTF16_4 { + int utf32, utf16_hi, utf16_lo; + utf16_hi = php_json_ucs2_to_int(s, 4); + utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7); + utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000; + PHP_JSON_SCANNER_COPY_UTF_SP(); + *(s->pstr++) = (char) (0xf0 | (utf32 >> 18)); + *(s->pstr++) = (char) (0x80 | ((utf32 >> 12) & 0x3f)); + *(s->pstr++) = (char) (0x80 | ((utf32 >> 6) & 0x3f)); + *(s->pstr++) = (char) (0x80 | (utf32 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + UCS2 { + int utf16 = php_json_ucs2_to_int(s, 4); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) (0xe0 | (utf16 >> 12)); + *(s->pstr++) = (char) (0x80 | ((utf16 >> 6) & 0x3f)); + *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + ESCPREF { + char esc; + PHP_JSON_SCANNER_COPY_ESC(); + switch (*s->cursor) { + case 'b': + esc = '\b'; + break; + case 'f': + esc = '\f'; + break; + case 'n': + esc = '\n'; + break; + case 'r': + esc = '\r'; + break; + case 't': + esc = '\t'; + break; + case '\\': + case '/': + case '"': + esc = *s->cursor; + break; + default: + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + *(s->pstr++) = esc; + ++YYCURSOR; + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + ["] => JS { + PHP_JSON_SCANNER_COPY_ESC(); + return PHP_JSON_T_STRING; + } + ANY { PHP_JSON_CONDITION_GOTO(STR_P2); } + + <*>ANY { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } +*/ + +} + diff --git a/ext/json/php_json.h b/ext/json/php_json.h index 0ba4154f2f962..65ae8308082fa 100644 --- a/ext/json/php_json.h +++ b/ext/json/php_json.h @@ -13,6 +13,7 @@ | license@php.net so we can mail you a copy immediately. 
| +----------------------------------------------------------------------+ | Author: Omar Kilani | + | Jakub Zelenka | +----------------------------------------------------------------------+ */ @@ -37,25 +38,21 @@ extern zend_module_entry json_module_entry; #include "TSRM.h" #endif -ZEND_BEGIN_MODULE_GLOBALS(json) - int encoder_depth; - int error_code; - int encode_max_depth; -ZEND_END_MODULE_GLOBALS(json) - -#ifdef ZTS -# define JSON_G(v) ZEND_TSRMG(json_globals_id, zend_json_globals *, v) -# ifdef COMPILE_DL_JSON -ZEND_TSRMLS_CACHE_EXTERN; -# endif -#else -# define JSON_G(v) (json_globals.v) -#endif - -PHP_JSON_API void php_json_encode(smart_str *buf, zval *val, int options); -PHP_JSON_API void php_json_decode_ex(zval *return_value, char *str, size_t str_len, zend_long options, zend_long depth); extern PHP_JSON_API zend_class_entry *php_json_serializable_ce; +/* error codes */ +typedef enum { + PHP_JSON_ERROR_NONE = 0, + PHP_JSON_ERROR_DEPTH, + PHP_JSON_ERROR_STATE_MISMATCH, + PHP_JSON_ERROR_CTRL_CHAR, + PHP_JSON_ERROR_SYNTAX, + PHP_JSON_ERROR_UTF8, + PHP_JSON_ERROR_RECURSION, + PHP_JSON_ERROR_INF_OR_NAN, + PHP_JSON_ERROR_UNSUPPORTED_TYPE, + PHP_JSON_ERROR_UTF16 +} php_json_error_code; /* json_encode() options */ #define PHP_JSON_HEX_TAG (1<<0) @@ -74,10 +71,32 @@ extern PHP_JSON_API zend_class_entry *php_json_serializable_ce; #define PHP_JSON_OUTPUT_ARRAY 0 #define PHP_JSON_OUTPUT_OBJECT 1 +/* default depth */ +#define PHP_JSON_PARSER_DEFAULT_DEPTH 512 + +ZEND_BEGIN_MODULE_GLOBALS(json) + int encoder_depth; + int encode_max_depth; + php_json_error_code error_code; +ZEND_END_MODULE_GLOBALS(json) + + +#ifdef ZTS +# define JSON_G(v) ZEND_TSRMG(json_globals_id, zend_json_globals *, v) +# ifdef COMPILE_DL_JSON +ZEND_TSRMLS_CACHE_EXTERN; +# endif +#else +# define JSON_G(v) (json_globals.v) +#endif + /* json_decode() options */ #define PHP_JSON_OBJECT_AS_ARRAY (1<<0) #define PHP_JSON_BIGINT_AS_STRING (1<<1) +PHP_JSON_API void php_json_encode(smart_str *buf, zval *val, 
int options); +PHP_JSON_API void php_json_decode_ex(zval *return_value, char *str, size_t str_len, zend_long options, zend_long depth); + static inline void php_json_decode(zval *return_value, char *str, int str_len, zend_bool assoc, zend_long depth) { php_json_decode_ex(return_value, str, str_len, assoc ? PHP_JSON_OBJECT_AS_ARRAY : 0, depth); diff --git a/ext/json/php_json_encoder.h b/ext/json/php_json_encoder.h new file mode 100644 index 0000000000000..357994e512c0c --- /dev/null +++ b/ext/json/php_json_encoder.h @@ -0,0 +1,27 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2015 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Author: Jakub Zelenka | + +----------------------------------------------------------------------+ +*/ + +#ifndef PHP_JSON_ENCODER_H +#define PHP_JSON_ENCODER_H + +#include "php.h" +#include "zend_smart_str.h" + +void php_json_encode_zval(smart_str *buf, zval *val, int options); + +#endif /* PHP_JSON_ENCODER_H */ diff --git a/ext/json/php_json_parser.h b/ext/json/php_json_parser.h new file mode 100644 index 0000000000000..ec20950aa33dd --- /dev/null +++ b/ext/json/php_json_parser.h @@ -0,0 +1,39 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2015 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Author: Jakub Zelenka | + +----------------------------------------------------------------------+ +*/ + +#ifndef PHP_JSON_PARSER_H +#define PHP_JSON_PARSER_H + +#include "php.h" +#include "php_json_scanner.h" + +typedef struct _php_json_parser { + php_json_scanner scanner; + zval *return_value; + long depth; + long max_depth; +} php_json_parser; + +void php_json_parser_init(php_json_parser *parser, zval *return_value, char *str, int str_len, long options, long max_depth); + +php_json_error_code php_json_parser_error_code(php_json_parser *parser); + +int php_json_yyparse(php_json_parser *parser); + +#endif /* PHP_JSON_PARSER_H */ + diff --git a/ext/json/php_json_scanner.h b/ext/json/php_json_scanner.h new file mode 100644 index 0000000000000..4b031a4f12dba --- /dev/null +++ b/ext/json/php_json_scanner.h @@ -0,0 +1,47 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2015 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Author: Jakub Zelenka | + +----------------------------------------------------------------------+ +*/ + +#ifndef PHP_JSON_SCANNER_H +#define PHP_JSON_SCANNER_H + +#include "php.h" +#include "php_json.h" + +typedef unsigned char php_json_ctype; + +typedef struct _php_json_scanner { + php_json_ctype *cursor; /* cursor position */ + php_json_ctype *token; /* token position */ + php_json_ctype *limit; /* the last read character + 1 position */ + php_json_ctype *marker; /* marker position for backtracking */ + php_json_ctype *ctxmarker; /* marker position for context backtracking */ + php_json_ctype *str_start; /* start position of the string */ + php_json_ctype *pstr; /* string pointer for escapes conversion */ + int str_esc; /* number of extra characters for escaping */ + int state; /* condition state */ + zval value; /* value */ + long options; /* options */ + php_json_error_code errcode; /* error type if there is an error */ +} php_json_scanner; + + +void php_json_scanner_init(php_json_scanner *scanner, char *str, int str_len, long options); +int php_json_scan(php_json_scanner *s); + +#endif /* PHP_JSON_SCANNER_H */ + diff --git a/ext/json/php_json_scanner_defs.h b/ext/json/php_json_scanner_defs.h new file mode 100644 index 0000000000000..e1a5824dd3c6c --- /dev/null +++ b/ext/json/php_json_scanner_defs.h @@ -0,0 +1,7 @@ +/* Generated by re2c 0.13.6 */ + +enum YYCONDTYPE { + yycJS, + yycSTR_P1, + yycSTR_P2, +}; diff --git a/ext/json/tests/bug54484.phpt b/ext/json/tests/bug54484.phpt index e56d8bd86be92..897c655afe44e 100644 --- a/ext/json/tests/bug54484.phpt +++ b/ext/json/tests/bug54484.phpt @@ -15,7 +15,7 @@ json_decode("invalid json"); var_dump(json_last_error()); -json_decode("\001 invalid json"); +json_decode("\"\001 invalid json\""); var_dump(json_last_error()); diff --git a/ext/json/tests/pass003.phpt b/ext/json/tests/pass003.phpt index 36da4a1de6a11..e82bf5cb93d67 100644 --- 
a/ext/json/tests/pass003.phpt +++ b/ext/json/tests/pass003.phpt @@ -39,7 +39,7 @@ $arr = json_decode($arr_enc, true); var_dump($arr); ?> ---EXPECT-- +--EXPECTF-- Testing: { "JSON Test Pattern pass3": { @@ -49,9 +49,9 @@ Testing: } DECODE: AS OBJECT -object(stdClass)#1 (1) { +object(stdClass)#%d (1) { ["JSON Test Pattern pass3"]=> - object(stdClass)#2 (2) { + object(stdClass)#%d (2) { ["The outermost value"]=> string(27) "must be an object or array." ["In this test"]=> @@ -73,9 +73,9 @@ ENCODE: FROM OBJECT ENCODE: FROM ARRAY {"JSON Test Pattern pass3":{"The outermost value":"must be an object or array.","In this test":"It is an object."}} DECODE AGAIN: AS OBJECT -object(stdClass)#3 (1) { +object(stdClass)#%d (1) { ["JSON Test Pattern pass3"]=> - object(stdClass)#4 (2) { + object(stdClass)#%d (2) { ["The outermost value"]=> string(27) "must be an object or array." ["In this test"]=> diff --git a/ext/json/utf8_decode.c b/ext/json/utf8_decode.c deleted file mode 100644 index f704b1bf0791b..0000000000000 --- a/ext/json/utf8_decode.c +++ /dev/null @@ -1,179 +0,0 @@ -/* utf8_decode.c */ - -/* 2005-12-25 */ - -/* -Copyright (c) 2005 JSON.org - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -The Software shall be used for Good, not Evil. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -#include "utf8_decode.h" - -/* - Very Strict UTF-8 Decoder - - UTF-8 is a multibyte character encoding of Unicode. A character can be - represented by 1-4 bytes. The bit pattern of the first byte indicates the - number of continuation bytes. - - Most UTF-8 decoders tend to be lenient, attempting to recover as much - information as possible, even from badly encoded input. This UTF-8 - decoder is not lenient. It will reject input which does not include - proper continuation bytes. It will reject aliases (or suboptimal - codings). It will reject surrogates. (Surrogate encoding should only be - used with UTF-16.) - - Code Contination Minimum Maximum - 0xxxxxxx 0 0 127 - 10xxxxxx error - 110xxxxx 1 128 2047 - 1110xxxx 2 2048 65535 excluding 55296 - 57343 - 11110xxx 3 65536 1114111 - 11111xxx error -*/ - - -/* - Get the next byte. It returns UTF8_END if there are no more bytes. -*/ -static int -get(json_utf8_decode *utf8) -{ - int c; - if (utf8->the_index >= utf8->the_length) { - return UTF8_END; - } - c = utf8->the_input[utf8->the_index] & 0xFF; - utf8->the_index += 1; - return c; -} - - -/* - Get the 6-bit payload of the next continuation byte. - Return UTF8_ERROR if it is not a contination byte. -*/ -static int -cont(json_utf8_decode *utf8) -{ - int c = get(utf8); - return ((c & 0xC0) == 0x80) ? (c & 0x3F) : UTF8_ERROR; -} - - -/* - Initialize the UTF-8 decoder. The decoder is not reentrant, -*/ -void -utf8_decode_init(json_utf8_decode *utf8, char p[], int length) -{ - utf8->the_index = 0; - utf8->the_input = p; - utf8->the_length = length; - utf8->the_char = 0; - utf8->the_byte = 0; -} - - -/* - Get the current byte offset. This is generally used in error reporting. 
-*/ -int -utf8_decode_at_byte(json_utf8_decode *utf8) -{ - return utf8->the_byte; -} - - -/* - Get the current character offset. This is generally used in error reporting. - The character offset matches the byte offset if the text is strictly ASCII. -*/ -int -utf8_decode_at_character(json_utf8_decode *utf8) -{ - return utf8->the_char > 0 ? utf8->the_char - 1 : 0; -} - - -/* - Extract the next character. - Returns: the character (between 0 and 1114111) - or UTF8_END (the end) - or UTF8_ERROR (error) -*/ -int -utf8_decode_next(json_utf8_decode *utf8) -{ - int c; /* the first byte of the character */ - int r; /* the result */ - - if (utf8->the_index >= utf8->the_length) { - return utf8->the_index == utf8->the_length ? UTF8_END : UTF8_ERROR; - } - utf8->the_byte = utf8->the_index; - utf8->the_char += 1; - c = get(utf8); -/* - Zero continuation (0 to 127) -*/ - if ((c & 0x80) == 0) { - return c; - } -/* - One contination (128 to 2047) -*/ - if ((c & 0xE0) == 0xC0) { - int c1 = cont(utf8); - if (c1 < 0) { - return UTF8_ERROR; - } - r = ((c & 0x1F) << 6) | c1; - return r >= 128 ? r : UTF8_ERROR; - } -/* - Two continuation (2048 to 55295 and 57344 to 65535) -*/ - if ((c & 0xF0) == 0xE0) { - int c1 = cont(utf8); - int c2 = cont(utf8); - if (c1 < 0 || c2 < 0) { - return UTF8_ERROR; - } - r = ((c & 0x0F) << 12) | (c1 << 6) | c2; - return r >= 2048 && (r < 55296 || r > 57343) ? r : UTF8_ERROR; - } -/* - Three continuation (65536 to 1114111) -*/ - if ((c & 0xF8) == 0xF0) { - int c1 = cont(utf8); - int c2 = cont(utf8); - int c3 = cont(utf8); - if (c1 < 0 || c2 < 0 || c3 < 0) { - return UTF8_ERROR; - } - r = ((c & 0x0F) << 18) | (c1 << 12) | (c2 << 6) | c3; - return r >= 65536 && r <= 1114111 ? 
r : UTF8_ERROR; - } - return UTF8_ERROR; -} diff --git a/ext/json/utf8_decode.h b/ext/json/utf8_decode.h deleted file mode 100644 index 0908edd2d49c3..0000000000000 --- a/ext/json/utf8_decode.h +++ /dev/null @@ -1,18 +0,0 @@ -/* utf8_decode.h */ - -#define UTF8_END -1 -#define UTF8_ERROR -2 - -typedef struct json_utf8_decode -{ - char *the_input; - int the_index; - int the_length; - int the_char; - int the_byte; -} json_utf8_decode; - -extern int utf8_decode_at_byte(json_utf8_decode *utf8); -extern int utf8_decode_at_character(json_utf8_decode *utf8); -extern void utf8_decode_init(json_utf8_decode *utf8, char p[], int length); -extern int utf8_decode_next(json_utf8_decode *utf8);