Permalink
Browse files

Fix lexing of nested heredoc strings in token_get_all()

This fixes bug #60097.

Previously, two global variables, CG(heredoc) and CG(heredoc_len), were
used to track the current heredoc label. In order to support nested heredoc
strings, the *previous* heredoc label was assigned as the token value of
T_START_HEREDOC, and language_parser.y assigned that to CG(heredoc).

This made the lexer depend on the parser. As a result, the
token_get_all() function, which accesses the lexer directly without
also running the parser, was not able to tokenize nested heredoc strings
(and leaked memory). The same applies to the source-code highlighting
functions.

The new approach is to maintain a heredoc_label_stack in the lexer, which
contains all active heredoc labels.

Since a token value is no longer required, T_START_HEREDOC and
T_END_HEREDOC do not carry one anymore.

To make working with zend_ptr_stack more convenient in this context, I
added a new function, zend_ptr_stack_top(), which retrieves the top element
of the stack (similar to zend_stack_top()).
  • Loading branch information...
1 parent 15a98ec commit 4cf90e06c9834a52195384da760503ea055c726d @nikic nikic committed Mar 30, 2012
View
3 NEWS
@@ -41,4 +41,7 @@ PHP NEWS
- pgsql
. Added pg_escape_literal() and pg_escape_identifier() (Yasuo)
+- Tokenizer:
+ . Fixed bug #60097 (token_get_all fails to lex nested heredoc). (Nikita Popov)
+
<<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>
View
@@ -6708,9 +6708,6 @@ int zendlex(znode *zendlval TSRMLS_DC) /* {{{ */
case T_OPEN_TAG_WITH_ECHO:
retval = T_ECHO;
break;
- case T_END_HEREDOC:
- efree(Z_STRVAL(zendlval->u.constant));
- break;
}
INIT_PZVAL(&zendlval->u.constant);
View
@@ -89,9 +89,6 @@ struct _zend_compiler_globals {
int zend_lineno;
- char *heredoc;
- int heredoc_len;
-
zend_op_array *active_op_array;
HashTable *function_table; /* function symbol table */
@@ -297,6 +294,7 @@ struct _zend_php_scanner_globals {
unsigned char *yy_limit;
int yy_state;
zend_stack state_stack;
+ zend_ptr_stack heredoc_label_stack;
/* original (unfiltered) script */
unsigned char *script_org;
View
@@ -153,8 +153,6 @@ ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini
efree(token.value.str.val);
break;
}
- } else if (token_type == T_END_HEREDOC) {
- efree(token.value.str.val);
}
token.type = 0;
}
@@ -911,8 +911,8 @@ common_scalar:
| T_METHOD_C { $$ = $1; }
| T_FUNC_C { $$ = $1; }
| T_NS_C { $$ = $1; }
- | T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC { $$ = $2; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); }
- | T_START_HEREDOC T_END_HEREDOC { ZVAL_EMPTY_STRING(&$$.u.constant); INIT_PZVAL(&$$.u.constant); $$.op_type = IS_CONST; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); }
+ | T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC { $$ = $2; }
+ | T_START_HEREDOC T_END_HEREDOC { ZVAL_EMPTY_STRING(&$$.u.constant); INIT_PZVAL(&$$.u.constant); $$.op_type = IS_CONST; }
;
@@ -941,7 +941,7 @@ scalar:
| T_NS_SEPARATOR namespace_name { char *tmp = estrndup(Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); memcpy(&(tmp[1]), Z_STRVAL($2.u.constant), Z_STRLEN($2.u.constant)+1); tmp[0] = '\\'; efree(Z_STRVAL($2.u.constant)); Z_STRVAL($2.u.constant) = tmp; ++Z_STRLEN($2.u.constant); zend_do_fetch_constant(&$$, NULL, &$2, ZEND_RT, 0 TSRMLS_CC); }
| common_scalar { $$ = $1; }
| '"' encaps_list '"' { $$ = $2; }
- | T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = $2; CG(heredoc) = Z_STRVAL($1.u.constant); CG(heredoc_len) = Z_STRLEN($1.u.constant); }
+ | T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = $2; }
| T_CLASS_C { if (Z_TYPE($1.u.constant) == IS_CONSTANT) {zend_do_fetch_constant(&$$, NULL, &$1, ZEND_RT, 1 TSRMLS_CC);} else {$$ = $1;} }
;
Oops, something went wrong.

1 comment on commit 4cf90e0

THUMBS UP

Please sign in to comment.