From a54e1237ec7d51e063f8bbf7c94241245573dd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Sat, 7 Mar 2015 02:29:16 -0300 Subject: [PATCH 1/5] add tests for semi reserved words and remove obsolete ones --- Zend/tests/grammar/regression_001.phpt | 33 ++++ Zend/tests/grammar/regression_002.phpt | 22 +++ Zend/tests/grammar/regression_003.phpt | 12 ++ Zend/tests/grammar/regression_004.phpt | 15 ++ Zend/tests/grammar/regression_005.phpt | 14 ++ Zend/tests/grammar/regression_006.phpt | 30 ++++ Zend/tests/grammar/regression_007.phpt | 44 +++++ Zend/tests/grammar/regression_008.phpt | 21 +++ Zend/tests/grammar/regression_009.phpt | 18 ++ Zend/tests/grammar/regression_010.phpt | 14 ++ Zend/tests/grammar/regression_011.phpt | 18 ++ Zend/tests/grammar/regression_012.phpt | 13 ++ Zend/tests/grammar/regression_013.phpt | 13 ++ Zend/tests/grammar/semi_reserved_001.phpt | 188 +++++++++++++++++++ Zend/tests/grammar/semi_reserved_002.phpt | 186 +++++++++++++++++++ Zend/tests/grammar/semi_reserved_003.phpt | 210 ++++++++++++++++++++++ Zend/tests/grammar/semi_reserved_004.phpt | 210 ++++++++++++++++++++++ Zend/tests/grammar/semi_reserved_005.phpt | 189 +++++++++++++++++++ Zend/tests/grammar/semi_reserved_006.phpt | 80 +++++++++ Zend/tests/grammar/semi_reserved_007.phpt | 37 ++++ Zend/tests/grammar/semi_reserved_008.phpt | 68 +++++++ Zend/tests/grammar/semi_reserved_009.phpt | 25 +++ Zend/tests/grammar/semi_reserved_010.phpt | 31 ++++ tests/basic/bug51709_1.phpt | 16 -- tests/basic/bug51709_2.phpt | 16 -- 25 files changed, 1491 insertions(+), 32 deletions(-) create mode 100644 Zend/tests/grammar/regression_001.phpt create mode 100644 Zend/tests/grammar/regression_002.phpt create mode 100644 Zend/tests/grammar/regression_003.phpt create mode 100644 Zend/tests/grammar/regression_004.phpt create mode 100644 Zend/tests/grammar/regression_005.phpt create mode 100644 Zend/tests/grammar/regression_006.phpt create mode 100644 Zend/tests/grammar/regression_007.phpt 
create mode 100644 Zend/tests/grammar/regression_008.phpt create mode 100644 Zend/tests/grammar/regression_009.phpt create mode 100644 Zend/tests/grammar/regression_010.phpt create mode 100644 Zend/tests/grammar/regression_011.phpt create mode 100644 Zend/tests/grammar/regression_012.phpt create mode 100644 Zend/tests/grammar/regression_013.phpt create mode 100644 Zend/tests/grammar/semi_reserved_001.phpt create mode 100644 Zend/tests/grammar/semi_reserved_002.phpt create mode 100644 Zend/tests/grammar/semi_reserved_003.phpt create mode 100644 Zend/tests/grammar/semi_reserved_004.phpt create mode 100644 Zend/tests/grammar/semi_reserved_005.phpt create mode 100644 Zend/tests/grammar/semi_reserved_006.phpt create mode 100644 Zend/tests/grammar/semi_reserved_007.phpt create mode 100644 Zend/tests/grammar/semi_reserved_008.phpt create mode 100644 Zend/tests/grammar/semi_reserved_009.phpt create mode 100644 Zend/tests/grammar/semi_reserved_010.phpt delete mode 100644 tests/basic/bug51709_1.phpt delete mode 100644 tests/basic/bug51709_2.phpt diff --git a/Zend/tests/grammar/regression_001.phpt b/Zend/tests/grammar/regression_001.phpt new file mode 100644 index 0000000000000..73d5eacdf6e2c --- /dev/null +++ b/Zend/tests/grammar/regression_001.phpt @@ -0,0 +1,33 @@ +--TEST-- +Test to check static method calls syntax regression +--FILE-- +self()::new()->self()->self()::use +); + +Foo::{'new'}(); + +var_dump(Foo::use); + +echo "\nDone\n"; + +--EXPECTF-- +Foo::new +Foo::new +Foo::new +Foo::self +Foo::new +Foo::self +Foo::self +string(3) "yay" +Foo::new +string(3) "yay" + +Done diff --git a/Zend/tests/grammar/regression_008.phpt b/Zend/tests/grammar/regression_008.phpt new file mode 100644 index 0000000000000..7741ed036ca76 --- /dev/null +++ b/Zend/tests/grammar/regression_008.phpt @@ -0,0 +1,21 @@ +--TEST-- +Test to check regressions on string interpolation with class members access +--FILE-- +require ($friday->require) {$friday->require}", PHP_EOL; + +echo "\nDone\n"; + + 
+--EXPECTF-- + +fun (fun) fun + +Done diff --git a/Zend/tests/grammar/regression_009.phpt b/Zend/tests/grammar/regression_009.phpt new file mode 100644 index 0000000000000..589d90316b251 --- /dev/null +++ b/Zend/tests/grammar/regression_009.phpt @@ -0,0 +1,18 @@ +--TEST-- +Test to check regressions on use statements and lexer state +--FILE-- + +--EXPECTF-- +object(foo\bar)#%d (0) { +} diff --git a/Zend/tests/grammar/regression_012.phpt b/Zend/tests/grammar/regression_012.phpt new file mode 100644 index 0000000000000..3b4925afa6f0d --- /dev/null +++ b/Zend/tests/grammar/regression_012.phpt @@ -0,0 +1,13 @@ +--TEST-- +Testing for regression on const list syntax and arrays +--FILE-- + +--EXPECTF-- + +Parse error: syntax error, unexpected 'FOREACH' (T_FOREACH), expecting ']' in %s on line %d diff --git a/Zend/tests/grammar/regression_013.phpt b/Zend/tests/grammar/regression_013.phpt new file mode 100644 index 0000000000000..1c60ffc273ba4 --- /dev/null +++ b/Zend/tests/grammar/regression_013.phpt @@ -0,0 +1,13 @@ +--TEST-- +Testing for regression with encapsed variables in class declaration context +--FILE-- + +--EXPECTF-- + +Done diff --git a/Zend/tests/grammar/semi_reserved_001.phpt b/Zend/tests/grammar/semi_reserved_001.phpt new file mode 100644 index 0000000000000..06b2532fb6735 --- /dev/null +++ b/Zend/tests/grammar/semi_reserved_001.phpt @@ -0,0 +1,188 @@ +--TEST-- +Test semi-reserved words as class methods +--FILE-- +empty(); +$obj->callable(); +$obj->trait(); +$obj->extends(); +$obj->implements(); +$obj->const(); +$obj->enddeclare(); +$obj->endfor(); +$obj->endforeach(); +$obj->endif(); +$obj->endwhile(); +$obj->and(); +$obj->global(); +$obj->goto(); +$obj->instanceof(); +$obj->insteadof(); +$obj->interface(); +$obj->new(); +$obj->or(); +$obj->xor(); +$obj->try(); +$obj->use(); +$obj->var(); +$obj->exit(); +$obj->list(); +$obj->clone(); +$obj->include(); +$obj->include_once(); +$obj->throw(); +$obj->array(); +$obj->print(); +$obj->echo(); +$obj->require(); 
+$obj->require_once(); +$obj->return(); +$obj->else(); +$obj->elseif(); +$obj->default(); +$obj->break(); +$obj->continue(); +$obj->switch(); +$obj->yield(); +$obj->function(); +$obj->if(); +$obj->endswitch(); +$obj->finally(); +$obj->for(); +$obj->foreach(); +$obj->declare(); +$obj->case(); +$obj->do(); +$obj->while(); +$obj->as(); +$obj->catch(); +$obj->die(); +$obj->self(); +$obj->parent(); + +echo "\nDone\n"; + +--EXPECTF-- +Obj::empty +Obj::callable +Obj::trait +Obj::extends +Obj::implements +Obj::const +Obj::enddeclare +Obj::endfor +Obj::endforeach +Obj::endif +Obj::endwhile +Obj::and +Obj::global +Obj::goto +Obj::instanceof +Obj::insteadof +Obj::interface +Obj::new +Obj::or +Obj::xor +Obj::try +Obj::use +Obj::var +Obj::exit +Obj::list +Obj::clone +Obj::include +Obj::include_once +Obj::throw +Obj::array +Obj::print +Obj::echo +Obj::require +Obj::require_once +Obj::return +Obj::else +Obj::elseif +Obj::default +Obj::break +Obj::continue +Obj::switch +Obj::yield +Obj::function +Obj::if +Obj::endswitch +Obj::finally +Obj::for +Obj::foreach +Obj::declare +Obj::case +Obj::do +Obj::while +Obj::as +Obj::catch +Obj::die +Obj::self +Obj::parent + +Done diff --git a/Zend/tests/grammar/semi_reserved_002.phpt b/Zend/tests/grammar/semi_reserved_002.phpt new file mode 100644 index 0000000000000..e4c49cd7a80b9 --- /dev/null +++ b/Zend/tests/grammar/semi_reserved_002.phpt @@ -0,0 +1,186 @@ +--TEST-- +Test semi-reserved words as static class methods +--FILE-- +empty, PHP_EOL; +echo $obj->callable, PHP_EOL; +echo $obj->class, PHP_EOL; +echo $obj->trait, PHP_EOL; +echo $obj->extends, PHP_EOL; +echo $obj->implements, PHP_EOL; +echo $obj->static, PHP_EOL; +echo $obj->abstract, PHP_EOL; +echo $obj->final, PHP_EOL; +echo $obj->public, PHP_EOL; +echo $obj->protected, PHP_EOL; +echo $obj->private, PHP_EOL; +echo $obj->const, PHP_EOL; +echo $obj->enddeclare, PHP_EOL; +echo $obj->endfor, PHP_EOL; +echo $obj->endforeach, PHP_EOL; +echo $obj->endif, PHP_EOL; +echo $obj->endwhile, PHP_EOL; 
+echo $obj->and, PHP_EOL; +echo $obj->global, PHP_EOL; +echo $obj->goto, PHP_EOL; +echo $obj->instanceof, PHP_EOL; +echo $obj->insteadof, PHP_EOL; +echo $obj->interface, PHP_EOL; +echo $obj->namespace, PHP_EOL; +echo $obj->new, PHP_EOL; +echo $obj->or, PHP_EOL; +echo $obj->xor, PHP_EOL; +echo $obj->try, PHP_EOL; +echo $obj->use, PHP_EOL; +echo $obj->var, PHP_EOL; +echo $obj->exit, PHP_EOL; +echo $obj->list, PHP_EOL; +echo $obj->clone, PHP_EOL; +echo $obj->include, PHP_EOL; +echo $obj->include_once, PHP_EOL; +echo $obj->throw, PHP_EOL; +echo $obj->array, PHP_EOL; +echo $obj->print, PHP_EOL; +echo $obj->echo, PHP_EOL; +echo $obj->require, PHP_EOL; +echo $obj->require_once, PHP_EOL; +echo $obj->return, PHP_EOL; +echo $obj->else, PHP_EOL; +echo $obj->elseif, PHP_EOL; +echo $obj->default, PHP_EOL; +echo $obj->break, PHP_EOL; +echo $obj->continue, PHP_EOL; +echo $obj->switch, PHP_EOL; +echo $obj->yield, PHP_EOL; +echo $obj->function, PHP_EOL; +echo $obj->if, PHP_EOL; +echo $obj->endswitch, PHP_EOL; +echo $obj->finally, PHP_EOL; +echo $obj->for, PHP_EOL; +echo $obj->foreach, PHP_EOL; +echo $obj->declare, PHP_EOL; +echo $obj->case, PHP_EOL; +echo $obj->do, PHP_EOL; +echo $obj->while, PHP_EOL; +echo $obj->as, PHP_EOL; +echo $obj->catch, PHP_EOL; +echo $obj->die, PHP_EOL; +echo $obj->self, PHP_EOL; + +echo "\nDone\n"; + +?> +--EXPECTF-- +empty +callable +class +trait +extends +implements +static +abstract +final +public +protected +private +const +enddeclare +endfor +endforeach +endif +endwhile +and +global +goto +instanceof +insteadof +interface +namespace +new +or +xor +try +use +var +exit +list +clone +include +include_once +throw +array +print +echo +require +require_once +return +else +elseif +default +break +continue +switch +yield +function +if +endswitch +finally +for +foreach +declare +case +do +while +as +catch +die +self + +Done diff --git a/Zend/tests/grammar/semi_reserved_004.phpt b/Zend/tests/grammar/semi_reserved_004.phpt new file mode 100644 index 
0000000000000..40c5df14ef0a2 --- /dev/null +++ b/Zend/tests/grammar/semi_reserved_004.phpt @@ -0,0 +1,210 @@ +--TEST-- +Test semi-reserved words as static class properties +--FILE-- + ['b' => ['c']]]; + + public static function catch(){ echo __METHOD__, PHP_EOL; } + private static function throw(){ echo __METHOD__, PHP_EOL; } + private static function self(){ echo __METHOD__, PHP_EOL; } +} + +trait TraitC +{ + public static function exit(){ echo __METHOD__, PHP_EOL; } + protected static function try(){ echo __METHOD__, PHP_EOL; } +} + +class Foo +{ + use TraitA, TraitB { + TraitA + :: + catch insteadof namespace\TraitB; + TraitA::list as public foreach; + TraitB::throw as public; + TraitB::self as public; + } + + use TraitC { + try as public attempt; + exit as die; + \TraitC::exit as bye; + namespace\TraitC::exit as byebye; + TraitC + :: + exit as farewell; + } +} + +(new Foo)->catch(); +(new Foo)->foreach(); +Foo::throw(); +Foo::self(); +var_dump(Foo::$list['a']); +Foo::attempt(); +Foo::die(); +Foo::bye(); +Foo::byebye(); +Foo::farewell(); + +echo "\nDone\n"; + +--EXPECTF-- +TraitA::catch +TraitA::list +TraitB::throw +TraitB::self +array(1) { + ["b"]=> + array(1) { + [0]=> + string(1) "c" + } +} +TraitC::try +TraitC::exit +TraitC::exit +TraitC::exit +TraitC::exit + +Done diff --git a/Zend/tests/grammar/semi_reserved_007.phpt b/Zend/tests/grammar/semi_reserved_007.phpt new file mode 100644 index 0000000000000..5105629cbea1c --- /dev/null +++ b/Zend/tests/grammar/semi_reserved_007.phpt @@ -0,0 +1,37 @@ +--TEST-- +Edge case: self::self, self::parent, parent::self semi reserved constants access +--FILE-- + as T_STRING +--FILE-- + insteadof T_STRING +--FILE-- + -===DONE=== - ---EXPECTF-- -Parse error: syntax error, unexpected %s, expecting %s in %sbug51709_1.php on line %d diff --git a/tests/basic/bug51709_2.phpt b/tests/basic/bug51709_2.phpt deleted file mode 100644 index bb1f91cc4c7e2..0000000000000 --- a/tests/basic/bug51709_2.phpt +++ /dev/null @@ -1,16 +0,0 @@ 
---TEST-- -Bug #51709 (Can't use keywords as method names) ---FILE-- - -===DONE=== - ---EXPECTF-- -Parse error: syntax error, unexpected %s, expecting %s in %sbug51709_2.php on line %d From a75decd452caea67a8afc175510ea5cf5c4c3aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Mon, 9 Mar 2015 02:24:50 -0300 Subject: [PATCH 2/5] implement context sensitive language with lexical feedback The implementation has no regression risks, has an even smaller footprint compared to the previous attempt involving a pure lexical approach, is highly predictable and highly configurable. To turn a word semi-reserved you only need to edit the "SEMI_RESERVED" parser rule, it's an inclusive list of all the words that should be matched as T_STRING on specific contexts. Example: ``` method_modifiers function returns_ref identifier '(' parameter_list ')' ... ``` instead of: ``` method_modifiers function returns_ref T_STRING '(' parameter_list ')' ... ``` TODO: port ext tokenizer --- Zend/zend_language_parser.y | 62 ++++++++++++++++++++++++++++-------- Zend/zend_language_scanner.l | 18 +++++++++-- 2 files changed, 63 insertions(+), 17 deletions(-) diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y index 2541c9f5713ea..cefcd0cad928e 100644 --- a/Zend/zend_language_parser.y +++ b/Zend/zend_language_parser.y @@ -35,6 +35,7 @@ #include "zend_globals.h" #include "zend_API.h" #include "zend_constants.h" +#include "zend_language_scanner_defs.h" #define YYSIZE_T size_t #define yytnamerr zend_yytnamerr @@ -48,6 +49,12 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); #define YYFREE free #endif +#define REWIND { \ + zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \ + LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \ + LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \ + LANG_SCNG(yy_leng) = 0; } + %} %pure_parser @@ -243,7 +250,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %type 
absolute_trait_method_reference trait_method_reference property echo_expr %type new_expr anonymous_class class_name class_name_reference simple_variable %type internal_functions_in_yacc -%type exit_expr scalar backticks_expr lexical_var function_call member_name +%type exit_expr scalar backticks_expr lexical_var function_call member_name property_name %type variable_class_name dereferencable_scalar class_name_scalar constant dereferencable %type callable_expr callable_variable static_member new_variable %type assignment_list_element array_pair encaps_var encaps_var_offset isset_variables @@ -252,10 +259,11 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %type echo_expr_list unset_variables catch_list parameter_list class_statement_list %type implements_list case_list if_stmt_without_else %type non_empty_parameter_list argument_list non_empty_argument_list property_list -%type class_const_list name_list trait_adaptations method_body non_empty_for_exprs +%type class_const_list class_const_decl name_list trait_adaptations method_body non_empty_for_exprs %type ctor_arguments alt_if_stmt_without_else trait_adaptation_list lexical_vars %type lexical_var_list encaps_list array_pair_list non_empty_array_pair_list %type assignment_list isset_variable type return_type +%type identifier %type returns_ref function is_reference is_variadic variable_modifiers %type method_modifiers trait_modifiers non_empty_member_modifiers member_modifier @@ -269,6 +277,22 @@ start: top_statement_list { CG(ast) = $1; } ; +semi_reserved: + T_INCLUDE | T_INCLUDE_ONCE | T_EVAL | T_REQUIRE | T_REQUIRE_ONCE | T_LOGICAL_OR | T_LOGICAL_XOR | T_LOGICAL_AND + | T_INSTANCEOF | T_NEW | T_CLONE | T_EXIT | T_IF | T_ELSEIF | T_ELSE | T_ENDIF | T_ECHO | T_DO | T_WHILE | T_ENDWHILE + | T_FOR | T_ENDFOR | T_FOREACH | T_ENDFOREACH | T_DECLARE | T_ENDDECLARE | T_AS | T_TRY | T_CATCH | T_FINALLY + | T_THROW | T_USE | T_INSTEADOF | T_GLOBAL | T_VAR | T_UNSET | T_ISSET | T_EMPTY | T_CONTINUE | T_GOTO + | 
T_FUNCTION | T_CONST | T_RETURN | T_PRINT | T_YIELD | T_LIST | T_SWITCH | T_ENDSWITCH | T_CASE | T_DEFAULT | T_BREAK + | T_ARRAY | T_CALLABLE | T_EXTENDS | T_IMPLEMENTS | T_NAMESPACE | T_TRAIT | T_INTERFACE + // | T_STATIC | T_ABSTRACT | T_FINAL | T_PRIVATE | T_PROTECTED | T_PUBLIC + // | T_CLASS +; + +identifier: + T_STRING { $$ = $1; } + | /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; } +; + top_statement_list: top_statement_list top_statement { $$ = zend_ast_list_add($1, $2); } | /* empty */ { $$ = zend_ast_create_list(0, ZEND_AST_STMT_LIST); } @@ -673,7 +697,7 @@ class_statement: { $$ = $2; RESET_DOC_COMMENT(); } | T_USE name_list trait_adaptations { $$ = zend_ast_create(ZEND_AST_USE_TRAIT, $2, $3); } - | method_modifiers function returns_ref T_STRING '(' parameter_list ')' + | method_modifiers function returns_ref identifier '(' parameter_list ')' return_type backup_doc_comment method_body { $$ = zend_ast_create_decl(ZEND_AST_METHOD, $3 | $1, $2, $9, zend_ast_get_str($4), $6, NULL, $10, $8); } @@ -708,20 +732,20 @@ trait_precedence: ; trait_alias: - trait_method_reference T_AS trait_modifiers T_STRING + trait_method_reference T_AS trait_modifiers identifier { $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, $4); } | trait_method_reference T_AS member_modifier { $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, NULL); } ; trait_method_reference: - T_STRING + identifier { $$ = zend_ast_create(ZEND_AST_METHOD_REFERENCE, NULL, $1); } | absolute_trait_method_reference { $$ = $1; } ; absolute_trait_method_reference: - name T_PAAMAYIM_NEKUDOTAYIM T_STRING + name T_PAAMAYIM_NEKUDOTAYIM identifier { $$ = zend_ast_create(ZEND_AST_METHOD_REFERENCE, $1, $3); } ; @@ -773,8 +797,12 @@ property: ; class_const_list: - class_const_list ',' const_decl { $$ = zend_ast_list_add($1, $3); } - | const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); } + class_const_list ',' class_const_decl { $$ = zend_ast_list_add($1, $3); } 
+ | class_const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); } +; + +class_const_decl: + identifier '=' expr { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3); } ; const_decl: @@ -1034,9 +1062,9 @@ scalar: constant: name { $$ = zend_ast_create(ZEND_AST_CONST, $1); } - | class_name T_PAAMAYIM_NEKUDOTAYIM T_STRING + | class_name T_PAAMAYIM_NEKUDOTAYIM identifier { $$ = zend_ast_create(ZEND_AST_CLASS_CONST, $1, $3); } - | variable_class_name T_PAAMAYIM_NEKUDOTAYIM T_STRING + | variable_class_name T_PAAMAYIM_NEKUDOTAYIM identifier { $$ = zend_ast_create(ZEND_AST_CLASS_CONST, $1, $3); } ; @@ -1080,7 +1108,7 @@ callable_variable: { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); } | dereferencable '{' expr '}' { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); } - | dereferencable T_OBJECT_OPERATOR member_name argument_list + | dereferencable T_OBJECT_OPERATOR property_name argument_list { $$ = zend_ast_create(ZEND_AST_METHOD_CALL, $1, $3, $4); } | function_call { $$ = $1; } ; @@ -1090,7 +1118,7 @@ variable: { $$ = $1; } | static_member { $$ = $1; } - | dereferencable T_OBJECT_OPERATOR member_name + | dereferencable T_OBJECT_OPERATOR property_name { $$ = zend_ast_create(ZEND_AST_PROP, $1, $3); } ; @@ -1114,7 +1142,7 @@ new_variable: { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); } | new_variable '{' expr '}' { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); } - | new_variable T_OBJECT_OPERATOR member_name + | new_variable T_OBJECT_OPERATOR property_name { $$ = zend_ast_create(ZEND_AST_PROP, $1, $3); } | class_name T_PAAMAYIM_NEKUDOTAYIM simple_variable { $$ = zend_ast_create(ZEND_AST_STATIC_PROP, $1, $3); } @@ -1123,7 +1151,13 @@ new_variable: ; member_name: - T_STRING { $$ = $1; } + identifier { $$ = $1; } + | '{' expr '}' { $$ = $2; } + | simple_variable { $$ = zend_ast_create(ZEND_AST_VAR, $1); } +; + +property_name: + T_STRING { $$ = $1; } | '{' expr '}' { $$ = $2; } | simple_variable { $$ = zend_ast_create(ZEND_AST_VAR, $1); } ; diff --git 
a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index fdba4b9f07685..2481af605b7df 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -1271,7 +1271,7 @@ NEWLINE ("\r"|"\n"|"\r\n") return T_OBJECT_OPERATOR; } -{WHITESPACE}+ { +{WHITESPACE}+ { HANDLE_NEWLINES(yytext, yyleng); return T_WHITESPACE; } @@ -1875,7 +1875,7 @@ inline_char_handler: } -"#"|"//" { +"#"|"//" { while (YYCURSOR < YYLIMIT) { switch (*YYCURSOR++) { case '\r': @@ -1904,7 +1904,7 @@ inline_char_handler: return T_COMMENT; } -"/*"|"/**"{WHITESPACE} { +"/*"|"/**"{WHITESPACE} { int doc_com; if (yyleng > 2) { @@ -1937,6 +1937,18 @@ inline_char_handler: return T_COMMENT; } +{LABEL} { + zend_copy_value(zendlval, yytext, yyleng); + yy_pop_state(); + return T_STRING; +} + +{ANY_CHAR} { + yyless(0); + yy_pop_state(); + goto restart; +} + "?>"{NEWLINE}? { BEGIN(INITIAL); return T_CLOSE_TAG; /* implicit ';' at php-end tag */ From 02a9eb4f8c736089808b51d862def0e648383e09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Sat, 21 Mar 2015 18:17:14 -0300 Subject: [PATCH 3/5] fix indentation + remove c++ comments --- ext/tokenizer/tokenizer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index c0118944418b0..c4b9d14359fd9 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -104,7 +104,7 @@ static void tokenize(zval *return_value) int token_type; zend_bool destroy; int token_line = 1; - int need_tokens = -1; // for __halt_compiler lexing. -1 = disabled + int need_tokens = -1; /* for __halt_compiler lexing. 
-1 = disabled */ array_init(return_value); @@ -147,13 +147,13 @@ static void tokenize(zval *return_value) } ZVAL_NULL(&token); - // after T_HALT_COMPILER collect the next three non-dropped tokens + /* after T_HALT_COMPILER collect the next three non-dropped tokens */ if (need_tokens != -1) { if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG - && token_type != T_COMMENT && token_type != T_DOC_COMMENT - && --need_tokens == 0 + && token_type != T_COMMENT && token_type != T_DOC_COMMENT + && --need_tokens == 0 ) { - // fetch the rest into a T_INLINE_HTML + /* fetch the rest into a T_INLINE_HTML */ if (zendcursor != zendlimit) { array_init(&keyword); add_next_index_long(&keyword, T_INLINE_HTML); From 110759386e2f9b4d88bf68c669b6c54ad4b5c04f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Sun, 5 Apr 2015 08:50:35 -0300 Subject: [PATCH 4/5] ext tokenizer port + cleanup unused lexer states we basically added a mechanism to store the token stream during parsing and exposed the entire parser stack on the tokenizer extension through an opt-in flag: token_get_all($src, TOKEN_PARSE). this change allows easy future language enhancements regarding context aware parsing & scanning without further maintenance on the tokenizer extension while solving known inconsistencies the "parseless" tokenizer extension has when it handles `__halt_compiler()` presence. 
--- Zend/zend_compile.c | 6 +- Zend/zend_globals.h | 9 + Zend/zend_language_parser.y | 14 +- Zend/zend_language_scanner.h | 4 + Zend/zend_language_scanner.l | 389 +++++++++--------- .../tests/token_get_all_TOKEN_PARSE_000.phpt | 19 + .../tests/token_get_all_TOKEN_PARSE_001.phpt | 81 ++++ .../tests/token_get_all_TOKEN_PARSE_002.phpt | 68 +++ ext/tokenizer/tests/token_get_all_error.phpt | 8 +- ext/tokenizer/tokenizer.c | 132 +++++- 10 files changed, 510 insertions(+), 220 deletions(-) create mode 100644 ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt create mode 100644 ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt create mode 100644 ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index c92a25a705389..210810379f58f 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -30,7 +30,6 @@ #include "zend_interfaces.h" #include "zend_virtual_cwd.h" #include "zend_multibyte.h" -#include "zend_language_scanner.h" #include "zend_inheritance.h" #define SET_NODE(target, src) do { \ @@ -568,7 +567,10 @@ static int zend_add_const_name_literal(zend_op_array *op_array, zend_string *nam op.constant = zend_add_literal(CG(active_op_array), &_c); \ } while (0) -void zend_stop_lexing(void) { +void zend_stop_lexing(void) +{ + if(LANG_SCNG(on_event)) LANG_SCNG(on_event)(ON_STOP, END, 0); + LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit); } diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 326955a103b3c..28487a2a4a185 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -249,6 +249,12 @@ struct _zend_ini_scanner_globals { int scanner_mode; }; +typedef enum { + ON_TOKEN, + ON_FEEDBACK, + ON_STOP +} zend_php_scanner_event; + struct _zend_php_scanner_globals { zend_file_handle *yy_in; zend_file_handle *yy_out; @@ -278,6 +284,9 @@ struct _zend_php_scanner_globals { /* initial string length after scanning to first variable */ int scanned_string_len; + + /* hooks */ + void (* 
on_event)(zend_php_scanner_event event, int token, int line); }; #endif /* ZEND_GLOBALS_H */ diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y index cefcd0cad928e..f6318ec0c0e33 100644 --- a/Zend/zend_language_parser.y +++ b/Zend/zend_language_parser.y @@ -35,7 +35,7 @@ #include "zend_globals.h" #include "zend_API.h" #include "zend_constants.h" -#include "zend_language_scanner_defs.h" +#include "zend_language_scanner.h" #define YYSIZE_T size_t #define yytnamerr zend_yytnamerr @@ -49,12 +49,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); #define YYFREE free #endif -#define REWIND { \ - zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \ - LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \ - LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \ - LANG_SCNG(yy_leng) = 0; } - %} %pure_parser @@ -290,7 +284,11 @@ semi_reserved: identifier: T_STRING { $$ = $1; } - | /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; } + | semi_reserved { + zval zv; + zend_lex_tstring(&zv); + $$ = zend_ast_create_zval(&zv); + } ; top_statement_list: diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index c82b3069c5906..3b75ff8cc45a0 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -50,6 +50,9 @@ typedef struct _zend_lex_state { zend_encoding_filter output_filter; const zend_encoding *script_encoding; + /* hooks */ + void (* on_event)(zend_php_scanner_event event, int token, int line); + zend_ast *ast; zend_arena *ast_arena; } zend_lex_state; @@ -66,6 +69,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state); ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename); ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding); ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding); +ZEND_API void zend_lex_tstring(zval *zv); 
END_EXTERN_C() diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 2481af605b7df..cde0621df0e6c 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -193,6 +193,7 @@ void shutdown_scanner(void) zend_stack_destroy(&SCNG(state_stack)); zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); + SCNG(on_event) = NULL; } ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state) @@ -223,6 +224,8 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state) lex_state->output_filter = SCNG(output_filter); lex_state->script_encoding = SCNG(script_encoding); + lex_state->on_event = SCNG(on_event); + lex_state->ast = CG(ast); lex_state->ast_arena = CG(ast_arena); } @@ -260,6 +263,8 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state) SCNG(output_filter) = lex_state->output_filter; SCNG(script_encoding) = lex_state->script_encoding; + SCNG(on_event) = lex_state->on_event; + CG(ast) = lex_state->ast; CG(ast_arena) = lex_state->ast_arena; @@ -276,6 +281,13 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle) } } +ZEND_API void zend_lex_tstring(zval *zv) +{ + if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0); + + ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng)); +} + #define BOM_UTF32_BE "\x00\x00\xfe\xff" #define BOM_UTF32_LE "\xff\xfe\x00\x00" #define BOM_UTF16_BE "\xfe\xff" @@ -1083,9 +1095,20 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot return SUCCESS; } +static zend_always_inline int emit_token(int token, int token_line) +{ + if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line); + + return token; +} + +#define RETURN_TOKEN(token) return emit_token(token, start_line); int lex_scan(zval *zendlval) { + +int start_line = CG(zend_lineno); + restart: SCNG(yy_text) = YYCURSOR; @@ -1107,183 +1130,183 @@ NEWLINE ("\r"|"\n"|"\r\n") 
:= yyleng = YYCURSOR - SCNG(yy_text); "exit" { - return T_EXIT; + RETURN_TOKEN(T_EXIT); } "die" { - return T_EXIT; + RETURN_TOKEN(T_EXIT); } "function" { - return T_FUNCTION; + RETURN_TOKEN(T_FUNCTION); } "const" { - return T_CONST; + RETURN_TOKEN(T_CONST); } "return" { - return T_RETURN; + RETURN_TOKEN(T_RETURN); } "yield"{WHITESPACE}"from" { - return T_YIELD_FROM; + RETURN_TOKEN(T_YIELD_FROM); } "yield" { - return T_YIELD; + RETURN_TOKEN(T_YIELD); } "try" { - return T_TRY; + RETURN_TOKEN(T_TRY); } "catch" { - return T_CATCH; + RETURN_TOKEN(T_CATCH); } "finally" { - return T_FINALLY; + RETURN_TOKEN(T_FINALLY); } "throw" { - return T_THROW; + RETURN_TOKEN(T_THROW); } "if" { - return T_IF; + RETURN_TOKEN(T_IF); } "elseif" { - return T_ELSEIF; + RETURN_TOKEN(T_ELSEIF); } "endif" { - return T_ENDIF; + RETURN_TOKEN(T_ENDIF); } "else" { - return T_ELSE; + RETURN_TOKEN(T_ELSE); } "while" { - return T_WHILE; + RETURN_TOKEN(T_WHILE); } "endwhile" { - return T_ENDWHILE; + RETURN_TOKEN(T_ENDWHILE); } "do" { - return T_DO; + RETURN_TOKEN(T_DO); } "for" { - return T_FOR; + RETURN_TOKEN(T_FOR); } "endfor" { - return T_ENDFOR; + RETURN_TOKEN(T_ENDFOR); } "foreach" { - return T_FOREACH; + RETURN_TOKEN(T_FOREACH); } "endforeach" { - return T_ENDFOREACH; + RETURN_TOKEN(T_ENDFOREACH); } "declare" { - return T_DECLARE; + RETURN_TOKEN(T_DECLARE); } "enddeclare" { - return T_ENDDECLARE; + RETURN_TOKEN(T_ENDDECLARE); } "instanceof" { - return T_INSTANCEOF; + RETURN_TOKEN(T_INSTANCEOF); } "as" { - return T_AS; + RETURN_TOKEN(T_AS); } "switch" { - return T_SWITCH; + RETURN_TOKEN(T_SWITCH); } "endswitch" { - return T_ENDSWITCH; + RETURN_TOKEN(T_ENDSWITCH); } "case" { - return T_CASE; + RETURN_TOKEN(T_CASE); } "default" { - return T_DEFAULT; + RETURN_TOKEN(T_DEFAULT); } "break" { - return T_BREAK; + RETURN_TOKEN(T_BREAK); } "continue" { - return T_CONTINUE; + RETURN_TOKEN(T_CONTINUE); } "goto" { - return T_GOTO; + RETURN_TOKEN(T_GOTO); } "echo" { - return T_ECHO; + RETURN_TOKEN(T_ECHO); } 
"print" { - return T_PRINT; + RETURN_TOKEN(T_PRINT); } "class" { - return T_CLASS; + RETURN_TOKEN(T_CLASS); } "interface" { - return T_INTERFACE; + RETURN_TOKEN(T_INTERFACE); } "trait" { - return T_TRAIT; + RETURN_TOKEN(T_TRAIT); } "extends" { - return T_EXTENDS; + RETURN_TOKEN(T_EXTENDS); } "implements" { - return T_IMPLEMENTS; + RETURN_TOKEN(T_IMPLEMENTS); } "->" { yy_push_state(ST_LOOKING_FOR_PROPERTY); - return T_OBJECT_OPERATOR; + RETURN_TOKEN(T_OBJECT_OPERATOR); } -{WHITESPACE}+ { +{WHITESPACE}+ { HANDLE_NEWLINES(yytext, yyleng); - return T_WHITESPACE; + RETURN_TOKEN(T_WHITESPACE); } "->" { - return T_OBJECT_OPERATOR; + RETURN_TOKEN(T_OBJECT_OPERATOR); } {LABEL} { yy_pop_state(); zend_copy_value(zendlval, yytext, yyleng); - return T_STRING; + RETURN_TOKEN(T_STRING); } {ANY_CHAR} { @@ -1293,283 +1316,283 @@ NEWLINE ("\r"|"\n"|"\r\n") } "::" { - return T_PAAMAYIM_NEKUDOTAYIM; + RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM); } "\\" { - return T_NS_SEPARATOR; + RETURN_TOKEN(T_NS_SEPARATOR); } "..." { - return T_ELLIPSIS; + RETURN_TOKEN(T_ELLIPSIS); } "??" 
{ - return T_COALESCE; + RETURN_TOKEN(T_COALESCE); } "new" { - return T_NEW; + RETURN_TOKEN(T_NEW); } "clone" { - return T_CLONE; + RETURN_TOKEN(T_CLONE); } "var" { - return T_VAR; + RETURN_TOKEN(T_VAR); } "("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" { - return T_INT_CAST; + RETURN_TOKEN(T_INT_CAST); } "("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" { - return T_DOUBLE_CAST; + RETURN_TOKEN(T_DOUBLE_CAST); } "("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" { - return T_STRING_CAST; + RETURN_TOKEN(T_STRING_CAST); } "("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" { - return T_ARRAY_CAST; + RETURN_TOKEN(T_ARRAY_CAST); } "("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" { - return T_OBJECT_CAST; + RETURN_TOKEN(T_OBJECT_CAST); } "("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" { - return T_BOOL_CAST; + RETURN_TOKEN(T_BOOL_CAST); } "("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" { - return T_UNSET_CAST; + RETURN_TOKEN(T_UNSET_CAST); } "eval" { - return T_EVAL; + RETURN_TOKEN(T_EVAL); } "include" { - return T_INCLUDE; + RETURN_TOKEN(T_INCLUDE); } "include_once" { - return T_INCLUDE_ONCE; + RETURN_TOKEN(T_INCLUDE_ONCE); } "require" { - return T_REQUIRE; + RETURN_TOKEN(T_REQUIRE); } "require_once" { - return T_REQUIRE_ONCE; + RETURN_TOKEN(T_REQUIRE_ONCE); } "namespace" { - return T_NAMESPACE; + RETURN_TOKEN(T_NAMESPACE); } "use" { - return T_USE; + RETURN_TOKEN(T_USE); } "insteadof" { - return T_INSTEADOF; + RETURN_TOKEN(T_INSTEADOF); } "global" { - return T_GLOBAL; + RETURN_TOKEN(T_GLOBAL); } "isset" { - return T_ISSET; + RETURN_TOKEN(T_ISSET); } "empty" { - return T_EMPTY; + RETURN_TOKEN(T_EMPTY); } "__halt_compiler" { - return T_HALT_COMPILER; + RETURN_TOKEN(T_HALT_COMPILER); } "static" { - return T_STATIC; + RETURN_TOKEN(T_STATIC); } "abstract" { - return T_ABSTRACT; + RETURN_TOKEN(T_ABSTRACT); } "final" { - return T_FINAL; + RETURN_TOKEN(T_FINAL); } "private" { - return T_PRIVATE; + RETURN_TOKEN(T_PRIVATE); } 
"protected" { - return T_PROTECTED; + RETURN_TOKEN(T_PROTECTED); } "public" { - return T_PUBLIC; + RETURN_TOKEN(T_PUBLIC); } "unset" { - return T_UNSET; + RETURN_TOKEN(T_UNSET); } "=>" { - return T_DOUBLE_ARROW; + RETURN_TOKEN(T_DOUBLE_ARROW); } "list" { - return T_LIST; + RETURN_TOKEN(T_LIST); } "array" { - return T_ARRAY; + RETURN_TOKEN(T_ARRAY); } "callable" { - return T_CALLABLE; + RETURN_TOKEN(T_CALLABLE); } "++" { - return T_INC; + RETURN_TOKEN(T_INC); } "--" { - return T_DEC; + RETURN_TOKEN(T_DEC); } "===" { - return T_IS_IDENTICAL; + RETURN_TOKEN(T_IS_IDENTICAL); } "!==" { - return T_IS_NOT_IDENTICAL; + RETURN_TOKEN(T_IS_NOT_IDENTICAL); } "==" { - return T_IS_EQUAL; + RETURN_TOKEN(T_IS_EQUAL); } "!="|"<>" { - return T_IS_NOT_EQUAL; + RETURN_TOKEN(T_IS_NOT_EQUAL); } "<=>" { - return T_SPACESHIP; + RETURN_TOKEN(T_SPACESHIP); } "<=" { - return T_IS_SMALLER_OR_EQUAL; + RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL); } ">=" { - return T_IS_GREATER_OR_EQUAL; + RETURN_TOKEN(T_IS_GREATER_OR_EQUAL); } "+=" { - return T_PLUS_EQUAL; + RETURN_TOKEN(T_PLUS_EQUAL); } "-=" { - return T_MINUS_EQUAL; + RETURN_TOKEN(T_MINUS_EQUAL); } "*=" { - return T_MUL_EQUAL; + RETURN_TOKEN(T_MUL_EQUAL); } "*\*" { - return T_POW; + RETURN_TOKEN(T_POW); } "*\*=" { - return T_POW_EQUAL; + RETURN_TOKEN(T_POW_EQUAL); } "/=" { - return T_DIV_EQUAL; + RETURN_TOKEN(T_DIV_EQUAL); } ".=" { - return T_CONCAT_EQUAL; + RETURN_TOKEN(T_CONCAT_EQUAL); } "%=" { - return T_MOD_EQUAL; + RETURN_TOKEN(T_MOD_EQUAL); } "<<=" { - return T_SL_EQUAL; + RETURN_TOKEN(T_SL_EQUAL); } ">>=" { - return T_SR_EQUAL; + RETURN_TOKEN(T_SR_EQUAL); } "&=" { - return T_AND_EQUAL; + RETURN_TOKEN(T_AND_EQUAL); } "|=" { - return T_OR_EQUAL; + RETURN_TOKEN(T_OR_EQUAL); } "^=" { - return T_XOR_EQUAL; + RETURN_TOKEN(T_XOR_EQUAL); } "||" { - return T_BOOLEAN_OR; + RETURN_TOKEN(T_BOOLEAN_OR); } "&&" { - return T_BOOLEAN_AND; + RETURN_TOKEN(T_BOOLEAN_AND); } "OR" { - return T_LOGICAL_OR; + RETURN_TOKEN(T_LOGICAL_OR); } "AND" { - return 
T_LOGICAL_AND; + RETURN_TOKEN(T_LOGICAL_AND); } "XOR" { - return T_LOGICAL_XOR; + RETURN_TOKEN(T_LOGICAL_XOR); } "<<" { - return T_SL; + RETURN_TOKEN(T_SL); } ">>" { - return T_SR; + RETURN_TOKEN(T_SR); } {TOKENS} { - return yytext[0]; + RETURN_TOKEN(yytext[0]); } "{" { yy_push_state(ST_IN_SCRIPTING); - return '{'; + RETURN_TOKEN('{'); } "${" { yy_push_state(ST_LOOKING_FOR_VARNAME); - return T_DOLLAR_OPEN_CURLY_BRACES; + RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES); } @@ -1578,7 +1601,7 @@ NEWLINE ("\r"|"\n"|"\r\n") if (!zend_stack_is_empty(&SCNG(state_stack))) { yy_pop_state(); } - return '}'; + RETURN_TOKEN('}'); } @@ -1587,7 +1610,7 @@ NEWLINE ("\r"|"\n"|"\r\n") zend_copy_value(zendlval, yytext, yyleng); yy_pop_state(); yy_push_state(ST_IN_SCRIPTING); - return T_STRING_VARNAME; + RETURN_TOKEN(T_STRING_VARNAME); } @@ -1617,12 +1640,12 @@ NEWLINE ("\r"|"\n"|"\r\n") ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2)); ZEND_ASSERT(!errno && end == yytext + yyleng); } - return T_LNUMBER; + RETURN_TOKEN(T_LNUMBER); } else { ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end)); /* errno isn't checked since we allow HUGE_VAL/INF overflow */ ZEND_ASSERT(end == yytext + yyleng); - return T_DNUMBER; + RETURN_TOKEN(T_DNUMBER); } } @@ -1636,7 +1659,7 @@ NEWLINE ("\r"|"\n"|"\r\n") */ if (end != yytext + yyleng) { zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE); - return T_ERROR; + RETURN_TOKEN(T_ERROR); } } else { errno = 0; @@ -1653,19 +1676,19 @@ NEWLINE ("\r"|"\n"|"\r\n") if (end != yytext + yyleng) { zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE); - return T_ERROR; + RETURN_TOKEN(T_ERROR); } ZEND_ASSERT(!errno); - return T_DNUMBER; + RETURN_TOKEN(T_DNUMBER); } /* Also not an assert for the same reason */ if (end != yytext + yyleng) { zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE); - return T_ERROR; + RETURN_TOKEN(T_ERROR); } } ZEND_ASSERT(!errno); - return 
T_LNUMBER; + RETURN_TOKEN(T_LNUMBER); } {HNUM} { @@ -1687,12 +1710,12 @@ NEWLINE ("\r"|"\n"|"\r\n") ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16)); ZEND_ASSERT(!errno && end == hex + len); } - return T_LNUMBER; + RETURN_TOKEN(T_LNUMBER); } else { ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end)); /* errno isn't checked since we allow HUGE_VAL/INF overflow */ ZEND_ASSERT(end == hex + len); - return T_DNUMBER; + RETURN_TOKEN(T_DNUMBER); } } @@ -1709,12 +1732,12 @@ NEWLINE ("\r"|"\n"|"\r\n") string: ZVAL_STRINGL(zendlval, yytext, yyleng); } - return T_NUM_STRING; + RETURN_TOKEN(T_NUM_STRING); } {LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */ ZVAL_STRINGL(zendlval, yytext, yyleng); - return T_NUM_STRING; + RETURN_TOKEN(T_NUM_STRING); } {DNUM}|{EXPONENT_DNUM} { @@ -1723,59 +1746,59 @@ string: ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end)); /* errno isn't checked since we allow HUGE_VAL/INF overflow */ ZEND_ASSERT(end == yytext + yyleng); - return T_DNUMBER; + RETURN_TOKEN(T_DNUMBER); } "__CLASS__" { - return T_CLASS_C; + RETURN_TOKEN(T_CLASS_C); } "__TRAIT__" { - return T_TRAIT_C; + RETURN_TOKEN(T_TRAIT_C); } "__FUNCTION__" { - return T_FUNC_C; + RETURN_TOKEN(T_FUNC_C); } "__METHOD__" { - return T_METHOD_C; + RETURN_TOKEN(T_METHOD_C); } "__LINE__" { - return T_LINE; + RETURN_TOKEN(T_LINE); } "__FILE__" { - return T_FILE; + RETURN_TOKEN(T_FILE); } "__DIR__" { - return T_DIR; + RETURN_TOKEN(T_DIR); } "__NAMESPACE__" { - return T_NS_C; + RETURN_TOKEN(T_NS_C); } """{ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - return 0; + RETURN_TOKEN(END); } inline_char_handler: @@ -1823,7 +1846,7 @@ inline_char_handler: ZVAL_STRINGL(zendlval, yytext, yyleng); } HANDLE_NEWLINES(yytext, yyleng); - return T_INLINE_HTML; + RETURN_TOKEN(T_INLINE_HTML); } @@ -1834,7 +1857,7 @@ inline_char_handler: yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY); zend_copy_value(zendlval, (yytext+1), (yyleng-1)); - return T_VARIABLE; + RETURN_TOKEN(T_VARIABLE); } /* A [ 
always designates a variable offset, regardless of what follows @@ -1843,22 +1866,22 @@ inline_char_handler: yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET); zend_copy_value(zendlval, (yytext+1), (yyleng-1)); - return T_VARIABLE; + RETURN_TOKEN(T_VARIABLE); } "$"{LABEL} { zend_copy_value(zendlval, (yytext+1), (yyleng-1)); - return T_VARIABLE; + RETURN_TOKEN(T_VARIABLE); } "]" { yy_pop_state(); - return ']'; + RETURN_TOKEN(']'); } {TOKENS}|[{}"`] { /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */ - return yytext[0]; + RETURN_TOKEN(yytext[0]); } [ \n\r\t\\'#] { @@ -1866,16 +1889,16 @@ inline_char_handler: yyless(0); yy_pop_state(); ZVAL_NULL(zendlval); - return T_ENCAPSED_AND_WHITESPACE; + RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE); } {LABEL} { zend_copy_value(zendlval, yytext, yyleng); - return T_STRING; + RETURN_TOKEN(T_STRING); } -"#"|"//" { +"#"|"//" { while (YYCURSOR < YYLIMIT) { switch (*YYCURSOR++) { case '\r': @@ -1901,10 +1924,10 @@ inline_char_handler: yyleng = YYCURSOR - SCNG(yy_text); - return T_COMMENT; + RETURN_TOKEN(T_COMMENT); } -"/*"|"/**"{WHITESPACE} { +"/*"|"/**"{WHITESPACE} { int doc_com; if (yyleng > 2) { @@ -1931,27 +1954,15 @@ inline_char_handler: if (doc_com) { CG(doc_comment) = zend_string_init(yytext, yyleng, 0); - return T_DOC_COMMENT; + RETURN_TOKEN(T_DOC_COMMENT); } - return T_COMMENT; -} - -{LABEL} { - zend_copy_value(zendlval, yytext, yyleng); - yy_pop_state(); - return T_STRING; -} - -{ANY_CHAR} { - yyless(0); - yy_pop_state(); - goto restart; + RETURN_TOKEN(T_COMMENT); } "?>"{NEWLINE}? { BEGIN(INITIAL); - return T_CLOSE_TAG; /* implicit ';' at php-end tag */ + RETURN_TOKEN(T_CLOSE_TAG); /* implicit ';' at php-end tag */ } @@ -1977,7 +1988,7 @@ inline_char_handler: * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..." 
* rule, which continued in ST_IN_SCRIPTING state after the quote */ ZVAL_NULL(zendlval); - return T_ENCAPSED_AND_WHITESPACE; + RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE); } } @@ -2020,7 +2031,7 @@ inline_char_handler: SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval)); ZVAL_STRINGL(zendlval, str, sz); } - return T_CONSTANT_ENCAPSED_STRING; + RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING); } @@ -2032,9 +2043,9 @@ inline_char_handler: case '"': yyleng = YYCURSOR - SCNG(yy_text); if (zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == FAILURE) { - return T_ERROR; + RETURN_TOKEN(T_ERROR); } - return T_CONSTANT_ENCAPSED_STRING; + RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING); case '$': if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { break; @@ -2064,7 +2075,7 @@ inline_char_handler: YYCURSOR = SCNG(yy_text) + yyleng; BEGIN(ST_DOUBLE_QUOTES); - return '"'; + RETURN_TOKEN('"'); } @@ -2112,13 +2123,13 @@ inline_char_handler: zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label); - return T_START_HEREDOC; + RETURN_TOKEN(T_START_HEREDOC); } [`] { BEGIN(ST_BACKQUOTE); - return '`'; + RETURN_TOKEN('`'); } @@ -2132,7 +2143,7 @@ inline_char_handler: efree(heredoc_label); BEGIN(ST_IN_SCRIPTING); - return T_END_HEREDOC; + RETURN_TOKEN(T_END_HEREDOC); } @@ -2140,18 +2151,18 @@ inline_char_handler: Z_LVAL_P(zendlval) = (zend_long) '{'; yy_push_state(ST_IN_SCRIPTING); yyless(1); - return T_CURLY_OPEN; + RETURN_TOKEN(T_CURLY_OPEN); } ["] { BEGIN(ST_IN_SCRIPTING); - return '"'; + RETURN_TOKEN('"'); } [`] { BEGIN(ST_IN_SCRIPTING); - return '`'; + RETURN_TOKEN('`'); } @@ -2164,7 +2175,7 @@ inline_char_handler: } if (YYCURSOR > YYLIMIT) { - return 0; + RETURN_TOKEN(END); } if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { YYCURSOR++; @@ -2201,15 +2212,15 @@ double_quotes_scan_done: yyleng = YYCURSOR - SCNG(yy_text); if (zend_scan_escape_string(zendlval, yytext, yyleng, '"') == FAILURE) { - return T_ERROR; + 
RETURN_TOKEN(T_ERROR); } - return T_ENCAPSED_AND_WHITESPACE; + RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE); } {ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - return 0; + RETURN_TOKEN(END); } if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { YYCURSOR++; @@ -2245,9 +2256,9 @@ double_quotes_scan_done: yyleng = YYCURSOR - SCNG(yy_text); if (zend_scan_escape_string(zendlval, yytext, yyleng, '`') == FAILURE) { - return T_ERROR; + RETURN_TOKEN(T_ERROR); } - return T_ENCAPSED_AND_WHITESPACE; + RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE); } @@ -2257,7 +2268,7 @@ double_quotes_scan_done: zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); if (YYCURSOR > YYLIMIT) { - return 0; + RETURN_TOKEN(END); } YYCURSOR--; @@ -2321,9 +2332,9 @@ heredoc_scan_done: yyleng = YYCURSOR - SCNG(yy_text); if (zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == FAILURE) { - return T_ERROR; + RETURN_TOKEN(T_ERROR); } - return T_ENCAPSED_AND_WHITESPACE; + RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE); } @@ -2333,7 +2344,7 @@ heredoc_scan_done: zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); if (YYCURSOR > YYLIMIT) { - return 0; + RETURN_TOKEN(END); } YYCURSOR--; @@ -2380,13 +2391,13 @@ nowdoc_scan_done: zend_copy_value(zendlval, yytext, yyleng - newline); HANDLE_NEWLINES(yytext, yyleng - newline); - return T_ENCAPSED_AND_WHITESPACE; + RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE); } {ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - return 0; + RETURN_TOKEN(END); } zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt new file mode 100644 index 0000000000000..03b991b1a5db7 --- /dev/null +++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt @@ -0,0 +1,19 @@ +--TEST-- +Parse errors during token_get_all() with TOKEN_PARSE flag +--SKIPIF-- + +--FILE-- +getMessage(), 
PHP_EOL; +} + +echo "Done"; + +?> +--EXPECT-- +syntax error, unexpected 'code' (T_STRING) +Done diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt new file mode 100644 index 0000000000000..ab334358abc04 --- /dev/null +++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt @@ -0,0 +1,81 @@ +--TEST-- +Semi reserved words support: member access +--SKIPIF-- + +--FILE-- +$continue; +X::continue(); +$x->continue(); +X::class; + +class X { + const CONTINUE = 1; + public $x = self::CONTINUE + 1; +} +', TOKEN_PARSE); + +array_walk($tokens, function($tk) { + if(is_array($tk)) { + if(($t = token_name($tk[0])) == 'T_WHITESPACE') return; + echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL; + } + else echo $tk, PHP_EOL; +}); + +echo "Done"; + +?> +--EXPECTF-- +L1: T_OPEN_TAG +L4: T_VARIABLE $continue +; +L5: T_STRING X +L5: T_DOUBLE_COLON :: +L5: T_STRING continue +( +) +; +L6: T_VARIABLE $x +L6: T_OBJECT_OPERATOR -> +L6: T_STRING continue +( +) +; +L7: T_STRING X +L7: T_DOUBLE_COLON :: +L7: T_CLASS class +; +L9: T_CLASS class +L9: T_STRING X +{ +L10: T_CONST const +L10: T_STRING CONTINUE += +L10: T_LNUMBER 1 +; +L11: T_PUBLIC public +L11: T_VARIABLE $x += +L11: T_STRING self +L11: T_DOUBLE_COLON :: +L11: T_STRING CONTINUE ++ +L11: T_LNUMBER 1 +; +} +Done diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt new file mode 100644 index 0000000000000..3dd8e14d8423a --- /dev/null +++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt @@ -0,0 +1,68 @@ +--TEST-- +Semi reserved words support: class const +--SKIPIF-- + +--FILE-- + [3, 4], 5]; + } +', TOKEN_PARSE); + +array_walk($tokens, function($tk) { + if(is_array($tk)) { + if(($t = token_name($tk[0])) == 'T_WHITESPACE') return; + echo "L{$tk[2]}: ".$t." 
{$tk[1]}", PHP_EOL; + } + else echo $tk, PHP_EOL; +}); + +echo "Done"; + +?> +--EXPECTF-- +L1: T_OPEN_TAG +[ +L5: T_LNUMBER 3 +, +L5: T_LNUMBER 4 +] +, +L5: T_LNUMBER 5 +] +; +} +Done diff --git a/ext/tokenizer/tests/token_get_all_error.phpt b/ext/tokenizer/tests/token_get_all_error.phpt index 29e97c38c4071..9ded0a177425f 100644 --- a/ext/tokenizer/tests/token_get_all_error.phpt +++ b/ext/tokenizer/tests/token_get_all_error.phpt @@ -19,7 +19,7 @@ var_dump( token_get_all()); echo "-- Testing token_get_all() function with more than expected no. of arguments --\n"; $source = ''; $extra_arg = 10; -var_dump( token_get_all($source, $extra_arg)); +var_dump( token_get_all($source, true, $extra_arg)); echo "Done" ?> @@ -28,10 +28,10 @@ echo "Done" -- Testing token_get_all() function with zero arguments -- -Warning: token_get_all() expects exactly 1 parameter, 0 given in %s on line %d +Warning: token_get_all() expects at least 1 parameter, 0 given in %s on line 11 NULL -- Testing token_get_all() function with more than expected no. 
of arguments -- -Warning: token_get_all() expects exactly 1 parameter, 2 given in %s on line %d +Warning: token_get_all() expects at most 2 parameters, 3 given in %s on line 17 NULL -Done +Done \ No newline at end of file diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index c4b9d14359fd9..2a4fa90ca2798 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -37,6 +37,12 @@ #define zendcursor LANG_SCNG(yy_cursor) #define zendlimit LANG_SCNG(yy_limit) +#define TOKEN_PARSE 1 /* token_get_all() flag bit: when set, tokenize_parse() is used instead of tokenize() */ +/* Registers TOKEN_PARSE as a case-sensitive, persistent long constant; called from PHP_MINIT_FUNCTION(tokenizer). */ +void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) { + REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT); +} + /* {{{ arginfo */ ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1) ZEND_ARG_INFO(0, source) @@ -83,6 +89,7 @@ ZEND_GET_MODULE(tokenizer) PHP_MINIT_FUNCTION(tokenizer) { tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU); + tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU); return SUCCESS; } /* }}} */ @@ -97,8 +104,10 @@ PHP_MINFO_FUNCTION(tokenizer) } /* }}} */ -static void tokenize(zval *return_value) +static zend_bool tokenize(zval *return_value, zend_string *source) /* returns 0 if the source cannot be prepared for scanning, 1 otherwise */ { + zval source_zval; + zend_lex_state original_lex_state; zval token; zval keyword; int token_type; @@ -106,10 +115,22 @@ static void tokenize(zval *return_value) int token_line = 1; int need_tokens = -1; /* for __halt_compiler lexing.
-1 = disabled */ + ZVAL_STR_COPY(&source_zval, source); + zend_save_lexical_state(&original_lex_state); + + if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) { + zend_restore_lexical_state(&original_lex_state); + return 0; + } + + LANG_SCNG(yy_state) = yycINITIAL; array_init(return_value); ZVAL_NULL(&token); while ((token_type = lex_scan(&token))) { + + if(token_type == T_ERROR) break; /* leave the loop so the cleanup after it still runs */ + destroy = 1; switch (token_type) { case T_CLOSE_TAG: @@ -123,8 +144,6 @@ static void tokenize(zval *return_value) case T_DOC_COMMENT: destroy = 0; break; - case T_ERROR: - return; } if (token_type >= 256) { @@ -169,34 +188,113 @@ static void tokenize(zval *return_value) token_line = CG(zend_lineno); } + + zval_dtor(&source_zval); + zend_restore_lexical_state(&original_lex_state); + + return 1; } -/* {{{ proto array token_get_all(string source) - */ -PHP_FUNCTION(token_get_all) +static zval token_stream; /* tokens collected by on_event() for tokenize_parse(); a single file-scope instance, so nested use would clobber it. NOTE(review): looks unsafe under ZTS - confirm */ +/* Scanner event callback (file-local): ON_TOKEN appends a token, ON_FEEDBACK rewrites the id of the last appended token, ON_STOP appends the unscanned tail as T_INLINE_HTML. */ +static void on_event(zend_php_scanner_event event, int token, int line) { - zend_string *source; - zval source_zval; - zend_lex_state original_lex_state; + zval keyword; + HashTable *tokens_ht; + zval *token_zv; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &source) == FAILURE) { - return; + switch(event) { + case ON_TOKEN: + if (token == T_ERROR || token == END) break; + if (token >= 256) { /* ids >= 256 are stored as [id, text, line]; smaller ids as the bare text */ + array_init(&keyword); + add_next_index_long(&keyword, token); + add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); + add_next_index_long(&keyword, line); + add_next_index_zval(&token_stream, &keyword); + } else { + add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); + } + break; + case ON_FEEDBACK: + tokens_ht = Z_ARRVAL(token_stream); + token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1); /* last appended entry; NULL when the stream is still empty */ + if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) { + ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token); + } + break; + case ON_STOP: + if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) { +
array_init(&keyword); + add_next_index_long(&keyword, T_INLINE_HTML); + add_next_index_stringl(&keyword, + (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor)); + add_next_index_long(&keyword, CG(zend_lineno)); + add_next_index_zval(&token_stream, &keyword); + } + break; } +} +/* Tokenizes source by running zendparse() with on_event installed as the scanner callback; copies the collected token_stream into return_value only when the parse succeeds. Returns 0 if the source cannot be prepared or the parse fails. */ +static zend_bool tokenize_parse(zval *return_value, zend_string *source) +{ + zval source_zval; + zend_lex_state original_lex_state; + zend_bool original_in_compilation; + zend_bool success; ZVAL_STR_COPY(&source_zval, source); + + original_in_compilation = CG(in_compilation); + CG(in_compilation) = 1; zend_save_lexical_state(&original_lex_state); - if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) { - zend_restore_lexical_state(&original_lex_state); - RETURN_FALSE; - } + if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) { + CG(ast) = NULL; + CG(ast_arena) = zend_arena_create(1024 * 32); + LANG_SCNG(yy_state) = yycINITIAL; + LANG_SCNG(on_event) = on_event; - LANG_SCNG(yy_state) = yycINITIAL; + array_init(&token_stream); + if((success = (zendparse() == SUCCESS))) { + ZVAL_ZVAL(return_value, &token_stream, 1, 0); + } + zval_dtor(&token_stream); - tokenize(return_value); + zend_ast_destroy(CG(ast)); + zend_arena_destroy(CG(ast_arena)); + } + /* restore compiler and scanner global states */ zend_restore_lexical_state(&original_lex_state); + CG(in_compilation) = original_in_compilation; + zval_dtor(&source_zval); + + return success; +} + +/* }}} */ + +/* {{{ proto array token_get_all(string source [, int flags]) + With TOKEN_PARSE set in flags the tokens come from tokenize_parse(), otherwise from tokenize(); returns FALSE when the chosen tokenizer reports failure */ +PHP_FUNCTION(token_get_all) +{ + zend_string *source; + zend_long flags = 0; + zend_bool success; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) { + return; + } + + if (flags & TOKEN_PARSE) { + success = tokenize_parse(return_value, source); + } else { + success = tokenize(return_value, source); + } + + if (!success) RETURN_FALSE; } /* }}} */ From c2f3091b987dd7e0fc7a5400dca2395c3eb25d9f Mon
Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rcio=20Almada?= Date: Sat, 25 Apr 2015 15:06:47 -0300 Subject: [PATCH 5/5] add missing SKIPIF section on test --- ext/tokenizer/tests/bug67395.phpt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/tokenizer/tests/bug67395.phpt b/ext/tokenizer/tests/bug67395.phpt index c9b7f3012f5f4..8101c81edb67b 100644 --- a/ext/tokenizer/tests/bug67395.phpt +++ b/ext/tokenizer/tests/bug67395.phpt @@ -1,5 +1,7 @@ --TEST-- Bug 67395: token_name() does not return name for T_POW and T_POW_EQUAL token +--SKIPIF-- + --FILE--