Skip to content

Commit

Permalink
Add support for PHP 8.1
Browse files Browse the repository at this point in the history
With the introduction of intersection types, PHP now lexes the
token '&' as either T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG or
T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG.
This completely breaks parsing of any code containing '&'.

Fix this by canonicalizing to the new token format (unconditionally,
independent of emulation) and adjusting the parser to use the two
new tokens.

This doesn't add actual support for intersection types yet.
  • Loading branch information
nikic committed Jul 9, 2021
1 parent feed91c commit c758510
Show file tree
Hide file tree
Showing 9 changed files with 2,578 additions and 2,439 deletions.
36 changes: 25 additions & 11 deletions grammar/php5.y
Expand Up @@ -20,6 +20,11 @@ top_statement_list:
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
;

/* Matches either of the two ampersand tokens, for positions where '&' is accepted
 * regardless of whether a variable or '...' follows it. */
ampersand:
T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
| T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG
;

reserved_non_modifiers:
T_INCLUDE | T_INCLUDE_ONCE | T_EVAL | T_REQUIRE | T_REQUIRE_ONCE | T_LOGICAL_OR | T_LOGICAL_XOR | T_LOGICAL_AND
| T_INSTANCEOF | T_NEW | T_CLONE | T_EXIT | T_IF | T_ELSEIF | T_ELSE | T_ENDIF | T_ECHO | T_DO | T_WHILE
Expand Down Expand Up @@ -246,7 +251,12 @@ variables_list:

optional_ref:
/* empty */ { $$ = false; }
| '&' { $$ = true; }
| ampersand { $$ = true; }
;

/* Optional by-reference marker: yields false when absent and true when present.
 * Only the T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG form of '&' is accepted here. */
optional_arg_ref:
/* empty */ { $$ = false; }
| T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG { $$ = true; }
;

optional_ellipsis:
Expand Down Expand Up @@ -378,7 +388,7 @@ new_else_single:

foreach_variable:
variable { $$ = array($1, false); }
| '&' variable { $$ = array($2, true); }
| ampersand variable { $$ = array($2, true); }
| list_expr { $$ = array($1, false); }
;

Expand All @@ -393,9 +403,9 @@ non_empty_parameter_list:
;

parameter:
optional_param_type optional_ref optional_ellipsis plain_variable
optional_param_type optional_arg_ref optional_ellipsis plain_variable
{ $$ = Node\Param[$4, null, $1, $2, $3]; $this->checkParam($$); }
| optional_param_type optional_ref optional_ellipsis plain_variable '=' static_scalar
| optional_param_type optional_arg_ref optional_ellipsis plain_variable '=' static_scalar
{ $$ = Node\Param[$4, $6, $1, $2, $3]; $this->checkParam($$); }
;

Expand Down Expand Up @@ -428,7 +438,7 @@ non_empty_argument_list:

argument:
expr { $$ = Node\Arg[$1, false, false]; }
| '&' variable { $$ = Node\Arg[$2, true, false]; }
| ampersand variable { $$ = Node\Arg[$2, true, false]; }
| T_ELLIPSIS expr { $$ = Node\Arg[$2, false, true]; }
;

Expand Down Expand Up @@ -562,8 +572,8 @@ expr:
variable { $$ = $1; }
| list_expr '=' expr { $$ = Expr\Assign[$1, $3]; }
| variable '=' expr { $$ = Expr\Assign[$1, $3]; }
| variable '=' '&' variable { $$ = Expr\AssignRef[$1, $4]; }
| variable '=' '&' new_expr { $$ = Expr\AssignRef[$1, $4]; }
| variable '=' ampersand variable { $$ = Expr\AssignRef[$1, $4]; }
| variable '=' ampersand new_expr { $$ = Expr\AssignRef[$1, $4]; }
| new_expr { $$ = $1; }
| T_CLONE expr { $$ = Expr\Clone_[$2]; }
| variable T_PLUS_EQUAL expr { $$ = Expr\AssignOp\Plus [$1, $3]; }
Expand All @@ -589,7 +599,8 @@ expr:
| expr T_LOGICAL_AND expr { $$ = Expr\BinaryOp\LogicalAnd[$1, $3]; }
| expr T_LOGICAL_XOR expr { $$ = Expr\BinaryOp\LogicalXor[$1, $3]; }
| expr '|' expr { $$ = Expr\BinaryOp\BitwiseOr [$1, $3]; }
| expr '&' expr { $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| expr T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG expr { $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| expr T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG expr { $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| expr '^' expr { $$ = Expr\BinaryOp\BitwiseXor[$1, $3]; }
| expr '.' expr { $$ = Expr\BinaryOp\Concat [$1, $3]; }
| expr '+' expr { $$ = Expr\BinaryOp\Plus [$1, $3]; }
Expand Down Expand Up @@ -816,7 +827,10 @@ static_operation:
| static_scalar T_LOGICAL_AND static_scalar { $$ = Expr\BinaryOp\LogicalAnd[$1, $3]; }
| static_scalar T_LOGICAL_XOR static_scalar { $$ = Expr\BinaryOp\LogicalXor[$1, $3]; }
| static_scalar '|' static_scalar { $$ = Expr\BinaryOp\BitwiseOr [$1, $3]; }
| static_scalar '&' static_scalar { $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| static_scalar T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG static_scalar
{ $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| static_scalar T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG static_scalar
{ $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| static_scalar '^' static_scalar { $$ = Expr\BinaryOp\BitwiseXor[$1, $3]; }
| static_scalar '.' static_scalar { $$ = Expr\BinaryOp\Concat [$1, $3]; }
| static_scalar '+' static_scalar { $$ = Expr\BinaryOp\Plus [$1, $3]; }
Expand Down Expand Up @@ -986,8 +1000,8 @@ non_empty_array_pair_list:
array_pair:
expr T_DOUBLE_ARROW expr { $$ = Expr\ArrayItem[$3, $1, false]; }
| expr { $$ = Expr\ArrayItem[$1, null, false]; }
| expr T_DOUBLE_ARROW '&' variable { $$ = Expr\ArrayItem[$4, $1, true]; }
| '&' variable { $$ = Expr\ArrayItem[$2, null, true]; }
| expr T_DOUBLE_ARROW ampersand variable { $$ = Expr\ArrayItem[$4, $1, true]; }
| ampersand variable { $$ = Expr\ArrayItem[$2, null, true]; }
| T_ELLIPSIS expr { $$ = Expr\ArrayItem[$2, null, false, attributes(), true]; }
;

Expand Down
34 changes: 24 additions & 10 deletions grammar/php7.y
Expand Up @@ -20,6 +20,11 @@ top_statement_list:
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
;

/* Matches either of the two ampersand tokens, for positions where '&' is accepted
 * regardless of whether a variable or '...' follows it. */
ampersand:
T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
| T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG
;

reserved_non_modifiers:
T_INCLUDE | T_INCLUDE_ONCE | T_EVAL | T_REQUIRE | T_REQUIRE_ONCE | T_LOGICAL_OR | T_LOGICAL_XOR | T_LOGICAL_AND
| T_INSTANCEOF | T_NEW | T_CLONE | T_EXIT | T_IF | T_ELSEIF | T_ELSE | T_ENDIF | T_ECHO | T_DO | T_WHILE
Expand Down Expand Up @@ -327,7 +332,12 @@ non_empty_variables_list:

optional_ref:
/* empty */ { $$ = false; }
| '&' { $$ = true; }
| ampersand { $$ = true; }
;

/* Optional by-reference marker: yields false when absent and true when present.
 * Only the T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG form of '&' is accepted here. */
optional_arg_ref:
/* empty */ { $$ = false; }
| T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG { $$ = true; }
;

optional_ellipsis:
Expand Down Expand Up @@ -505,7 +515,7 @@ new_else_single:

foreach_variable:
variable { $$ = array($1, false); }
| '&' variable { $$ = array($2, true); }
| ampersand variable { $$ = array($2, true); }
| list_expr { $$ = array($1, false); }
| array_short_syntax { $$ = array($1, false); }
;
Expand All @@ -528,13 +538,16 @@ optional_visibility_modifier:
;

parameter:
optional_attributes optional_visibility_modifier optional_type_without_static optional_ref optional_ellipsis plain_variable
optional_attributes optional_visibility_modifier optional_type_without_static
optional_arg_ref optional_ellipsis plain_variable
{ $$ = new Node\Param($6, null, $3, $4, $5, attributes(), $2, $1);
$this->checkParam($$); }
| optional_attributes optional_visibility_modifier optional_type_without_static optional_ref optional_ellipsis plain_variable '=' expr
| optional_attributes optional_visibility_modifier optional_type_without_static
optional_arg_ref optional_ellipsis plain_variable '=' expr
{ $$ = new Node\Param($6, $8, $3, $4, $5, attributes(), $2, $1);
$this->checkParam($$); }
| optional_attributes optional_visibility_modifier optional_type_without_static optional_ref optional_ellipsis error
| optional_attributes optional_visibility_modifier optional_type_without_static
optional_arg_ref optional_ellipsis error
{ $$ = new Node\Param(Expr\Error[], null, $3, $4, $5, attributes(), $2, $1); }
;

Expand Down Expand Up @@ -594,7 +607,7 @@ non_empty_argument_list:

argument:
expr { $$ = Node\Arg[$1, false, false]; }
| '&' variable { $$ = Node\Arg[$2, true, false]; }
| ampersand variable { $$ = Node\Arg[$2, true, false]; }
| T_ELLIPSIS expr { $$ = Node\Arg[$2, false, true]; }
| identifier_ex ':' expr
{ $$ = new Node\Arg($3, false, false, attributes(), $1); }
Expand Down Expand Up @@ -756,7 +769,7 @@ expr:
| list_expr '=' expr { $$ = Expr\Assign[$1, $3]; }
| array_short_syntax '=' expr { $$ = Expr\Assign[$1, $3]; }
| variable '=' expr { $$ = Expr\Assign[$1, $3]; }
| variable '=' '&' variable { $$ = Expr\AssignRef[$1, $4]; }
| variable '=' ampersand variable { $$ = Expr\AssignRef[$1, $4]; }
| new_expr { $$ = $1; }
| match { $$ = $1; }
| T_CLONE expr { $$ = Expr\Clone_[$2]; }
Expand All @@ -783,7 +796,8 @@ expr:
| expr T_LOGICAL_AND expr { $$ = Expr\BinaryOp\LogicalAnd[$1, $3]; }
| expr T_LOGICAL_XOR expr { $$ = Expr\BinaryOp\LogicalXor[$1, $3]; }
| expr '|' expr { $$ = Expr\BinaryOp\BitwiseOr [$1, $3]; }
| expr '&' expr { $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| expr T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG expr { $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| expr T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG expr { $$ = Expr\BinaryOp\BitwiseAnd[$1, $3]; }
| expr '^' expr { $$ = Expr\BinaryOp\BitwiseXor[$1, $3]; }
| expr '.' expr { $$ = Expr\BinaryOp\Concat [$1, $3]; }
| expr '+' expr { $$ = Expr\BinaryOp\Plus [$1, $3]; }
Expand Down Expand Up @@ -1106,10 +1120,10 @@ inner_array_pair_list:

array_pair:
expr { $$ = Expr\ArrayItem[$1, null, false]; }
| '&' variable { $$ = Expr\ArrayItem[$2, null, true]; }
| ampersand variable { $$ = Expr\ArrayItem[$2, null, true]; }
| list_expr { $$ = Expr\ArrayItem[$1, null, false]; }
| expr T_DOUBLE_ARROW expr { $$ = Expr\ArrayItem[$3, $1, false]; }
| expr T_DOUBLE_ARROW '&' variable { $$ = Expr\ArrayItem[$4, $1, true]; }
| expr T_DOUBLE_ARROW ampersand variable { $$ = Expr\ArrayItem[$4, $1, true]; }
| expr T_DOUBLE_ARROW list_expr { $$ = Expr\ArrayItem[$3, $1, false]; }
| T_ELLIPSIS expr { $$ = Expr\ArrayItem[$2, null, false, attributes(), true]; }
| /* empty */ { $$ = null; }
Expand Down
2 changes: 1 addition & 1 deletion grammar/tokens.y
Expand Up @@ -18,7 +18,7 @@
%left T_BOOLEAN_AND
%left '|'
%left '^'
%left '&'
%left T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
%nonassoc T_IS_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_SPACESHIP
%nonassoc '<' T_IS_SMALLER_OR_EQUAL '>' T_IS_GREATER_OR_EQUAL
%left T_SL T_SR
Expand Down
29 changes: 25 additions & 4 deletions lib/PhpParser/Lexer.php
Expand Up @@ -134,10 +134,11 @@ protected function postprocessTokens(ErrorHandler $errorHandler) {
// detected by finding "gaps" in the token array. Unterminated comments are detected
// by checking if a trailing comment has a "*/" at the end.
//
// Additionally, we canonicalize to the PHP 8 comment format here, which does not include
// the trailing whitespace anymore.
//
// We also canonicalize to the PHP 8 T_NAME_* tokens.
// Additionally, we perform a number of canonicalizations here:
// * Use the PHP 8.0 comment format, which does not include trailing whitespace anymore.
// * Use PHP 8.0 T_NAME_* tokens.
// * Use the PHP 8.1 T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG and
// T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG tokens, which disambiguate intersection types.

$filePos = 0;
$line = 1;
Expand Down Expand Up @@ -208,6 +209,22 @@ protected function postprocessTokens(ErrorHandler $errorHandler) {
}
}

if ($token === '&') {
$next = $i + 1;
while (isset($this->tokens[$next]) && $this->tokens[$next][0] === \T_WHITESPACE) {
$next++;
}
$followedByVarOrVarArg = isset($this->tokens[$next]) &&
($this->tokens[$next][0] === \T_VARIABLE || $this->tokens[$next][0] === \T_ELLIPSIS);
$this->tokens[$i] = $token = [
$followedByVarOrVarArg
? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
: \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG,
'&',
$line,
];
}

$tokenValue = \is_string($token) ? $token : $token[1];
$tokenLen = \strlen($tokenValue);

Expand Down Expand Up @@ -424,6 +441,8 @@ private function defineCompatibilityTokens() {
'T_ATTRIBUTE',
// PHP 8.1
'T_ENUM',
'T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG',
'T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG',
];

// PHP-Parser might be used together with another library that also emulates some or all
Expand Down Expand Up @@ -514,6 +533,8 @@ protected function createTokenMap() : array {
$tokenMap[\T_MATCH] = Tokens::T_MATCH;
$tokenMap[\T_NULLSAFE_OBJECT_OPERATOR] = Tokens::T_NULLSAFE_OBJECT_OPERATOR;
$tokenMap[\T_ATTRIBUTE] = Tokens::T_ATTRIBUTE;
$tokenMap[\T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG] = Tokens::T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
$tokenMap[\T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG] = Tokens::T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG;
$tokenMap[\T_ENUM] = Tokens::T_ENUM;

return $tokenMap;
Expand Down

0 comments on commit c758510

Please sign in to comment.