From 9bb043e643ed07f243f85cc4a878fc0d0a923387 Mon Sep 17 00:00:00 2001 From: Tyson Andre Date: Sun, 2 Aug 2020 11:32:36 -0400 Subject: [PATCH 1/2] Support php 8.0's "Treat namespaced names as single token" Without this, tolerant-php-parser won't work in php 8.0. Fixes #327 --- src/PhpTokenizer.php | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/PhpTokenizer.php b/src/PhpTokenizer.php index 88be930b..43c5ddae 100644 --- a/src/PhpTokenizer.php +++ b/src/PhpTokenizer.php @@ -130,6 +130,57 @@ public static function getTokensArrayFromContent( $arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart); $start = $fullStart = $pos; break; + case \PHP_VERSION_ID >= 80000 ? \T_NAME_QUALIFIED : -1000: + case \PHP_VERSION_ID >= 80000 ? \T_NAME_FULLY_QUALIFIED : -1001: + // NOTE: This switch is called on every token of every file being parsed, so this trades readability for performance. + // + // PHP's Opcache is able to optimize switches that are exclusively known longs, + // but not switches that mix strings and longs or have unknown longs. + // Longs are only known if they're declared within the same *class* or an internal constant (tokenizer). + // + // For some reason, the SWITCH_LONG opcode was not generated when the expression was part of a class constant. + // (seen with php -d opcache.opt_debug_level=0x20000) + // + // Use negative values because that's not expected to overlap with token kinds that token_get_all() will return. + // + // T_NAME_* was added in php 8.0 to forbid whitespace between parts of names. + // Here, emulate the tokenization of php 7 by splitting it up into 1 or more tokens. + foreach (\explode('\\', $token[1]) as $i => $name) { + if ($i) { + $arr[] = new Token(TokenKind::BackslashToken, $fullStart, $start, 1 + $start - $fullStart); + $start++; + $fullStart = $start; + } + if ($name === '') { + continue; + } + // TODO: TokenStringMaps::RESERVED_WORDS[$name] ?? 
TokenKind::Name for compatibility? + $len = \strlen($name); + $arr[] = new Token(TokenKind::Name, $fullStart, $start, $len + $start - $fullStart); + $start += $len; + $fullStart = $start; + } + break; + case \PHP_VERSION_ID >= 80000 ? \T_NAME_RELATIVE : -1002: + // This is a namespace-relative name: namespace\... + foreach (\explode('\\', $token[1]) as $i => $name) { + $len = \strlen($name); + if (!$i) { + $arr[] = new Token(TokenKind::NamespaceKeyword, $fullStart, $start, $len + $start - $fullStart); + $start += $len; + $fullStart = $start; + continue; + } + $arr[] = new Token(TokenKind::BackslashToken, $fullStart, $start, 1); + $start++; + + // TODO: TokenStringMaps::RESERVED_WORDS[$name] ?? TokenKind::Name for compatibility? + $arr[] = new Token(TokenKind::Name, $start, $start, $len); + + $start += $len; + $fullStart = $start; + } + break; case \T_COMMENT: case \T_DOC_COMMENT: if ($treatCommentsAsTrivia) { From 2c63e8aec5eca47ae4c39980129f33b2e2f9b9e1 Mon Sep 17 00:00:00 2001 From: Tyson Andre Date: Sun, 2 Aug 2020 12:02:57 -0400 Subject: [PATCH 2/2] Support php 8.0's nullsafe operator See https://wiki.php.net/rfc/nullsafe_operator and the corresponding github implementation PR. 
--- src/Parser.php | 8 +- src/PhpTokenizer.php | 4 +- src/TokenKind.php | 1 + src/TokenStringMaps.php | 1 + tests/cases/parser80/nullsafe_operator1.php | 2 + .../parser80/nullsafe_operator1.php.diag | 1 + .../parser80/nullsafe_operator1.php.tree | 67 +++++++++++++++ tests/cases/parser80/nullsafe_operator2.php | 2 + .../parser80/nullsafe_operator2.php.diag | 1 + .../parser80/nullsafe_operator2.php.tree | 81 +++++++++++++++++++ .../parser80/nullsafe_operator3.php.diag | 1 + .../parser80/nullsafe_operator3.php.tree | 69 ++++++++++++++++ 12 files changed, 233 insertions(+), 5 deletions(-) create mode 100644 tests/cases/parser80/nullsafe_operator1.php create mode 100644 tests/cases/parser80/nullsafe_operator1.php.diag create mode 100644 tests/cases/parser80/nullsafe_operator1.php.tree create mode 100644 tests/cases/parser80/nullsafe_operator2.php create mode 100644 tests/cases/parser80/nullsafe_operator2.php.diag create mode 100644 tests/cases/parser80/nullsafe_operator2.php.tree create mode 100644 tests/cases/parser80/nullsafe_operator3.php.diag create mode 100644 tests/cases/parser80/nullsafe_operator3.php.tree diff --git a/src/Parser.php b/src/Parser.php index 54cddd05..cbb44437 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -1199,7 +1199,7 @@ private function parseTemplateStringExpression($parentNode) { $token = $this->getCurrentToken(); if ($token->kind === TokenKind::OpenBracketToken) { return $this->parseTemplateStringSubscriptExpression($var); - } else if ($token->kind === TokenKind::ArrowToken) { + } else if ($token->kind === TokenKind::ArrowToken || $token->kind === TokenKind::QuestionArrowToken) { return $this->parseTemplateStringMemberAccessExpression($var); } else { return $var; @@ -1248,7 +1248,7 @@ private function parseTemplateStringMemberAccessExpression($expression) : Member $expression->parent = $memberAccessExpression; $memberAccessExpression->dereferencableExpression = $expression; - $memberAccessExpression->arrowToken = 
$this->eat1(TokenKind::ArrowToken); + $memberAccessExpression->arrowToken = $this->eat(TokenKind::ArrowToken, TokenKind::QuestionArrowToken); $memberAccessExpression->memberName = $this->eat1(TokenKind::Name); return $memberAccessExpression; @@ -2711,7 +2711,7 @@ private function parsePostfixExpressionRest($expression, $allowUpdateExpression return $expression; } - if ($tokenKind === TokenKind::ArrowToken) { + if ($tokenKind === TokenKind::ArrowToken || $tokenKind === TokenKind::QuestionArrowToken) { $expression = $this->parseMemberAccessExpression($expression); return $this->parsePostfixExpressionRest($expression); } @@ -2836,7 +2836,7 @@ private function parseMemberAccessExpression($expression):MemberAccessExpression $expression->parent = $memberAccessExpression; $memberAccessExpression->dereferencableExpression = $expression; - $memberAccessExpression->arrowToken = $this->eat1(TokenKind::ArrowToken); + $memberAccessExpression->arrowToken = $this->eat(TokenKind::ArrowToken, TokenKind::QuestionArrowToken); $memberAccessExpression->memberName = $this->parseMemberName($memberAccessExpression); return $memberAccessExpression; diff --git a/src/PhpTokenizer.php b/src/PhpTokenizer.php index 43c5ddae..10c2df8a 100644 --- a/src/PhpTokenizer.php +++ b/src/PhpTokenizer.php @@ -10,8 +10,9 @@ // The replacement value is arbitrary - it just has to be different from other values of token constants. define(__NAMESPACE__ . '\T_COALESCE_EQUAL', defined('T_COALESCE_EQUAL') ? constant('T_COALESCE_EQUAL') : 'T_COALESCE_EQUAL'); define(__NAMESPACE__ . '\T_FN', defined('T_FN') ? constant('T_FN') : 'T_FN'); -// If this predaates PHP 8.0, T_MATCH is unavailable. The replacement value is arbitrary - it just has to be different from other values of token constants. +// If this predates PHP 8.0, T_MATCH is unavailable. The replacement value is arbitrary - it just has to be different from other values of token constants. define(__NAMESPACE__ . '\T_MATCH', defined('T_MATCH') ? 
constant('T_MATCH') : 'T_MATCH'); +define(__NAMESPACE__ . '\T_NULLSAFE_OBJECT_OPERATOR', defined('T_NULLSAFE_OBJECT_OPERATOR') ? constant('T_NULLSAFE_OBJECT_OPERATOR') : 'T_NULLSAFE_OBJECT_OPERATOR'); /** * Tokenizes content using PHP's built-in `token_get_all`, and converts to "lightweight" Token representation. @@ -307,6 +308,7 @@ protected static function tokenGetAll(string $content, $parseContext): array "}" => TokenKind::CloseBraceToken, "." => TokenKind::DotToken, T_OBJECT_OPERATOR => TokenKind::ArrowToken, + T_NULLSAFE_OBJECT_OPERATOR => TokenKind::QuestionArrowToken, T_INC => TokenKind::PlusPlusToken, T_DEC => TokenKind::MinusMinusToken, T_POW => TokenKind::AsteriskAsteriskToken, diff --git a/src/TokenKind.php b/src/TokenKind.php index 6207325b..26bb9d84 100644 --- a/src/TokenKind.php +++ b/src/TokenKind.php @@ -149,6 +149,7 @@ class TokenKind { const BacktickToken = 260; const QuestionToken = 261; const QuestionQuestionEqualsToken = 262; + const QuestionArrowToken = 263; const DecimalLiteralToken = 301; const OctalLiteralToken = 302; diff --git a/src/TokenStringMaps.php b/src/TokenStringMaps.php index bae4733a..e344a267 100644 --- a/src/TokenStringMaps.php +++ b/src/TokenStringMaps.php @@ -160,6 +160,7 @@ class TokenStringMaps { "^=" => TokenKind::CaretEqualsToken, "|=" => TokenKind::BarEqualsToken, "," => TokenKind::CommaToken, + "?->" => TokenKind::QuestionArrowToken, "??" 
=> TokenKind::QuestionQuestionToken, "??=" => TokenKind::QuestionQuestionEqualsToken, "<=>" => TokenKind::LessThanEqualsGreaterThanToken, diff --git a/tests/cases/parser80/nullsafe_operator1.php b/tests/cases/parser80/nullsafe_operator1.php new file mode 100644 index 00000000..1a3303ba --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator1.php @@ -0,0 +1,2 @@ +b; diff --git a/tests/cases/parser80/nullsafe_operator1.php.diag b/tests/cases/parser80/nullsafe_operator1.php.diag new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator1.php.diag @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/cases/parser80/nullsafe_operator1.php.tree b/tests/cases/parser80/nullsafe_operator1.php.tree new file mode 100644 index 00000000..f01f7a4d --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator1.php.tree @@ -0,0 +1,67 @@ +{ + "SourceFileNode": { + "statementList": [ + { + "InlineHtml": { + "scriptSectionEndTag": null, + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 6 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "AssignmentExpression": { + "leftOperand": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "operator": { + "kind": "EqualsToken", + "textLength": 1 + }, + "byRef": null, + "rightOperand": { + "MemberAccessExpression": { + "dereferencableExpression": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "arrowToken": { + "kind": "QuestionArrowToken", + "textLength": 3 + }, + "memberName": { + "kind": "Name", + "textLength": 1 + } + } + } + } + }, + "semicolon": { + "kind": "SemicolonToken", + "textLength": 1 + } + } + } + ], + "endOfFileToken": { + "kind": "EndOfFileToken", + "textLength": 0 + } + } +} \ No newline at end of file diff --git a/tests/cases/parser80/nullsafe_operator2.php 
b/tests/cases/parser80/nullsafe_operator2.php new file mode 100644 index 00000000..e56ce8af --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator2.php @@ -0,0 +1,2 @@ +foo(1); diff --git a/tests/cases/parser80/nullsafe_operator2.php.diag b/tests/cases/parser80/nullsafe_operator2.php.diag new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator2.php.diag @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/cases/parser80/nullsafe_operator2.php.tree b/tests/cases/parser80/nullsafe_operator2.php.tree new file mode 100644 index 00000000..e7eb045d --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator2.php.tree @@ -0,0 +1,81 @@ +{ + "SourceFileNode": { + "statementList": [ + { + "InlineHtml": { + "scriptSectionEndTag": null, + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 6 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "CallExpression": { + "callableExpression": { + "MemberAccessExpression": { + "dereferencableExpression": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "arrowToken": { + "kind": "QuestionArrowToken", + "textLength": 3 + }, + "memberName": { + "kind": "Name", + "textLength": 3 + } + } + }, + "openParen": { + "kind": "OpenParenToken", + "textLength": 1 + }, + "argumentExpressionList": { + "ArgumentExpressionList": { + "children": [ + { + "ArgumentExpression": { + "byRefToken": null, + "dotDotDotToken": null, + "expression": { + "NumericLiteral": { + "children": { + "kind": "IntegerLiteralToken", + "textLength": 1 + } + } + } + } + } + ] + } + }, + "closeParen": { + "kind": "CloseParenToken", + "textLength": 1 + } + } + }, + "semicolon": { + "kind": "SemicolonToken", + "textLength": 1 + } + } + } + ], + "endOfFileToken": { + "kind": "EndOfFileToken", + "textLength": 0 + } + } +} \ No newline at end of file diff --git 
a/tests/cases/parser80/nullsafe_operator3.php.diag b/tests/cases/parser80/nullsafe_operator3.php.diag new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator3.php.diag @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/cases/parser80/nullsafe_operator3.php.tree b/tests/cases/parser80/nullsafe_operator3.php.tree new file mode 100644 index 00000000..cf677531 --- /dev/null +++ b/tests/cases/parser80/nullsafe_operator3.php.tree @@ -0,0 +1,69 @@ +{ + "SourceFileNode": { + "statementList": [ + { + "InlineHtml": { + "scriptSectionEndTag": null, + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 6 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "ObjectCreationExpression": { + "newKeword": { + "kind": "NewKeyword", + "textLength": 3 + }, + "classTypeDesignator": { + "MemberAccessExpression": { + "dereferencableExpression": { + "Variable": { + "dollar": null, + "name": { + "kind": "VariableName", + "textLength": 2 + } + } + }, + "arrowToken": { + "kind": "QuestionArrowToken", + "textLength": 3 + }, + "memberName": { + "kind": "Name", + "textLength": 1 + } + } + }, + "openParen": { + "kind": "OpenParenToken", + "textLength": 1 + }, + "argumentExpressionList": null, + "closeParen": { + "kind": "CloseParenToken", + "textLength": 1 + }, + "classBaseClause": null, + "classInterfaceClause": null, + "classMembers": null + } + }, + "semicolon": { + "kind": "SemicolonToken", + "textLength": 1 + } + } + } + ], + "endOfFileToken": { + "kind": "EndOfFileToken", + "textLength": 0 + } + } +} \ No newline at end of file