diff --git a/src/Parser.php b/src/Parser.php index 722c447e..5687d751 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -135,6 +135,21 @@ public function __construct() { $this->returnTypeDeclarationTokens = \array_merge([TokenKind::VoidReservedWord, TokenKind::NullReservedWord, TokenKind::FalseReservedWord, TokenKind::StaticKeyword], $this->parameterTypeDeclarationTokens); } + /** + * Creates the lexer (token stream provider) that parseSourceFile() assigns to $this->lexer. + * The default implementation delegates to TokenStreamProviderFactory::GetTokenStreamProvider(). + * + * This method exists so that it can be overridden in subclasses. + * Any subclass must return a token stream that is equivalent to the contents of $fileContents for this to work properly. + * + * Possible reasons for applications to override the lexer: + * + * - Imitate the token stream of a newer/older PHP version (e.g. T_FN is only available as of PHP 7.4) + * - Reuse the result of token_get_all to create a Node again. + * - Reuse the result of token_get_all in a different library. + * + * @param string $fileContents the source file contents handed to parseSourceFile() + * @return TokenStreamProviderInterface the lexer for $fileContents + */ + protected function makeLexer(string $fileContents): TokenStreamProviderInterface + { + return TokenStreamProviderFactory::GetTokenStreamProvider($fileContents); + } + /** * Generates AST from source file contents. Returns an instance of SourceFileNode, which is always the top-most * Node-type of the tree. @@ -143,7 +158,7 @@ public function __construct() { * @return SourceFileNode */ public function parseSourceFile(string $fileContents, string $uri = null) : SourceFileNode { - $this->lexer = TokenStreamProviderFactory::GetTokenStreamProvider($fileContents); + $this->lexer = $this->makeLexer($fileContents); $this->reset(); diff --git a/src/PhpTokenizer.php b/src/PhpTokenizer.php index 756cd159..1cc96754 100644 --- a/src/PhpTokenizer.php +++ b/src/PhpTokenizer.php @@ -74,7 +74,7 @@ public static function getTokensArrayFromContent( $content = $prefix . 
$content; } - $tokens = @\token_get_all($content); + $tokens = static::tokenGetAll($content, $parseContext); $arr = array(); $fullStart = $start = $pos = $initialPos; @@ -147,6 +147,22 @@ public static function getTokensArrayFromContent( return $arr; } + /** + * Tokenizes $content with PHP's token_get_all(); the @ operator suppresses any warnings it raises. + * The default implementation does not use $parseContext. + * + * @param string $content the raw PHP code + * @param ?int $parseContext can be SourceElements when extracting doc comments. + * Having this available may be useful for subclasses to decide whether or not to post-process results, cache results, etc. + * @return (array|string)[] a list of tokens, each either an array or a string. When concatenated, these tokens must equal $content. + * + * This exists so that it can be overridden in subclasses, e.g. to cache the result of tokenizing entire files; + * getTokensArrayFromContent() calls it through static:: so that an override is picked up. + * Applications using tolerant-php-parser may often end up needing to use the token stream for other reasons that are hard to do in the resulting AST, + * such as iterating over T_COMMENT tokens, checking for inline HTML, + * looking up all tokens (including skipped tokens) on a given line, etc. + */ + protected static function tokenGetAll(string $content, $parseContext): array + { + return @\token_get_all($content); + } + const TOKEN_MAP = [ T_CLASS_C => TokenKind::Name, T_DIR => TokenKind::Name,