Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,21 @@ public function __construct() {
$this->returnTypeDeclarationTokens = \array_merge([TokenKind::VoidReservedWord, TokenKind::NullReservedWord, TokenKind::FalseReservedWord, TokenKind::StaticKeyword], $this->parameterTypeDeclarationTokens);
}

/**
* Creates the token stream provider (lexer) for the given file contents.
*
* The default implementation delegates to TokenStreamProviderFactory::GetTokenStreamProvider().
* This method exists so that it can be overridden in subclasses.
* Any subclass must return a token stream that is equivalent to the contents of $fileContents for this to work properly.
*
* Possible reasons for applications to override the lexer:
*
* - Imitate the token stream of a newer/older PHP version (e.g. T_FN is only available in PHP 7.4+)
* - Reuse the result of token_get_all to create a Node again.
* - Reuse the result of token_get_all in a different library.
*
* @param string $fileContents the source text that the returned token stream must represent
* @return TokenStreamProviderInterface the token stream provider for $fileContents
*/
protected function makeLexer(string $fileContents): TokenStreamProviderInterface
{
// Default behavior: let the factory build the provider for this content.
return TokenStreamProviderFactory::GetTokenStreamProvider($fileContents);
}

/**
* Generates AST from source file contents. Returns an instance of SourceFileNode, which is always the top-most
* Node-type of the tree.
Expand All @@ -143,7 +158,7 @@ public function __construct() {
* @return SourceFileNode
*/
public function parseSourceFile(string $fileContents, string $uri = null) : SourceFileNode {
$this->lexer = TokenStreamProviderFactory::GetTokenStreamProvider($fileContents);
$this->lexer = $this->makeLexer($fileContents);

$this->reset();

Expand Down
18 changes: 17 additions & 1 deletion src/PhpTokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public static function getTokensArrayFromContent(
$content = $prefix . $content;
}

$tokens = @\token_get_all($content);
$tokens = static::tokenGetAll($content, $parseContext);

$arr = array();
$fullStart = $start = $pos = $initialPos;
Expand Down Expand Up @@ -147,6 +147,22 @@ public static function getTokensArrayFromContent(
return $arr;
}

/**
* Tokenizes $content with PHP's built-in token_get_all().
*
* This exists so that it can be overridden in subclasses, e.g. to cache the result of tokenizing entire files.
* Applications using tolerant-php-parser may often end up needing to use the token stream for other reasons that are hard to do in the resulting AST,
* such as iterating over T_COMMENT tokens, checking for inline HTML,
* looking up all tokens (including skipped tokens) on a given line, etc.
*
* The default implementation ignores $parseContext and silences any diagnostics raised by token_get_all() with the `@` operator.
*
* @param string $content the raw php code
* @param ?int $parseContext can be SourceElements when extracting doc comments.
* Having this available may be useful for subclasses to decide whether or not to post-process results, cache results, etc.
* @return array[]|string[] an array of tokens. When concatenated, these tokens must equal $content.
*/
protected static function tokenGetAll(string $content, $parseContext): array
{
// $parseContext is intentionally unused here; it is only passed through for the benefit of overriding subclasses.
return @\token_get_all($content);
}

const TOKEN_MAP = [
T_CLASS_C => TokenKind::Name,
T_DIR => TokenKind::Name,
Expand Down