Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Extensions/StandardExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ public function getTags(): array
Tags\ContinueTag::class,
Tags\CycleTag::class,
Tags\DecrementTag::class,
Tags\DocTag::class,
Tags\EchoTag::class,
Tags\ForTag::class,
Tags\IfChanged::class,
Tags\IfTag::class,
Tags\IncrementTag::class,
Tags\LiquidTag::class,
Tags\RawTag::class,
Tags\RenderTag::class,
Tags\TableRowTag::class,
Tags\UnlessTag::class,
Expand Down
148 changes: 92 additions & 56 deletions src/Parse/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Keepsuit\Liquid\Parse;

use Keepsuit\Liquid\Exceptions\SyntaxException;
use Keepsuit\Liquid\TagBlock;
use RuntimeException;

class Lexer
Expand Down Expand Up @@ -30,12 +31,17 @@ class Lexer
protected array $tokens;

/**
* @var array<int, array<int, array{0:string,1:int}>>
* @var array<int, array{0:string,1:int}>
*/
protected array $positions;

protected int $position;

/**
* @var string[]
*/
protected array $rawBodyTags;

public function __construct(
protected ParseContext $parseContext,
) {}
Expand All @@ -53,10 +59,17 @@ public function tokenize(string $source): TokenStream
$this->state = LexerState::Data;
$this->tokens = [];

$this->rawBodyTags = array_keys(array_filter($this->parseContext->environment->tagRegistry->all(), function ($tag) {
if (! is_subclass_of($tag, TagBlock::class)) {
return false;
}

return $tag::hasRawBody();
}));

$this->parseContext->lineNumber = 1;

preg_match_all(LexerOptions::tokenStartRegex(), $this->source, $matches, PREG_OFFSET_CAPTURE);
$this->positions = $matches;
$this->positions = $this->extractTokenStarts($this->source);
$this->position = -1;

while ($this->cursor < $this->end) {
Expand All @@ -79,42 +92,36 @@ public function tokenize(string $source): TokenStream
protected function lexData(): void
{
// if no matches are left we return the rest of the template as simple text token
if ($this->position == count($this->positions[0]) - 1) {
if ($this->position == count($this->positions) - 1) {
$this->pushToken(TokenType::TextData, substr($this->source, $this->cursor));
$this->cursor = $this->end;

return;
}

// Find the first token after the current cursor
$position = $this->positions[0][++$this->position];
$position = $this->positions[++$this->position];
while ($position[1] < $this->cursor) {
if ($this->position == count($this->positions[0]) - 1) {
if ($this->position == count($this->positions) - 1) {
return;
}
$position = $this->positions[0][++$this->position];
$position = $this->positions[++$this->position];
}

// push the template text before the token first
$text = $textBeforeToken = substr($this->source, $this->cursor, $position[1] - $this->cursor);

// trim?
if ($this->positions[2][$this->position][0] === LexerOptions::WhitespaceTrim->value) {
if (($this->positions[$this->position][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) {
$textBeforeToken = rtrim($textBeforeToken);
}

$this->pushToken(TokenType::TextData, $textBeforeToken);
$this->moveCursor($text.$position[0]);

switch ($this->positions[1][$this->position][0]) {
switch ($this->positions[$this->position][0]) {
case LexerOptions::TagBlockStart->value:
// {% raw %}
if (preg_match(LexerOptions::blockRawStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
$this->moveCursor($matches[0]);
$this->lexRawData();
break;
}

case LexerOptions::TagBlockStart->value.LexerOptions::WhitespaceTrim->value:
// {% comment %}
if (preg_match(LexerOptions::blockCommentStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
$this->moveCursor($matches[0]);
Expand All @@ -127,6 +134,7 @@ protected function lexData(): void
$this->currentVarBlockLine = $this->lineNumber;
break;
case LexerOptions::TagVariableStart->value:
case LexerOptions::TagVariableStart->value.LexerOptions::WhitespaceTrim->value:
$this->pushToken(TokenType::VariableStart);
$this->pushState(LexerState::Variable);
$this->currentVarBlockLine = $this->lineNumber;
Expand All @@ -145,9 +153,8 @@ protected function lexVariable(): void
$this->popState();

// trim?
if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) {
preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
$this->moveCursor($matches[0] ?? '');
if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
$this->trimWhitespaces();
}
} else {
$this->lexExpression();
Expand All @@ -159,18 +166,40 @@ protected function lexVariable(): void
*/
protected function lexBlock(): void
{
if (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
$this->pushToken(TokenType::BlockEnd);
$this->moveCursor($matches[0]);
$this->popState();
$tag = null;

// trim?
if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) {
preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
$this->moveCursor($matches[0] ?? '');
// Parse the full expression inside {% ... %}
while (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) !== 1) {
$this->lexExpression();

$lastToken = $this->tokens[array_key_last($this->tokens)];

if ($tag === null && $lastToken->type === TokenType::Identifier) {
$tag = $lastToken;
}
}

// Move the cursor to the end of the block
$this->moveCursor($matches[0]);

// trim?
if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
$this->trimWhitespaces();
}

// If the last token is a block start, we remove the node
$lastToken = $this->tokens[array_key_last($this->tokens)];
if ($lastToken->type === TokenType::BlockStart) {
array_pop($this->tokens);
} else {
$this->lexExpression();
$this->pushToken(TokenType::BlockEnd);
}

$this->popState();

// If the tag is a raw body tag, we need to lex the body as raw data instead of liquid blocks
if ($tag !== null && in_array($tag->data, $this->rawBodyTags, true)) {
$this->laxRawBodyTag($tag->data);
}
}

Expand Down Expand Up @@ -227,23 +256,27 @@ protected function ensureStreamNotEnded(): void
}
}

protected function lexRawData(): void
protected function laxRawBodyTag(string $tag): void
{
if (preg_match(LexerOptions::blockRawDataRegex(), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) {
throw SyntaxException::tagBlockNeverClosed('raw');
if (preg_match(LexerOptions::blockRawBodyTagDataRegex($tag), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) {
throw SyntaxException::tagBlockNeverClosed($tag);
}

$text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);
$rawBody = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);

$this->moveCursor($text.$matches[0][0]);
$this->moveCursor($rawBody);

// trim?
if (isset($matches[2][0])) {
preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor);
$this->moveCursor($matches2[0] ?? '');
// inner trim?
if (($matches[1][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) {
$rawBody = rtrim($rawBody);
}

$this->pushToken(TokenType::RawData, $text);
$this->pushToken(TokenType::RawData, $rawBody);

// trim?
if ($matches[2][0][0] === LexerOptions::WhitespaceTrim->value) {
$this->trimWhitespaces();
}
}

protected function lexComment(): void
Expand All @@ -265,24 +298,7 @@ protected function lexInlineComment(): void

$text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);

$this->moveCursor($text.$matches[0][0]);

if ($matches[1][0] === "\n") {
return;
}

$lastToken = $this->tokens[count($this->tokens) - 1] ?? null;

if ($lastToken?->type === TokenType::BlockStart) {
array_pop($this->tokens);
} else {
$this->pushToken(TokenType::BlockEnd);
}

if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor);
$this->moveCursor($matches2[0] ?? '');
}
$this->moveCursor($text);
}

protected function pushToken(TokenType $type, string $value = ''): void
Expand Down Expand Up @@ -322,4 +338,24 @@ protected function popState(): void

$this->state = $state;
}

protected function trimWhitespaces(): void
{
preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
$this->moveCursor($matches[0] ?? '');
}

/**
* @return array<int,array{0:string,1:int}>
*/
protected function extractTokenStarts(string $source): array
{
preg_match_all(LexerOptions::blockStartRegex(), $source, $blocks, PREG_OFFSET_CAPTURE);
preg_match_all(LexerOptions::variableStartRegex(), $source, $variables, PREG_OFFSET_CAPTURE);

$positions = array_merge($blocks[0], $variables[0]);
usort($positions, fn (array $a, array $b) => $a[1] <=> $b[1]);

return $positions;
}
}
45 changes: 15 additions & 30 deletions src/Parse/LexerOptions.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@ enum LexerOptions: string

case WhitespaceTrim = '-';

public static function tokenStartRegex(): string
public static function blockStartRegex(): string
{
static $regex;

if ($regex === null) {
$regex = sprintf(
'{(%s|%s)(%s)?}sx',
preg_quote(LexerOptions::TagVariableStart->value),
'{(%s%s?)}sx',
preg_quote(LexerOptions::TagBlockStart->value),
preg_quote(LexerOptions::WhitespaceTrim->value)
);
Expand All @@ -33,15 +32,15 @@ public static function tokenStartRegex(): string
return $regex;
}

public static function commentBlockRegex(): string
public static function variableStartRegex(): string
{
static $regex;

if ($regex === null) {
$regex = sprintf(
"{\s*comment\s*(?:%s|%s')}Asx",
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
preg_quote(LexerOptions::TagBlockEnd->value),
'{(%s%s?)}sx',
preg_quote(LexerOptions::TagVariableStart->value),
preg_quote(LexerOptions::WhitespaceTrim->value)
);
}

Expand All @@ -54,7 +53,7 @@ public static function variableEndRegex(): string

if ($regex === null) {
$regex = sprintf(
'{\s*(?:%s|%s)}Ax',
'{\s*(%s|%s)}Ax',
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagVariableEnd->value),
preg_quote(LexerOptions::TagVariableEnd->value),
);
Expand All @@ -69,22 +68,7 @@ public static function blockEndRegex(): string

if ($regex === null) {
$regex = sprintf(
'{\s*(?:%s|%s)}Ax',
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
preg_quote(LexerOptions::TagBlockEnd->value),
);
}

return $regex;
}

public static function blockRawStartRegex(): string
{
static $regex;

if ($regex === null) {
$regex = sprintf(
'{\s*raw\s*(?:%s|%s)}Ax',
'{\s*(%s|%s)}Ax',
preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
preg_quote(LexerOptions::TagBlockEnd->value),
);
Expand All @@ -93,21 +77,22 @@ public static function blockRawStartRegex(): string
return $regex;
}

public static function blockRawDataRegex(): string
public static function blockRawBodyTagDataRegex(string $tag): string
{
static $regex;
static $regex = [];

if ($regex === null) {
$regex = sprintf(
'{%s(%s)?\s*endraw\s*(%s)?%s}sx',
if (($regex[$tag] ?? null) === null) {
$regex[$tag] = sprintf(
'{(%s%s?)\s*end%s\s*(%s?%s)}sx',
preg_quote(LexerOptions::TagBlockStart->value),
LexerOptions::WhitespaceTrim->value,
preg_quote($tag),
LexerOptions::WhitespaceTrim->value,
preg_quote(LexerOptions::TagBlockEnd->value),
);
}

return $regex;
return $regex[$tag];
}

public static function blockCommentStartRegex(): string
Expand Down
5 changes: 5 additions & 0 deletions src/TagBlock.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ public function parseTreeVisitorChildren(): array
{
return [];
}

public static function hasRawBody(): bool
{
return false;
}
}
Loading