diff --git a/src/Extensions/StandardExtension.php b/src/Extensions/StandardExtension.php index f5c6245..88f4c11 100644 --- a/src/Extensions/StandardExtension.php +++ b/src/Extensions/StandardExtension.php @@ -17,12 +17,14 @@ public function getTags(): array Tags\ContinueTag::class, Tags\CycleTag::class, Tags\DecrementTag::class, + Tags\DocTag::class, Tags\EchoTag::class, Tags\ForTag::class, Tags\IfChanged::class, Tags\IfTag::class, Tags\IncrementTag::class, Tags\LiquidTag::class, + Tags\RawTag::class, Tags\RenderTag::class, Tags\TableRowTag::class, Tags\UnlessTag::class, diff --git a/src/Parse/Lexer.php b/src/Parse/Lexer.php index abe2e3c..c8fb57e 100644 --- a/src/Parse/Lexer.php +++ b/src/Parse/Lexer.php @@ -3,6 +3,7 @@ namespace Keepsuit\Liquid\Parse; use Keepsuit\Liquid\Exceptions\SyntaxException; +use Keepsuit\Liquid\TagBlock; use RuntimeException; class Lexer @@ -30,12 +31,17 @@ class Lexer protected array $tokens; /** - * @var array> + * @var array */ protected array $positions; protected int $position; + /** + * @var string[] + */ + protected array $rawBodyTags; + public function __construct( protected ParseContext $parseContext, ) {} @@ -53,10 +59,17 @@ public function tokenize(string $source): TokenStream $this->state = LexerState::Data; $this->tokens = []; + $this->rawBodyTags = array_keys(array_filter($this->parseContext->environment->tagRegistry->all(), function ($tag) { + if (! is_subclass_of($tag, TagBlock::class)) { + return false; + } + + return $tag::hasRawBody(); + })); + $this->parseContext->lineNumber = 1; - preg_match_all(LexerOptions::tokenStartRegex(), $this->source, $matches, PREG_OFFSET_CAPTURE); - $this->positions = $matches; + $this->positions = $this->extractTokenStarts($this->source); $this->position = -1; while ($this->cursor < $this->end) { @@ -79,7 +92,7 @@ public function tokenize(string $source): TokenStream protected function lexData(): void { // if no matches are left we return the rest of the template as simple text token - if ($this->position == count($this->positions[0]) - 1) { + if ($this->position == count($this->positions) - 1) { $this->pushToken(TokenType::TextData, substr($this->source, $this->cursor)); $this->cursor = $this->end; @@ -87,34 +100,28 @@ protected function lexData(): void } // Find the first token after the current cursor - $position = $this->positions[0][++$this->position]; + $position = $this->positions[++$this->position]; while ($position[1] < $this->cursor) { - if ($this->position == count($this->positions[0]) - 1) { + if ($this->position == count($this->positions) - 1) { return; } - $position = $this->positions[0][++$this->position]; + $position = $this->positions[++$this->position]; } // push the template text before the token first $text = $textBeforeToken = substr($this->source, $this->cursor, $position[1] - $this->cursor); // trim? - if ($this->positions[2][$this->position][0] === LexerOptions::WhitespaceTrim->value) { + if (($this->positions[$this->position][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) { $textBeforeToken = rtrim($textBeforeToken); } $this->pushToken(TokenType::TextData, $textBeforeToken); $this->moveCursor($text.$position[0]); - switch ($this->positions[1][$this->position][0]) { + switch ($this->positions[$this->position][0]) { case LexerOptions::TagBlockStart->value: - // {% raw %} - if (preg_match(LexerOptions::blockRawStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) { - $this->moveCursor($matches[0]); - $this->lexRawData(); - break; - } - + case LexerOptions::TagBlockStart->value.LexerOptions::WhitespaceTrim->value: // {% comment %} if (preg_match(LexerOptions::blockCommentStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) { $this->moveCursor($matches[0]); @@ -127,6 +134,7 @@ protected function lexData(): void $this->currentVarBlockLine = $this->lineNumber; break; case LexerOptions::TagVariableStart->value: + case LexerOptions::TagVariableStart->value.LexerOptions::WhitespaceTrim->value: $this->pushToken(TokenType::VariableStart); $this->pushState(LexerState::Variable); $this->currentVarBlockLine = $this->lineNumber; @@ -145,9 +153,8 @@ protected function lexVariable(): void $this->popState(); // trim? - if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) { - preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor); - $this->moveCursor($matches[0] ?? ''); + if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) { + $this->trimWhitespaces(); } } else { $this->lexExpression(); @@ -159,18 +166,40 @@ protected function lexVariable(): void */ protected function lexBlock(): void { - if (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) === 1) { - $this->pushToken(TokenType::BlockEnd); - $this->moveCursor($matches[0]); - $this->popState(); + $tag = null; - // trim? - if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) { - preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor); - $this->moveCursor($matches[0] ?? ''); + // Parse the full expression inside {% ... %} + while (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) !== 1) { + $this->lexExpression(); + + $lastToken = $this->tokens[array_key_last($this->tokens)]; + + if ($tag === null && $lastToken->type === TokenType::Identifier) { + $tag = $lastToken; } + } + + // Move the cursor to the end of the block + $this->moveCursor($matches[0]); + + // trim? + if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) { + $this->trimWhitespaces(); + } + + // If the last token is a block start, we remove the node + $lastToken = $this->tokens[array_key_last($this->tokens)]; + if ($lastToken->type === TokenType::BlockStart) { + array_pop($this->tokens); } else { - $this->lexExpression(); + $this->pushToken(TokenType::BlockEnd); + } + + $this->popState(); + + // If the tag is a raw body tag, we need to lex the body as raw data instead of liquid blocks + if ($tag !== null && in_array($tag->data, $this->rawBodyTags, true)) { + $this->laxRawBodyTag($tag->data); } } @@ -227,23 +256,27 @@ protected function ensureStreamNotEnded(): void } } - protected function lexRawData(): void + protected function laxRawBodyTag(string $tag): void { - if (preg_match(LexerOptions::blockRawDataRegex(), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) { - throw SyntaxException::tagBlockNeverClosed('raw'); + if (preg_match(LexerOptions::blockRawBodyTagDataRegex($tag), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) { + throw SyntaxException::tagBlockNeverClosed($tag); } - $text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor); + $rawBody = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor); - $this->moveCursor($text.$matches[0][0]); + $this->moveCursor($rawBody); - // trim? - if (isset($matches[2][0])) { - preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor); - $this->moveCursor($matches2[0] ?? ''); + // inner trim? + if (($matches[1][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) { + $rawBody = rtrim($rawBody); } - $this->pushToken(TokenType::RawData, $text); + $this->pushToken(TokenType::RawData, $rawBody); + + // trim? + if ($matches[2][0][0] === LexerOptions::WhitespaceTrim->value) { + $this->trimWhitespaces(); + } } protected function lexComment(): void @@ -265,24 +298,7 @@ protected function lexInlineComment(): void $text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor); - $this->moveCursor($text.$matches[0][0]); - - if ($matches[1][0] === "\n") { - return; - } - - $lastToken = $this->tokens[count($this->tokens) - 1] ?? null; - - if ($lastToken?->type === TokenType::BlockStart) { - array_pop($this->tokens); - } else { - $this->pushToken(TokenType::BlockEnd); - } - - if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) { - preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor); - $this->moveCursor($matches2[0] ?? ''); - } + $this->moveCursor($text); } protected function pushToken(TokenType $type, string $value = ''): void @@ -322,4 +338,24 @@ protected function popState(): void $this->state = $state; } + + protected function trimWhitespaces(): void + { + preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor); + $this->moveCursor($matches[0] ?? ''); + } + + /** + * @return array + */ + protected function extractTokenStarts(string $source): array + { + preg_match_all(LexerOptions::blockStartRegex(), $source, $blocks, PREG_OFFSET_CAPTURE); + preg_match_all(LexerOptions::variableStartRegex(), $source, $variables, PREG_OFFSET_CAPTURE); + + $positions = array_merge($blocks[0], $variables[0]); + usort($positions, fn (array $a, array $b) => $a[1] <=> $b[1]); + + return $positions; + } } diff --git a/src/Parse/LexerOptions.php b/src/Parse/LexerOptions.php index e57e012..62b11ff 100644 --- a/src/Parse/LexerOptions.php +++ b/src/Parse/LexerOptions.php @@ -17,14 +17,13 @@ enum LexerOptions: string case WhitespaceTrim = '-'; - public static function tokenStartRegex(): string + public static function blockStartRegex(): string { static $regex; if ($regex === null) { $regex = sprintf( - '{(%s|%s)(%s)?}sx', - preg_quote(LexerOptions::TagVariableStart->value), + '{(%s%s?)}sx', preg_quote(LexerOptions::TagBlockStart->value), preg_quote(LexerOptions::WhitespaceTrim->value) ); @@ -33,15 +32,15 @@ public static function tokenStartRegex(): string return $regex; } - public static function commentBlockRegex(): string + public static function variableStartRegex(): string { static $regex; if ($regex === null) { $regex = sprintf( - "{\s*comment\s*(?:%s|%s')}Asx", - preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value), - preg_quote(LexerOptions::TagBlockEnd->value), + '{(%s%s?)}sx', + preg_quote(LexerOptions::TagVariableStart->value), + preg_quote(LexerOptions::WhitespaceTrim->value) ); } @@ -54,7 +53,7 @@ public static function variableEndRegex(): string if ($regex === null) { $regex = sprintf( - '{\s*(?:%s|%s)}Ax', + '{\s*(%s|%s)}Ax', preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagVariableEnd->value), preg_quote(LexerOptions::TagVariableEnd->value), ); @@ -69,22 +68,7 @@ public static function blockEndRegex(): string if ($regex === null) { $regex = sprintf( - '{\s*(?:%s|%s)}Ax', - preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value), - preg_quote(LexerOptions::TagBlockEnd->value), - ); - } - - return $regex; - } - - public static function blockRawStartRegex(): string - { - static $regex; - - if ($regex === null) { - $regex = sprintf( - '{\s*raw\s*(?:%s|%s)}Ax', + '{\s*(%s|%s)}Ax', preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value), preg_quote(LexerOptions::TagBlockEnd->value), ); @@ -93,21 +77,22 @@ public static function blockRawStartRegex(): string return $regex; } - public static function blockRawDataRegex(): string + public static function blockRawBodyTagDataRegex(string $tag): string { - static $regex; + static $regex = []; - if ($regex === null) { - $regex = sprintf( - '{%s(%s)?\s*endraw\s*(%s)?%s}sx', + if (($regex[$tag] ?? null) === null) { + $regex[$tag] = sprintf( + '{(%s%s?)\s*end%s\s*(%s?%s)}sx', preg_quote(LexerOptions::TagBlockStart->value), LexerOptions::WhitespaceTrim->value, + preg_quote($tag), LexerOptions::WhitespaceTrim->value, preg_quote(LexerOptions::TagBlockEnd->value), ); } - return $regex; + return $regex[$tag]; } public static function blockCommentStartRegex(): string diff --git a/src/TagBlock.php b/src/TagBlock.php index 32254ba..8bfecff 100644 --- a/src/TagBlock.php +++ b/src/TagBlock.php @@ -20,4 +20,9 @@ public function parseTreeVisitorChildren(): array { return []; } + + public static function hasRawBody(): bool + { + return false; + } } diff --git a/src/Tags/DocTag.php b/src/Tags/DocTag.php new file mode 100644 index 0000000..817dcd9 --- /dev/null +++ b/src/Tags/DocTag.php @@ -0,0 +1,62 @@ +params->assertEnd(); + + assert($context->body instanceof BodyNode); + + $body = $context->body->children()[0] ?? null; + $this->body = match (true) { + $body instanceof Raw => $body, + default => throw new SyntaxException('doc tag must have a single raw body'), + }; + + $this->ensureNoNestedDocTags(); + + return $this; + } + + public function render(RenderContext $context): string + { + return ''; + } + + /** + * @throws SyntaxException + */ + protected function ensureNoNestedDocTags(): void + { + if (preg_match('/{%-?\s*doc\s*-?%}/', $this->body->value) === 1) { + throw new SyntaxException('Nested doc tags are not allowed'); + } + } + + public function getBody(): Raw + { + return $this->body; + } +} diff --git a/src/Tags/RawTag.php b/src/Tags/RawTag.php new file mode 100644 index 0000000..d3eabe2 --- /dev/null +++ b/src/Tags/RawTag.php @@ -0,0 +1,50 @@ +params->assertEnd(); + + assert($context->body instanceof BodyNode); + + $body = $context->body->children()[0] ?? null; + $this->body = match (true) { + $body instanceof Raw => $body, + default => throw new SyntaxException('raw tag must have a single raw body'), + }; + + return $this; + } + + public function render(RenderContext $context): string + { + return $this->body->render($context); + } + + public function getBody(): Raw + { + return $this->body; + } +} diff --git a/tests/Integration/Tags/DocTagTest.php b/tests/Integration/Tags/DocTagTest.php new file mode 100644 index 0000000..95dd6db --- /dev/null +++ b/tests/Integration/Tags/DocTagTest.php @@ -0,0 +1,162 @@ + new \Keepsuit\Liquid\Tests\Stubs\ErrorDrop], + renderErrors: true + ); +}); + +test('doc tag whitespace control', function () { + assertTemplateResult('Hello!', ' {%- doc -%}123{%- enddoc -%}Hello!'); + assertTemplateResult('Hello!', '{%- doc -%}123{%- enddoc -%} Hello!'); + assertTemplateResult('Hello!', ' {%- doc -%}123{%- enddoc -%} Hello!'); + assertTemplateResult('Hello!', <<<'LIQUID' + {%- doc %}Whitespace control!{% enddoc -%} + Hello! + LIQUID); +}); + +test('doc tag delimiter handling', function () { + assertTemplateResult('', <<<'LIQUID' + {% if true -%} + {% doc %} + {% docEXTRA %}wut{% enddocEXTRA %}xyz + {% enddoc %} + {%- endif %} + LIQUID); + assertMatchSyntaxError("Liquid syntax error (line 1): 'doc' tag was never closed", '{% doc %}123{% enddoc xyz %}'); + assertTemplateResult('', "{% doc %}123{% enddoc\n xyz %}{% enddoc %}"); +}); + +test('access doc tag body', function () { + $content = <<<'EOF' + Renders loading-spinner. + @param {string} foo - some foo + @param {string} [bar] - optional bar + EOF; + + $template = <<root->body->children()[0] ?? null; + + expect($docTag) + ->toBeInstanceOf(\Keepsuit\Liquid\Tags\DocTag::class) + ->getBody()->toBeInstanceOf(\Keepsuit\Liquid\Nodes\Raw::class) + ->getBody()->value->toBe($content); +}); diff --git a/tests/Integration/Tags/InlineCommentTagTest.php b/tests/Integration/Tags/InlineCommentTagTest.php index e048d9a..48266ef 100644 --- a/tests/Integration/Tags/InlineCommentTagTest.php +++ b/tests/Integration/Tags/InlineCommentTagTest.php @@ -51,5 +51,5 @@ }); test('inline comment does not support nested tags', function () { - assertMatchSyntaxError('Liquid syntax error (line 1): Unexpected token type: %}', "{%- # {% echo 'hello world' %} -%}"); + assertTemplateResult(' -%}', "{%- # {% echo 'hello world' %} -%}"); }); diff --git a/tests/Integration/Tags/RawTagTest.php b/tests/Integration/Tags/RawTagTest.php index 1379828..829b2c9 100644 --- a/tests/Integration/Tags/RawTagTest.php +++ b/tests/Integration/Tags/RawTagTest.php @@ -9,8 +9,9 @@ test('output in raw', function () { assertTemplateResult('>{{ test }}<', '> {%- raw -%}{{ test }}{%- endraw -%} <'); - assertTemplateResult('> inner <', '> {%- raw -%} inner {%- endraw %} <'); - assertTemplateResult('> inner <', '> {%- raw -%} inner {%- endraw -%} <'); + assertTemplateResult('>inner <', '> {%- raw -%} inner {%- endraw %} <'); + assertTemplateResult('>inner<', '> {%- raw -%} inner {%- endraw -%} <'); + assertTemplateResult('{Hello}', '{% raw %}{{% endraw %}Hello{% raw %}}{% endraw %}'); }); test('open tag in raw', function () { @@ -30,3 +31,25 @@ assertMatchSyntaxError('Liquid syntax error (line 1): Unexpected character }', '{% raw } foo {% endraw %}'); assertMatchSyntaxError('Liquid syntax error (line 1): Unexpected character }', '{% raw } foo %}{% endraw %}'); }); + +test('access raw tag body', function () { + $content = <<<'EOF' + {% if true %} + true + {% else %} + false + {% endif %} + EOF; + + $template = <<root->body->children()[0] ?? null; + + expect($rawTag) + ->toBeInstanceOf(\Keepsuit\Liquid\Tags\RawTag::class) + ->getBody()->toBeInstanceOf(\Keepsuit\Liquid\Nodes\Raw::class) + ->getBody()->value->toBe($content); +}); diff --git a/tests/Unit/BlockTest.php b/tests/Unit/BlockTest.php index 74db631..2004167 100644 --- a/tests/Unit/BlockTest.php +++ b/tests/Unit/BlockTest.php @@ -2,6 +2,7 @@ use Keepsuit\Liquid\Nodes\Text; use Keepsuit\Liquid\Nodes\Variable; +use Keepsuit\Liquid\Tags\DocTag; use Keepsuit\Liquid\Tags\IfTag; test('blankspace', function () { @@ -66,3 +67,13 @@ ->{1}->children()->{0}->children()->{0}->toBeInstanceOf(Text::class) ->{2}->toBeInstanceOf(Text::class); }); + +test('doc tag with block', function () { + $template = parseTemplate(' {% doc %} {% enddoc %} '); + + expect($template->root->body->children()) + ->toHaveCount(3) + ->{0}->toBeInstanceOf(Text::class) + ->{1}->toBeInstanceOf(DocTag::class) + ->{2}->toBeInstanceOf(Text::class); +}); diff --git a/tests/Unit/EnvironmentTest.php b/tests/Unit/EnvironmentTest.php index 8a82387..e71cf22 100644 --- a/tests/Unit/EnvironmentTest.php +++ b/tests/Unit/EnvironmentTest.php @@ -15,19 +15,21 @@ $tags = $env->tagRegistry->all(); - expect($tags)->toHaveCount(16) + expect($tags)->toHaveCount(18) ->toHaveKey('assign') ->toHaveKey('break') ->toHaveKey('capture') ->toHaveKey('case') ->toHaveKey('cycle') ->toHaveKey('decrement') + ->toHaveKey('doc') ->toHaveKey('echo') ->toHaveKey('for') ->toHaveKey('ifchanged') ->toHaveKey('if') ->toHaveKey('increment') ->toHaveKey('liquid') + ->toHaveKey('raw') ->toHaveKey('render') ->toHaveKey('tablerow') ->toHaveKey('unless'); diff --git a/tests/Unit/ParseTreeVisitorTest.php b/tests/Unit/ParseTreeVisitorTest.php index fbe2813..0329194 100644 --- a/tests/Unit/ParseTreeVisitorTest.php +++ b/tests/Unit/ParseTreeVisitorTest.php @@ -152,6 +152,10 @@ ]); }); +test('doc', function () { + expect(visit('{% doc %}{{ test }}{% enddoc %}'))->toBe([]); +}); + function traversal(string $source): ParseTreeVisitor { $environment = EnvironmentFactory::new()