keepsuit · cappuc · Jun 15, 2025 · Apr 25, 2025 · Apr 25, 2025 · Apr 25, 2025
diff --git a/src/Extensions/StandardExtension.php b/src/Extensions/StandardExtension.php
@@ -17,12 +17,14 @@ public function getTags(): array
             Tags\ContinueTag::class,
             Tags\CycleTag::class,
             Tags\DecrementTag::class,
+            Tags\DocTag::class,
             Tags\EchoTag::class,
             Tags\ForTag::class,
             Tags\IfChanged::class,
             Tags\IfTag::class,
             Tags\IncrementTag::class,
             Tags\LiquidTag::class,
+            Tags\RawTag::class,
             Tags\RenderTag::class,
             Tags\TableRowTag::class,
             Tags\UnlessTag::class,

diff --git a/src/Parse/Lexer.php b/src/Parse/Lexer.php
@@ -3,6 +3,7 @@
 namespace Keepsuit\Liquid\Parse;
 
 use Keepsuit\Liquid\Exceptions\SyntaxException;
+use Keepsuit\Liquid\TagBlock;
 use RuntimeException;
 
 class Lexer
@@ -30,12 +31,17 @@ class Lexer
     protected array $tokens;
 
     /**
-     * @var array<int, array<int, array{0:string,1:int}>>
+     * @var array<int, array{0:string,1:int}>
      */
     protected array $positions;
 
     protected int $position;
 
+    /**
+     * @var string[]
+     */
+    protected array $rawBodyTags;
+
     public function __construct(
         protected ParseContext $parseContext,
     ) {}
@@ -53,10 +59,17 @@ public function tokenize(string $source): TokenStream
         $this->state = LexerState::Data;
         $this->tokens = [];
 
+        $this->rawBodyTags = array_keys(array_filter($this->parseContext->environment->tagRegistry->all(), function ($tag) {
+            if (! is_subclass_of($tag, TagBlock::class)) {
+                return false;
+            }
+
+            return $tag::hasRawBody();
+        }));
+
         $this->parseContext->lineNumber = 1;
 
-        preg_match_all(LexerOptions::tokenStartRegex(), $this->source, $matches, PREG_OFFSET_CAPTURE);
-        $this->positions = $matches;
+        $this->positions = $this->extractTokenStarts($this->source);
         $this->position = -1;
 
         while ($this->cursor < $this->end) {
@@ -79,42 +92,36 @@ public function tokenize(string $source): TokenStream
     protected function lexData(): void
     {
         // if no matches are left we return the rest of the template as simple text token
-        if ($this->position == count($this->positions[0]) - 1) {
+        if ($this->position == count($this->positions) - 1) {
             $this->pushToken(TokenType::TextData, substr($this->source, $this->cursor));
             $this->cursor = $this->end;
 
             return;
         }
 
         // Find the first token after the current cursor
-        $position = $this->positions[0][++$this->position];
+        $position = $this->positions[++$this->position];
         while ($position[1] < $this->cursor) {
-            if ($this->position == count($this->positions[0]) - 1) {
+            if ($this->position == count($this->positions) - 1) {
                 return;
             }
-            $position = $this->positions[0][++$this->position];
+            $position = $this->positions[++$this->position];
         }
 
         // push the template text before the token first
         $text = $textBeforeToken = substr($this->source, $this->cursor, $position[1] - $this->cursor);
 
         // trim?
-        if ($this->positions[2][$this->position][0] === LexerOptions::WhitespaceTrim->value) {
+        if (($this->positions[$this->position][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) {
             $textBeforeToken = rtrim($textBeforeToken);
         }
 
         $this->pushToken(TokenType::TextData, $textBeforeToken);
         $this->moveCursor($text.$position[0]);
 
-        switch ($this->positions[1][$this->position][0]) {
+        switch ($this->positions[$this->position][0]) {
             case LexerOptions::TagBlockStart->value:
-                // {% raw %}
-                if (preg_match(LexerOptions::blockRawStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
-                    $this->moveCursor($matches[0]);
-                    $this->lexRawData();
-                    break;
-                }
-
+            case LexerOptions::TagBlockStart->value.LexerOptions::WhitespaceTrim->value:
                 // {% comment %}
                 if (preg_match(LexerOptions::blockCommentStartRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
                     $this->moveCursor($matches[0]);
@@ -127,6 +134,7 @@ protected function lexData(): void
                 $this->currentVarBlockLine = $this->lineNumber;
                 break;
             case LexerOptions::TagVariableStart->value:
+            case LexerOptions::TagVariableStart->value.LexerOptions::WhitespaceTrim->value:
                 $this->pushToken(TokenType::VariableStart);
                 $this->pushState(LexerState::Variable);
                 $this->currentVarBlockLine = $this->lineNumber;
@@ -145,9 +153,8 @@ protected function lexVariable(): void
             $this->popState();
 
             // trim?
-            if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) {
-                preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
-                $this->moveCursor($matches[0] ?? '');
+            if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
+                $this->trimWhitespaces();
             }
         } else {
             $this->lexExpression();
@@ -159,18 +166,40 @@ protected function lexVariable(): void
      */
     protected function lexBlock(): void
     {
-        if (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) === 1) {
-            $this->pushToken(TokenType::BlockEnd);
-            $this->moveCursor($matches[0]);
-            $this->popState();
+        $tag = null;
 
-            // trim?
-            if (trim($matches[0])[0] === LexerOptions::WhitespaceTrim->value) {
-                preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
-                $this->moveCursor($matches[0] ?? '');
+        // Parse the full expression inside {% ... %}
+        while (preg_match(LexerOptions::blockEndRegex(), $this->source, $matches, offset: $this->cursor) !== 1) {
+            $this->lexExpression();
+
+            $lastToken = $this->tokens[array_key_last($this->tokens)];
+
+            if ($tag === null && $lastToken->type === TokenType::Identifier) {
+                $tag = $lastToken;
             }
+        }
+
+        // Move the cursor to the end of the block
+        $this->moveCursor($matches[0]);
+
+        // trim?
+        if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
+            $this->trimWhitespaces();
+        }
+
+        // If the last token is still the block start, no token was pushed after it: drop that token instead of pushing a BlockEnd
+        $lastToken = $this->tokens[array_key_last($this->tokens)];
+        if ($lastToken->type === TokenType::BlockStart) {
+            array_pop($this->tokens);
         } else {
-            $this->lexExpression();
+            $this->pushToken(TokenType::BlockEnd);
+        }
+
+        $this->popState();
+
+        // If the tag is a raw body tag, we need to lex the body as raw data instead of liquid blocks
+        if ($tag !== null && in_array($tag->data, $this->rawBodyTags, true)) {
+            $this->laxRawBodyTag($tag->data);
         }
     }
 
@@ -227,23 +256,27 @@ protected function ensureStreamNotEnded(): void
         }
     }
 
-    protected function lexRawData(): void
+    protected function laxRawBodyTag(string $tag): void
     {
-        if (preg_match(LexerOptions::blockRawDataRegex(), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) {
-            throw SyntaxException::tagBlockNeverClosed('raw');
+        if (preg_match(LexerOptions::blockRawBodyTagDataRegex($tag), $this->source, $matches, flags: PREG_OFFSET_CAPTURE, offset: $this->cursor) !== 1) {
+            throw SyntaxException::tagBlockNeverClosed($tag);
         }
 
-        $text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);
+        $rawBody = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);
 
-        $this->moveCursor($text.$matches[0][0]);
+        $this->moveCursor($rawBody);
 
-        // trim?
-        if (isset($matches[2][0])) {
-            preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor);
-            $this->moveCursor($matches2[0] ?? '');
+        // inner trim? (the end tag opens with the trim marker, so strip trailing whitespace from the raw body)
+        if (($matches[1][0][2] ?? null) === LexerOptions::WhitespaceTrim->value) {
+            $rawBody = rtrim($rawBody);
         }
 
-        $this->pushToken(TokenType::RawData, $text);
+        $this->pushToken(TokenType::RawData, $rawBody);
+
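+        // outer trim? NOTE(review): the cursor was only moved past the raw body, so it still sits on the end tag here and this looks like a no-op — confirm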
+        if ($matches[2][0][0] === LexerOptions::WhitespaceTrim->value) {
+            $this->trimWhitespaces();
+        }
     }
 
     protected function lexComment(): void
@@ -265,24 +298,7 @@ protected function lexInlineComment(): void
 
         $text = substr($this->source, $this->cursor, $matches[0][1] - $this->cursor);
 
-        $this->moveCursor($text.$matches[0][0]);
-
-        if ($matches[1][0] === "\n") {
-            return;
-        }
-
-        $lastToken = $this->tokens[count($this->tokens) - 1] ?? null;
-
-        if ($lastToken?->type === TokenType::BlockStart) {
-            array_pop($this->tokens);
-        } else {
-            $this->pushToken(TokenType::BlockEnd);
-        }
-
-        if ($matches[1][0] === LexerOptions::WhitespaceTrim->value) {
-            preg_match('/\s+/A', $this->source, $matches2, offset: $this->cursor);
-            $this->moveCursor($matches2[0] ?? '');
-        }
+        $this->moveCursor($text);
     }
 
     protected function pushToken(TokenType $type, string $value = ''): void
@@ -322,4 +338,24 @@ protected function popState(): void
 
         $this->state = $state;
     }
+
+    protected function trimWhitespaces(): void
+    {
+        preg_match('/\s+/A', $this->source, $matches, offset: $this->cursor);
+        $this->moveCursor($matches[0] ?? '');
+    }
+
+    /**
+     * @return array<int,array{0:string,1:int}>
+     */
+    protected function extractTokenStarts(string $source): array
+    {
+        preg_match_all(LexerOptions::blockStartRegex(), $source, $blocks, PREG_OFFSET_CAPTURE);
+        preg_match_all(LexerOptions::variableStartRegex(), $source, $variables, PREG_OFFSET_CAPTURE);
+
+        $positions = array_merge($blocks[0], $variables[0]);
+        usort($positions, fn (array $a, array $b) => $a[1] <=> $b[1]);
+
+        return $positions;
+    }
 }
diff --git a/src/Parse/LexerOptions.php b/src/Parse/LexerOptions.php
@@ -17,14 +17,13 @@ enum LexerOptions: string
 
     case WhitespaceTrim = '-';
 
-    public static function tokenStartRegex(): string
+    public static function blockStartRegex(): string
     {
         static $regex;
 
         if ($regex === null) {
             $regex = sprintf(
-                '{(%s|%s)(%s)?}sx',
-                preg_quote(LexerOptions::TagVariableStart->value),
+                '{(%s%s?)}sx',
                 preg_quote(LexerOptions::TagBlockStart->value),
                 preg_quote(LexerOptions::WhitespaceTrim->value)
             );
@@ -33,15 +32,15 @@ public static function tokenStartRegex(): string
         return $regex;
     }
 
-    public static function commentBlockRegex(): string
+    public static function variableStartRegex(): string
     {
         static $regex;
 
         if ($regex === null) {
             $regex = sprintf(
-                "{\s*comment\s*(?:%s|%s')}Asx",
-                preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
-                preg_quote(LexerOptions::TagBlockEnd->value),
+                '{(%s%s?)}sx',
+                preg_quote(LexerOptions::TagVariableStart->value),
+                preg_quote(LexerOptions::WhitespaceTrim->value)
             );
         }
 
@@ -54,7 +53,7 @@ public static function variableEndRegex(): string
 
         if ($regex === null) {
             $regex = sprintf(
-                '{\s*(?:%s|%s)}Ax',
+                '{\s*(%s|%s)}Ax',
                 preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagVariableEnd->value),
                 preg_quote(LexerOptions::TagVariableEnd->value),
             );
@@ -69,22 +68,7 @@ public static function blockEndRegex(): string
 
         if ($regex === null) {
             $regex = sprintf(
-                '{\s*(?:%s|%s)}Ax',
-                preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
-                preg_quote(LexerOptions::TagBlockEnd->value),
-            );
-        }
-
-        return $regex;
-    }
-
-    public static function blockRawStartRegex(): string
-    {
-        static $regex;
-
-        if ($regex === null) {
-            $regex = sprintf(
-                '{\s*raw\s*(?:%s|%s)}Ax',
+                '{\s*(%s|%s)}Ax',
                 preg_quote(LexerOptions::WhitespaceTrim->value.LexerOptions::TagBlockEnd->value),
                 preg_quote(LexerOptions::TagBlockEnd->value),
             );
@@ -93,21 +77,22 @@ public static function blockRawStartRegex(): string
         return $regex;
     }
 
-    public static function blockRawDataRegex(): string
+    public static function blockRawBodyTagDataRegex(string $tag): string
     {
-        static $regex;
+        static $regex = [];
 
-        if ($regex === null) {
-            $regex = sprintf(
-                '{%s(%s)?\s*endraw\s*(%s)?%s}sx',
+        if (($regex[$tag] ?? null) === null) {
+            $regex[$tag] = sprintf(
+                '{(%s%s?)\s*end%s\s*(%s?%s)}sx',
                 preg_quote(LexerOptions::TagBlockStart->value),
                 LexerOptions::WhitespaceTrim->value,
+                preg_quote($tag),
                 LexerOptions::WhitespaceTrim->value,
                 preg_quote(LexerOptions::TagBlockEnd->value),
             );
         }
 
-        return $regex;
+        return $regex[$tag];
     }
 
     public static function blockCommentStartRegex(): string

diff --git a/src/TagBlock.php b/src/TagBlock.php
@@ -20,4 +20,9 @@ public function parseTreeVisitorChildren(): array
     {
         return [];
     }
+
+    public static function hasRawBody(): bool
+    {
+        return false;
+    }
 }