From 84e5cfabc61d03f840384a1c51f4de2903e45a20 Mon Sep 17 00:00:00 2001 From: Jaapio Date: Sat, 20 Sep 2025 21:33:59 +0200 Subject: [PATCH 1/4] Improve buffer calls --- .../src/RestructuredText/Parser/Buffer.php | 24 +++++++++----- .../tests/benchmarks/BufferBench.php | 31 +++++++++++++++++++ 2 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 packages/guides-restructured-text/tests/benchmarks/BufferBench.php diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php index 9f8a2fa50..87cd4a891 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php @@ -103,21 +103,29 @@ public function clear(): void public function trimLines(): void { - array_walk($this->lines, static function (&$value): void { - $value = trim($value); - }); + foreach ($this->lines as $i => $line) { + $this->lines[$i] = trim($line); + } } private function unIndent(): void { + if ($this->unindentStrategy === UnindentStrategy::NONE) { + return; + } + $indentation = $this->detectIndentation(); - array_walk($this->lines, static function (&$value) use ($indentation): void { - if (strlen($value) < $indentation) { - return; + if ($indentation === 0) { + return; + } + + foreach ($this->lines as $i => $line) { + if (strlen($line) < $indentation) { + continue; } - $value = substr($value, $indentation); - }); + $this->lines[$i] = substr($line, $indentation); + } } private function detectIndentation(): int diff --git a/packages/guides-restructured-text/tests/benchmarks/BufferBench.php b/packages/guides-restructured-text/tests/benchmarks/BufferBench.php new file mode 100644 index 000000000..776cd091b --- /dev/null +++ b/packages/guides-restructured-text/tests/benchmarks/BufferBench.php @@ -0,0 +1,31 @@ +buffer = new Buffer(); + $this->buffer->push(' This is a line with leading spaces. 
'); + $this->buffer->push(' This is another line.'); + $this->buffer->push(' Yet another line with spaces. '); + $this->buffer->push(' Final line.'); + } + + + #[Revs([1000, 10_000])] + #[Iterations(5)] + public function benchGetLines(): void + { + $this->buffer->getLines(); + } +} From 817517898c87b4803ebc16f936a79393738c8d0b Mon Sep 17 00:00:00 2001 From: Jaapio Date: Sat, 20 Sep 2025 23:56:59 +0200 Subject: [PATCH 2/4] Optimizations in parser - array_walk is slower than a foreach, so buffer now uses foreach. - By caching some of the inline node parsers we can strip out about 9% of the overhead in the inline parser, as we do not have to check them for every token. --- .../src/RestructuredText/Parser/Buffer.php | 1 - .../RestructuredText/Parser/InlineParser.php | 19 ++++++++++++++-- .../InlineRules/AnonymousReferenceRule.php | 7 +++++- .../InlineRules/CachableInlineRule.php | 22 +++++++++++++++++++ .../Productions/InlineRules/LiteralRule.php | 7 +++++- .../InlineRules/NamedReferenceRule.php | 7 +++++- .../Productions/InlineRules/NbspRule.php | 7 +++++- .../InlineRules/StandaloneEmailRule.php | 7 +++++- .../InlineRules/StandaloneHyperlinkRule.php | 7 +++++- .../tests/benchmarks/BufferBench.php | 10 ++++++++- 10 files changed, 84 insertions(+), 10 deletions(-) create mode 100644 packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/CachableInlineRule.php diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php index 87cd4a891..3c90e308c 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php @@ -14,7 +14,6 @@ namespace phpDocumentor\Guides\RestructuredText\Parser; use function array_pop; -use function array_walk; use function count; use function implode; use function ltrim; diff --git 
a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineParser.php b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineParser.php index cac7236f1..fcf4d8416 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineParser.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineParser.php @@ -16,8 +16,11 @@ use Exception; use phpDocumentor\Guides\Nodes\Inline\PlainTextInlineNode; use phpDocumentor\Guides\Nodes\InlineCompoundNode; +use phpDocumentor\Guides\RestructuredText\Parser\Productions\InlineRules\CachableInlineRule; use phpDocumentor\Guides\RestructuredText\Parser\Productions\InlineRules\InlineRule; +use function array_filter; +use function array_key_exists; use function usort; /** @internal */ @@ -26,11 +29,23 @@ class InlineParser /** @var InlineRule[] */ private array $rules; + /** @var array */ + private array $cache = []; + /** @param iterable $inlineRules */ public function __construct(iterable $inlineRules) { - $this->rules = [...$inlineRules]; + // Materialise once: a one-shot iterable (e.g. a generator) cannot be traversed a second time. + $allRules = [...$inlineRules]; + $this->rules = array_filter($allRules, static fn ($rule) => $rule instanceof CachableInlineRule === false); usort($this->rules, static fn (InlineRule $a, InlineRule $b): int => $a->getPriority() > $b->getPriority() ? -1 : 1); + foreach ($allRules as $rule) { + if (!($rule instanceof CachableInlineRule)) { + continue; + } + + $this->cache[$rule->getToken()] = $rule; + } } public function parse(string $content, BlockContext $blockContext): InlineCompoundNode @@ -44,7 +59,9 @@ public function parse(string $content, BlockContext $blockContext): InlineCompou while ($lexer->token !== null) { foreach ($this->rules as $inlineRule) { $node = null; - if ($inlineRule->applies($lexer)) { + if (array_key_exists($lexer->token->type ?? 
-1, $this->cache)) { + $node = $this->cache[$lexer->token->type]->apply($blockContext, $lexer); + } elseif ($inlineRule->applies($lexer)) { $node = $inlineRule->apply($blockContext, $lexer); } diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousReferenceRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousReferenceRule.php index 116c6ba84..56d60ebca 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousReferenceRule.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousReferenceRule.php @@ -28,8 +28,13 @@ * * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#anonymous-hyperlinks */ -final class AnonymousReferenceRule extends ReferenceRule +final class AnonymousReferenceRule extends ReferenceRule implements CachableInlineRule { + public function getToken(): int + { + return InlineLexer::ANONYMOUSE_REFERENCE; + } + public function applies(InlineLexer $lexer): bool { return $lexer->token?->type === InlineLexer::ANONYMOUSE_REFERENCE; diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/CachableInlineRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/CachableInlineRule.php new file mode 100644 index 000000000..eaab1a467 --- /dev/null +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/CachableInlineRule.php @@ -0,0 +1,22 @@ +token?->type === InlineLexer::LITERAL; diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedReferenceRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedReferenceRule.php index d5736a878..b7752378c 100644 --- 
a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedReferenceRule.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedReferenceRule.php @@ -28,8 +28,13 @@ * * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#hyperlink-references */ -final class NamedReferenceRule extends ReferenceRule +final class NamedReferenceRule extends ReferenceRule implements CachableInlineRule { + public function getToken(): int + { + return InlineLexer::NAMED_REFERENCE; + } + public function applies(InlineLexer $lexer): bool { return $lexer->token?->type === InlineLexer::NAMED_REFERENCE; diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NbspRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NbspRule.php index 056c7e34f..ff9044536 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NbspRule.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NbspRule.php @@ -20,8 +20,13 @@ /** * Rule to parse for non-breaking spaces: a~b */ -final class NbspRule extends ReferenceRule +final class NbspRule extends AbstractInlineRule implements CachableInlineRule { + public function getToken(): int + { + return InlineLexer::NBSP; + } + public function applies(InlineLexer $lexer): bool { return $lexer->token?->type === InlineLexer::NBSP; diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneEmailRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneEmailRule.php index e5beb95cf..5269baf10 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneEmailRule.php +++ 
b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneEmailRule.php @@ -26,8 +26,13 @@ * * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks */ -final class StandaloneEmailRule extends ReferenceRule +final class StandaloneEmailRule extends ReferenceRule implements CachableInlineRule { + public function getToken(): int + { + return InlineLexer::EMAIL; + } + public function applies(InlineLexer $lexer): bool { return $lexer->token?->type === InlineLexer::EMAIL; diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneHyperlinkRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneHyperlinkRule.php index f7a1ebfd8..d79f367f5 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneHyperlinkRule.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/StandaloneHyperlinkRule.php @@ -26,8 +26,13 @@ * * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks */ -final class StandaloneHyperlinkRule extends ReferenceRule +final class StandaloneHyperlinkRule extends ReferenceRule implements CachableInlineRule { + public function getToken(): int + { + return InlineLexer::HYPERLINK; + } + public function applies(InlineLexer $lexer): bool { return $lexer->token?->type === InlineLexer::HYPERLINK; diff --git a/packages/guides-restructured-text/tests/benchmarks/BufferBench.php b/packages/guides-restructured-text/tests/benchmarks/BufferBench.php index 776cd091b..70c62a48f 100644 --- a/packages/guides-restructured-text/tests/benchmarks/BufferBench.php +++ b/packages/guides-restructured-text/tests/benchmarks/BufferBench.php @@ -2,6 +2,15 @@ declare(strict_types=1); +/** + * This file is part of phpDocumentor. 
+ * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + * + * @link https://phpdoc.org + */ + namespace phpDocumentor\Guides\RestructuredText; use PhpBench\Attributes\Iterations; @@ -21,7 +30,6 @@ public function __construct() $this->buffer->push(' Final line.'); } - #[Revs([1000, 10_000])] #[Iterations(5)] public function benchGetLines(): void From a860c9b967dbd359a3391bee5b2d4e7c1f33fcf0 Mon Sep 17 00:00:00 2001 From: Jaapio Date: Sun, 21 Sep 2025 10:11:21 +0200 Subject: [PATCH 3/4] Execute sort less times By sorting every call, we are just pumping up the system. There is no need to do this, only when we need the files. --- packages/guides/src/Files.php | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/guides/src/Files.php b/packages/guides/src/Files.php index 00f020c3b..d4fb27810 100644 --- a/packages/guides/src/Files.php +++ b/packages/guides/src/Files.php @@ -33,17 +33,20 @@ final class Files implements IteratorAggregate, Countable public function add(string $filename): void { - if (in_array($filename, $this->files, true)) { + if (array_key_exists($filename, $this->files)) { return; } - $this->files[] = $filename; - sort($this->files, SORT_NATURAL | SORT_FLAG_CASE); + $this->files[$filename] = $filename; } /** @return Iterator */ public function getIterator(): Iterator { - return new ArrayIterator($this->files); + // Sort a copy: sort() reindexes the array, which would drop the filename keys add() relies on for de-duplication. + $files = $this->files; + sort($files, SORT_NATURAL | SORT_FLAG_CASE); + + return new ArrayIterator($files); } From 4708f1d3cb675fc53bc333ba6459653c51b772dc Mon Sep 17 00:00:00 2001 From: Jaapio Date: Thu, 16 Oct 2025 22:51:43 +0200 Subject: [PATCH 4/4] Optimize lexer and re-render compiling --- .../src/DevServer/RerenderListener.php | 7 ++-- .../Directives/CsvTableDirective.php | 20 +++++++++-- .../RestructuredText/Parser/InlineLexer.php | 33 +++++++++++-------- .../tests/benchmarks/InlineLexerBench.php | 11 +++++++ packages/guides/src/Files.php | 2 +- 5 files changed, 51 insertions(+), 22
deletions(-) diff --git a/packages/guides-cli/src/DevServer/RerenderListener.php b/packages/guides-cli/src/DevServer/RerenderListener.php index d014f9063..179e51912 100644 --- a/packages/guides-cli/src/DevServer/RerenderListener.php +++ b/packages/guides-cli/src/DevServer/RerenderListener.php @@ -70,12 +70,9 @@ public function __invoke(FileModifiedEvent $event): void ); assert($document instanceof DocumentNode); - $documents = $this->documents; - $documents[$file] = $document; - /** @var array $documents */ - $documents = $this->commandBus->handle(new CompileDocumentsCommand($documents, new CompilerContext($this->projectNode))); - $this->documents = $documents; + $documents = $this->commandBus->handle(new CompileDocumentsCommand([$file => $document], new CompilerContext($this->projectNode))); + $this->documents[$file] = $documents[$file]; $destinationFileSystem = FlySystemAdapter::createForPath($this->settings->getOutput()); $documentIterator = DocumentListIterator::create( diff --git a/packages/guides-restructured-text/src/RestructuredText/Directives/CsvTableDirective.php b/packages/guides-restructured-text/src/RestructuredText/Directives/CsvTableDirective.php index f747d1a96..41d048b78 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Directives/CsvTableDirective.php +++ b/packages/guides-restructured-text/src/RestructuredText/Directives/CsvTableDirective.php @@ -33,6 +33,7 @@ use function explode; use function implode; use function is_string; +use function method_exists; use function strval; use function trim; @@ -78,10 +79,18 @@ public function processNode( return new GenericNode('csv-table'); } - $csv = Reader::createFromStream($csvStream); + if (method_exists(Reader::class, 'from')) { + $csv = Reader::from($csvStream); + } else { + $csv = Reader::createFromStream($csvStream); + } } else { $lines = $blockContext->getDocumentIterator()->toArray(); - $csv = Reader::createFromString(implode("\n", $lines)); + if (method_exists(Reader::class, 
'fromString')) { + $csv = Reader::fromString(implode("\n", $lines)); + } else { + $csv = Reader::createFromString(implode("\n", $lines)); + } } if ($directive->getOption('header-rows')->getValue() !== null) { @@ -90,7 +99,12 @@ public function processNode( $header = null; if ($directive->hasOption('header')) { - $headerCsv = Reader::createFromString($directive->getOption('header')->toString()); + if (method_exists(Reader::class, 'fromString')) { + $headerCsv = Reader::fromString($directive->getOption('header')->toString()); + } else { + $headerCsv = Reader::createFromString($directive->getOption('header')->toString()); + } + $header = new TableRow(); foreach ($headerCsv->first() as $column) { $columnNode = new TableColumn($column, 1, []); diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php index 94aaf19ba..51d787a58 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php @@ -19,8 +19,14 @@ use function array_column; use function array_flip; +use function ctype_alnum; +use function ctype_space; use function parse_url; use function preg_match; +use function str_ends_with; +use function str_replace; +use function strlen; +use function substr; use const PHP_URL_SCHEME; @@ -111,6 +117,7 @@ protected function getType(string &$value) { $type = match ($value) { '`' => self::BACKTICK, + '``' => self::DOUBLE_BACKTICK, '**' => self::STRONG_DELIMITER, '*' => self::EMPHASIS_DELIMITER, '|' => self::VARIABLE_DELIMITER, @@ -130,10 +137,22 @@ protected function getType(string &$value) } // $value is already a tokenized part. Therefore, we have to match against the complete String here. 
- if (preg_match('/^\\\\[\s\S]$/i', $value)) { + if (str_ends_with($value, '__') && ctype_alnum(str_replace('-', '', substr($value, 0, -2)))) { + return self::ANONYMOUSE_REFERENCE; + } + + if (str_ends_with($value, '_') && ctype_alnum(str_replace('-', '', substr($value, 0, -1)))) { + return self::NAMED_REFERENCE; + } + + if (strlen($value) === 2 && $value[0] === '\\') { return self::ESCAPED_SIGN; } + if (strlen($value) === 1 && ctype_space($value)) { + return self::WHITESPACE; + } + if (preg_match('/^``.+``(?!`)$/i', $value)) { return self::LITERAL; } @@ -146,18 +165,6 @@ protected function getType(string &$value) return self::EMAIL; } - if (preg_match('/^[a-z0-9-]+_{2}$/i', $value)) { - return self::ANONYMOUSE_REFERENCE; - } - - if (preg_match('/^[a-z0-9-]+_{1}$/i', $value)) { - return self::NAMED_REFERENCE; - } - - if (preg_match('/^\s$/i', $value)) { - return self::WHITESPACE; - } - return self::WORD; } } diff --git a/packages/guides-restructured-text/tests/benchmarks/InlineLexerBench.php b/packages/guides-restructured-text/tests/benchmarks/InlineLexerBench.php index 57b270e58..389163621 100644 --- a/packages/guides-restructured-text/tests/benchmarks/InlineLexerBench.php +++ b/packages/guides-restructured-text/tests/benchmarks/InlineLexerBench.php @@ -26,4 +26,15 @@ public function benchInlineLexer(): void $lexer = new InlineLexer(); $lexer->setInput('This is a `link`_ to a section.'); } + + #[Revs([1000, 10_000])] + #[Iterations(5)] + public function benchFullParagraph(): void + { + $lexer = new InlineLexer(); + $lexer->setInput(' +With :issue:`103894` the new data processor :ref:`PageContentFetchingProcessor ` +has been introduced, to allow fetching page content based on the current page +layout, taking the configured :php:`SlideMode` into account.'); + } } diff --git a/packages/guides/src/Files.php b/packages/guides/src/Files.php index d4fb27810..c7c185ac0 100644 --- a/packages/guides/src/Files.php +++ b/packages/guides/src/Files.php @@ -18,8 +18,8 @@ use 
Iterator; use IteratorAggregate; +use function array_key_exists; use function count; -use function in_array; use function sort; use const SORT_FLAG_CASE;