diff --git a/src/ComplexMarkdownParser.php b/src/ComplexMarkdownParser.php
new file mode 100644
index 0000000..365b5aa
--- /dev/null
+++ b/src/ComplexMarkdownParser.php
@@ -0,0 +1,138 @@
+content = $content;
+        $this->lines = explode("\n", $this->content);
+    }
+
+    /**
+     * Split the content into its YAML front matter and its Markdown body.
+     *
+     * Only the first pair of `---` delimiter lines is treated as front matter,
+     * so `---` lines further down (for example inside a fenced code block)
+     * stay part of the body. Content without a complete, leading front matter
+     * block is returned unchanged as the body with empty matter.
+     */
+    public function parse(): Document
+    {
+        $this->findFrontMatterStartAndEndLineNumbers();
+
+        if (!$this->hasFrontMatter()) {
+            return new Document([], $this->content);
+        }
+
+        // An empty front matter block parses to null; fall back to an empty
+        // array so the result matches the "no front matter" case above.
+        $matter = Yaml::parse($this->getFrontMatter()) ?? [];
+
+        return new Document($matter, $this->getBody());
+    }
+
+    /**
+     * Locate the opening and closing delimiter lines of the front matter.
+     *
+     * The opening delimiter must be the first non-blank line of the content;
+     * scanning stops as soon as the closing delimiter has been found.
+     */
+    protected function findFrontMatterStartAndEndLineNumbers()
+    {
+        foreach ($this->lines as $lineNumber => $lineContents) {
+            if ($this->isFrontMatterControlBlock($lineContents)) {
+                $this->setFrontMatterLineNumber($lineNumber);
+
+                if (isset($this->frontMatterEndLine)) {
+                    return;
+                }
+
+                continue;
+            }
+
+            // Text before the opening delimiter means the `---` lines that
+            // follow are ordinary Markdown (rules, tables), not front matter.
+            if (!isset($this->frontMatterStartLine) && trim($lineContents) !== '') {
+                return;
+            }
+        }
+    }
+
+    /**
+     * Record a delimiter line: the first call sets the start line, the second
+     * sets the end line, and any further calls are ignored.
+     */
+    protected function setFrontMatterLineNumber(int $lineNumber)
+    {
+        if (!isset($this->frontMatterStartLine)) {
+            $this->frontMatterStartLine = $lineNumber;
+
+            return;
+        }
+
+        if (!isset($this->frontMatterEndLine)) {
+            $this->frontMatterEndLine = $lineNumber;
+        }
+    }
+
+    /**
+     * Return the raw YAML found strictly between the two delimiter lines.
+     */
+    protected function getFrontMatter(): string
+    {
+        $offset = $this->frontMatterStartLine + 1;
+        $length = $this->frontMatterEndLine - $offset;
+
+        return implode("\n", array_slice($this->lines, $offset, $length));
+    }
+
+    /**
+     * Return everything after the closing delimiter, minus one leading blank line.
+     */
+    protected function getBody(): string
+    {
+        $body = array_slice($this->lines, $this->frontMatterEndLine + 1);
+
+        return implode("\n", $this->trimBody($body));
+    }
+
+    /**
+     * Drop the first body line when it is blank (the separator that usually
+     * follows the closing delimiter). An empty body is returned as is.
+     */
+    protected function trimBody(array $body): array
+    {
+        if (isset($body[0]) && trim($body[0]) === '') {
+            unset($body[0]);
+        }
+
+        return $body;
+    }
+
+    /**
+     * Whether both an opening and a closing delimiter line were found.
+     */
+    protected function hasFrontMatter(): bool
+    {
+        return isset($this->frontMatterStartLine, $this->frontMatterEndLine);
+    }
+
+    /**
+     * Whether the line is a front matter delimiter: exactly `---`, optionally
+     * followed by whitespace (which covers the `\r` left by Windows line endings).
+     */
+    protected function isFrontMatterControlBlock(string $line): bool
+    {
+        return rtrim($line) === '---';
+    }
+}
diff --git a/src/YamlFrontMatter.php b/src/YamlFrontMatter.php
index 3678413..a1e1b68 100644
--- a/src/YamlFrontMatter.php
+++ b/src/YamlFrontMatter.php
@@ -23,6 +23,15 @@ public static function parse(string $content): Document
         return new Document($matter, $body);
     }
 
+    /**
+     * Parse content whose Markdown body may itself contain `---` lines,
+     * e.g. front matter shown inside a fenced code block.
+     */
+    public static function markdownCompatibleParse(string $content): Document
+    {
+        return (new ComplexMarkdownParser($content))->parse();
+    }
+
     public static function parseFile(string $path): Document
     {
         return static::parse(
diff --git a/tests/YamlFrontMatterMarkdownCompatibleParseTest.php b/tests/YamlFrontMatterMarkdownCompatibleParseTest.php
new file mode 100644
index 0000000..9702c56
--- /dev/null
+++ b/tests/YamlFrontMatterMarkdownCompatibleParseTest.php
@@ -0,0 +1,139 @@
+assertInstanceOf(Document::class, $document);
+        $this->assertEquals(['foo' => 'bar'], $document->matter());
+        $this->assertStringContainsString('Lorem ipsum.', $document->body());
+    }
+
+    /** @test */
+    public function it_can_parse_complex_front_matter_from_a_file()
+    {
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            file_get_contents(__DIR__.'/meta-document.md')
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEquals(['foo' => 'bar'], $document->matter());
+        $this->assertStringContainsString('Lorem ipsum.', $document->body());
+    }
+
+    /** @test */
+    public function it_separates_the_front_matter_from_the_body()
+    {
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "---\ntitle: Front Matter\n---\n\nLorem ipsum."
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+
+        // This implicitly asserts that the front matter does not contain any markdown
+        $this->assertEquals(['title' => 'Front Matter'], $document->matter());
+        // This implicitly asserts that the body does not contain any front matter remnants
+        $this->assertEquals('Lorem ipsum.', $document->body());
+    }
+
+    /** @test */
+    public function it_leaves_string_without_front_matter_intact()
+    {
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "Lorem ipsum."
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEmpty($document->matter());
+        $this->assertEquals('Lorem ipsum.', $document->body());
+    }
+
+    /** @test */
+    public function it_can_parse_a_file_partial_front_matter()
+    {
+        // If there is only one YAML control block, (---) the front matter is invalid
+        // and the document should be interpreted as having no front matter.
+
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "---\ntitle: Front Matter\n\nLorem ipsum."
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEmpty($document->matter());
+        $this->assertEquals("---\ntitle: Front Matter\n\nLorem ipsum.", $document->body());
+    }
+
+    /** @test */
+    public function it_can_parse_a_string_with_unix_line_endings()
+    {
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "---\nfoo: bar\n---\n\nLorem ipsum."
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEquals(['foo' => 'bar'], $document->matter());
+        $this->assertStringContainsString('Lorem ipsum.', $document->body());
+    }
+
+    /** @test */
+    public function it_can_parse_a_string_with_windows_line_endings()
+    {
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "---\r\nfoo: bar\r\n---\r\n\r\nLorem ipsum."
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEquals(['foo' => 'bar'], $document->matter());
+        $this->assertStringContainsString('Lorem ipsum.', $document->body());
+    }
+
+    /** @test */
+    public function it_can_parse_front_matter_without_a_body()
+    {
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "---\nfoo: bar\n---"
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEquals(['foo' => 'bar'], $document->matter());
+        $this->assertEquals('', $document->body());
+    }
+
+    /** @test */
+    public function it_can_parse_empty_front_matter()
+    {
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "---\n---\n\nLorem ipsum."
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEmpty($document->matter());
+        $this->assertEquals('Lorem ipsum.', $document->body());
+    }
+
+    /** @test */
+    public function it_ignores_control_blocks_that_do_not_open_the_document()
+    {
+        // Front matter must come first; `---` lines after other content are
+        // ordinary Markdown (horizontal rules, table separators).
+
+        $document = YamlFrontMatter::markdownCompatibleParse(
+            "Lorem ipsum.\n\n---\nfoo: bar\n---\n"
+        );
+
+        $this->assertInstanceOf(Document::class, $document);
+        $this->assertEmpty($document->matter());
+        $this->assertEquals("Lorem ipsum.\n\n---\nfoo: bar\n---\n", $document->body());
+    }
+}
diff --git a/tests/meta-document.md b/tests/meta-document.md
new file mode 100644
index 0000000..852d173
--- /dev/null
+++ b/tests/meta-document.md
@@ -0,0 +1,17 @@
+---
+foo: bar
+---
+
+Lorem ipsum.
+
+A paragraph in a Markdown Post.
+This file contains a Markdown code block that the original parser does not handle.
+See https://github.com/spatie/yaml-front-matter/discussions/30.
+
+```
+---
+title: The Title of Markdown Code
+---
+
+A paragraph in Markdown Code
+```