Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions confluence-mdx/bin/mdx_to_storage/emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def emit_block(block: Block, context: Optional[dict] = None) -> str:
if block.type == "table":
return _emit_markdown_table(block.content)

if block.type == "blockquote":
return _emit_blockquote(block.content)

return ""


Expand Down Expand Up @@ -283,3 +286,33 @@ def _replace_cell(match: re.Match[str]) -> str:
return f"<{tag}{attrs}>{convert_inline(inner.strip())}</{tag}>"

return pattern.sub(_replace_cell, stripped)


def _emit_blockquote(content: str) -> str:
raw_lines = content.splitlines()
stripped_lines: list[str] = []
for line in raw_lines:
line = line.lstrip()
if line.startswith(">"):
line = line[1:]
if line.startswith(" "):
line = line[1:]
stripped_lines.append(line)

paragraphs: list[str] = []
current: list[str] = []
for line in stripped_lines:
if line.strip():
current.append(line.strip())
continue
if current:
paragraphs.append(" ".join(current))
current = []
if current:
paragraphs.append(" ".join(current))

if not paragraphs:
return "<blockquote><p></p></blockquote>"

body = "".join(f"<p>{convert_inline(text)}</p>" for text in paragraphs)
return f"<blockquote>{body}</blockquote>"
29 changes: 29 additions & 0 deletions confluence-mdx/bin/mdx_to_storage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ def parse_mdx(text: str) -> list[Block]:
blocks.append(block)
continue

if _is_blockquote_line(line):
block, i = _parse_blockquote_block(lines, i)
blocks.append(block)
continue

if _is_list_line(line):
block, i = _parse_list_block(lines, i)
blocks.append(block)
Expand Down Expand Up @@ -259,6 +264,23 @@ def _parse_markdown_table_block(lines: list[str], start: int) -> tuple[Block, in
return Block(type="table", content=content), i


def _parse_blockquote_block(lines: list[str], start: int) -> tuple[Block, int]:
    """Collect the run of blockquote lines that begins at ``start``.

    The run continues over blockquote lines and over an empty line when the
    line right after it is again a blockquote line. It stops at any other
    line or at the end of ``lines``.

    Args:
        lines: All source lines of the document.
        start: Index of the first line of the blockquote.

    Returns:
        A ``blockquote`` block whose content is the collected lines joined by
        newlines with a trailing newline, and the index of the first line
        that is not part of the block.
    """
    total = len(lines)
    end = start
    while end < total:
        text = lines[end]
        if text == "":
            # An empty line stays in the block only if the quote resumes right after it.
            quote_resumes = end + 1 < total and _is_blockquote_line(lines[end + 1])
            if not quote_resumes:
                break
        elif not _is_blockquote_line(text):
            break
        end += 1

    body = "\n".join(lines[start:end])
    return Block(type="blockquote", content=body + "\n"), end


def _parse_html_block(lines: list[str], start: int) -> tuple[Block, int]:
if lines[start].startswith("<table"):
i = start + 1
Expand Down Expand Up @@ -305,6 +327,8 @@ def _starts_new_block(line: str) -> bool:
return True
if line.startswith("<figure"):
return True
if _is_blockquote_line(line):
return True
if _is_list_line(line):
return True
if _is_html_block_start(line):
Expand Down Expand Up @@ -369,3 +393,8 @@ def _is_markdown_table_separator(line: str) -> bool:
if "-" not in cell:
return False
return True


def _is_blockquote_line(line: str) -> bool:
stripped = line.lstrip()
return stripped.startswith("> ") or stripped == ">"
35 changes: 35 additions & 0 deletions confluence-mdx/tests/test_mdx_to_storage/test_emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,38 @@ def test_emit_paragraph_then_markdown_table():
assert "<p>Intro paragraph.</p>" in xhtml
assert "<table><tbody>" in xhtml
assert "<td><p>1</p></td>" in xhtml


def test_emit_blockquote_single_paragraph():
    """A one-line quote becomes a single paragraph with inline markup converted."""
    blocks = parse_mdx("> quoted **text** and `code`\n")
    expected = "<blockquote><p>quoted <strong>text</strong> and <code>code</code></p></blockquote>"
    assert emit_document(blocks) == expected


def test_emit_blockquote_multiple_paragraphs():
    """A bare `>` line between quoted lines splits the quote into two paragraphs."""
    blocks = parse_mdx("> first line\n>\n> second line\n")
    expected = "<blockquote><p>first line</p><p>second line</p></blockquote>"
    assert emit_document(blocks) == expected


def test_emit_blockquote_multiline_single_paragraph():
    """Adjacent `>` lines with no blank separator are joined into one paragraph."""
    blocks = parse_mdx("> line one\n> line two\n> line three\n")
    expected = "<blockquote><p>line one line two line three</p></blockquote>"
    assert emit_document(blocks) == expected


def test_emit_blockquote_in_mixed_document():
    """A blockquote between a heading and a paragraph is emitted alongside both."""
    rendered = emit_document(parse_mdx("## Section\n\n> Important note.\n\nFollowing text.\n"))
    expected_fragments = (
        "<h1>Section</h1>",
        "<blockquote><p>Important note.</p></blockquote>",
        "<p>Following text.</p>",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_emit_blockquote_empty_body():
    """A lone `>` line yields a blockquote that wraps one empty paragraph."""
    blocks = parse_mdx(">\n")
    expected = "<blockquote><p></p></blockquote>"
    assert emit_document(blocks) == expected
30 changes: 30 additions & 0 deletions confluence-mdx/tests/test_mdx_to_storage/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,33 @@ def test_parse_inline_code_with_pipes_stays_paragraph():
mdx = "Expression: `a == 'x' || b == 'y'` is valid.\n"
blocks = parse_mdx(mdx)
assert blocks[0].type == "paragraph"


def test_parse_blockquote_block():
    """Two consecutive `>` lines parse into exactly one blockquote block."""
    parsed = parse_mdx("> quoted line\n> second line\n")
    assert len(parsed) == 1
    quote = parsed[0]
    assert quote.type == "blockquote"
    assert "> quoted line" in quote.content


def test_parse_blockquote_with_blank_and_stop_before_next_block():
    """The quote spans the bare `>` line and ends where the heading begins."""
    parsed = parse_mdx("> first\n>\n> second\n## Heading\n")
    leading_types = [block.type for block in parsed[:2]]
    assert leading_types == ["blockquote", "heading"]


def test_parse_blockquote_followed_by_paragraph():
    """A blank line followed by plain text ends the quote; a paragraph follows."""
    parsed = parse_mdx("> quote\n\nParagraph after.\n")
    # Blocks of type "empty" are ignored for this comparison.
    kinds = []
    for block in parsed:
        if block.type != "empty":
            kinds.append(block.type)
    assert kinds == ["blockquote", "paragraph"]


def test_parse_triple_gt_not_blockquote():
    """A `>>> token` line (shell-style output) must not parse as a blockquote."""
    parsed = parse_mdx(">>> Service Account token\n")
    first_block = parsed[0]
    assert first_block.type != "blockquote"