diff --git a/confluence-mdx/bin/mdx_to_storage/emitter.py b/confluence-mdx/bin/mdx_to_storage/emitter.py index 4d750ed50..5ab1dbd9f 100644 --- a/confluence-mdx/bin/mdx_to_storage/emitter.py +++ b/confluence-mdx/bin/mdx_to_storage/emitter.py @@ -84,6 +84,9 @@ def emit_block(block: Block, context: Optional[dict] = None) -> str: if block.type == "table": return _emit_markdown_table(block.content) + if block.type == "blockquote": + return _emit_blockquote(block.content) + return "" @@ -283,3 +286,33 @@ def _replace_cell(match: re.Match[str]) -> str: return f"<{tag}{attrs}>{convert_inline(inner.strip())}" return pattern.sub(_replace_cell, stripped) + + +def _emit_blockquote(content: str) -> str: + raw_lines = content.splitlines() + stripped_lines: list[str] = [] + for line in raw_lines: + line = line.lstrip() + if line.startswith(">"): + line = line[1:] + if line.startswith(" "): + line = line[1:] + stripped_lines.append(line) + + paragraphs: list[str] = [] + current: list[str] = [] + for line in stripped_lines: + if line.strip(): + current.append(line.strip()) + continue + if current: + paragraphs.append(" ".join(current)) + current = [] + if current: + paragraphs.append(" ".join(current)) + + if not paragraphs: + return "

" + + body = "".join(f"

{convert_inline(text)}

" for text in paragraphs) + return f"
{body}
" diff --git a/confluence-mdx/bin/mdx_to_storage/parser.py b/confluence-mdx/bin/mdx_to_storage/parser.py index 269823e8c..23185f0dd 100644 --- a/confluence-mdx/bin/mdx_to_storage/parser.py +++ b/confluence-mdx/bin/mdx_to_storage/parser.py @@ -85,6 +85,11 @@ def parse_mdx(text: str) -> list[Block]: blocks.append(block) continue + if _is_blockquote_line(line): + block, i = _parse_blockquote_block(lines, i) + blocks.append(block) + continue + if _is_list_line(line): block, i = _parse_list_block(lines, i) blocks.append(block) @@ -259,6 +264,23 @@ def _parse_markdown_table_block(lines: list[str], start: int) -> tuple[Block, in return Block(type="table", content=content), i +def _parse_blockquote_block(lines: list[str], start: int) -> tuple[Block, int]: + i = start + while i < len(lines): + current = lines[i] + if current == "": + if i + 1 < len(lines) and _is_blockquote_line(lines[i + 1]): + i += 1 + continue + break + if not _is_blockquote_line(current): + break + i += 1 + + content = "\n".join(lines[start:i]) + "\n" + return Block(type="blockquote", content=content), i + + def _parse_html_block(lines: list[str], start: int) -> tuple[Block, int]: if lines[start].startswith(" bool: return True if line.startswith(" bool: if "-" not in cell: return False return True + + +def _is_blockquote_line(line: str) -> bool: + stripped = line.lstrip() + return stripped.startswith("> ") or stripped == ">" diff --git a/confluence-mdx/tests/test_mdx_to_storage/test_emitter.py b/confluence-mdx/tests/test_mdx_to_storage/test_emitter.py index ecce341cc..b6b7b670a 100644 --- a/confluence-mdx/tests/test_mdx_to_storage/test_emitter.py +++ b/confluence-mdx/tests/test_mdx_to_storage/test_emitter.py @@ -448,3 +448,38 @@ def test_emit_paragraph_then_markdown_table(): assert "

Intro paragraph.

" in xhtml assert "" in xhtml assert "" in xhtml + + +def test_emit_blockquote_single_paragraph(): + mdx = "> quoted **text** and `code`\n" + xhtml = emit_document(parse_mdx(mdx)) + assert xhtml == "

quoted text and code

" + + +def test_emit_blockquote_multiple_paragraphs(): + mdx = "> first line\n>\n> second line\n" + xhtml = emit_document(parse_mdx(mdx)) + assert xhtml == "

first line

second line

" + + +def test_emit_blockquote_multiline_single_paragraph(): + """Multiple `>` lines without blank separator → merged into one paragraph.""" + mdx = "> line one\n> line two\n> line three\n" + xhtml = emit_document(parse_mdx(mdx)) + assert xhtml == "

line one line two line three

" + + +def test_emit_blockquote_in_mixed_document(): + """Blockquote integrated with other block types.""" + mdx = "## Section\n\n> Important note.\n\nFollowing text.\n" + xhtml = emit_document(parse_mdx(mdx)) + assert "

Section

" in xhtml + assert "

Important note.

" in xhtml + assert "

Following text.

" in xhtml + + +def test_emit_blockquote_empty_body(): + """Empty blockquote `>` only → blockquote with empty paragraph.""" + mdx = ">\n" + xhtml = emit_document(parse_mdx(mdx)) + assert xhtml == "

" diff --git a/confluence-mdx/tests/test_mdx_to_storage/test_parser.py b/confluence-mdx/tests/test_mdx_to_storage/test_parser.py index 98e6cb0a7..b7fd3df26 100644 --- a/confluence-mdx/tests/test_mdx_to_storage/test_parser.py +++ b/confluence-mdx/tests/test_mdx_to_storage/test_parser.py @@ -213,3 +213,33 @@ def test_parse_inline_code_with_pipes_stays_paragraph(): mdx = "Expression: `a == 'x' || b == 'y'` is valid.\n" blocks = parse_mdx(mdx) assert blocks[0].type == "paragraph" + + +def test_parse_blockquote_block(): + mdx = "> quoted line\n> second line\n" + blocks = parse_mdx(mdx) + assert len(blocks) == 1 + assert blocks[0].type == "blockquote" + assert "> quoted line" in blocks[0].content + + +def test_parse_blockquote_with_blank_and_stop_before_next_block(): + mdx = "> first\n>\n> second\n## Heading\n" + blocks = parse_mdx(mdx) + assert blocks[0].type == "blockquote" + assert blocks[1].type == "heading" + + +def test_parse_blockquote_followed_by_paragraph(): + """Blockquote ends when non-`>` line appears; next block is paragraph.""" + mdx = "> quote\n\nParagraph after.\n" + blocks = parse_mdx(mdx) + types = [b.type for b in blocks if b.type != "empty"] + assert types == ["blockquote", "paragraph"] + + +def test_parse_triple_gt_not_blockquote(): + """Lines like `>>> token` (shell output) should NOT be parsed as blockquote.""" + mdx = ">>> Service Account token\n" + blocks = parse_mdx(mdx) + assert blocks[0].type != "blockquote"

1