Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions confluence-mdx/bin/mdx_to_storage/emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import os
import re
from typing import Optional

Expand Down Expand Up @@ -69,6 +70,9 @@ def emit_block(block: Block, context: Optional[dict] = None) -> str:
if block.type == "callout":
return _emit_callout(block, context)

if block.type == "figure":
return _emit_figure(block)

return ""


Expand Down Expand Up @@ -168,3 +172,24 @@ def _parse_callout_children_from_content(content: str) -> list[Block]:
return []
from .parser import parse_mdx
return parse_mdx(inner_text)


def _emit_figure(block: Block) -> str:
    """Render a figure block as a Confluence ``<ac:image>`` element.

    Reads the ``src``, ``width`` and ``caption`` entries of ``block.attrs``.
    Only the final path component of ``src`` is used, as the attachment
    filename. ``width`` adds an ``ac:width`` attribute when non-empty, and
    ``caption`` adds an ``<ac:caption>`` child whose text is passed through
    ``convert_inline``.

    Args:
        block: Figure block whose ``attrs`` mapping holds the image attributes.

    Returns:
        The ``<ac:image>`` markup as a single string, or ``""`` when the
        block has no usable ``src``.
    """
    # Function-scope import so the block does not depend on a module-level
    # ``html`` import being present.
    from html import escape

    src = block.attrs.get("src", "").strip()
    if not src:
        return ""

    # Both values land inside double-quoted XML attributes; escape them so a
    # quote, ampersand or angle bracket cannot produce malformed markup.
    filename = escape(os.path.basename(src), quote=True)
    width = escape(block.attrs.get("width", "").strip(), quote=True)
    caption = block.attrs.get("caption", "").strip()

    attrs = ['ac:align="center"']
    if width:
        attrs.append(f'ac:width="{width}"')

    parts = [
        f"<ac:image {' '.join(attrs)}>",
        f'<ri:attachment ri:filename="{filename}"></ri:attachment>',
    ]
    if caption:
        parts.append(f"<ac:caption><p>{convert_inline(caption)}</p></ac:caption>")
    parts.append("</ac:image>")
    return "".join(parts)
11 changes: 11 additions & 0 deletions confluence-mdx/bin/mdx_to_storage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,20 @@ def _parse_figure_attrs(content: str) -> dict:
for key, v1, v2 in _FIGURE_IMG_ATTR_PATTERN.findall(img_match.group(1)):
value = v1 or v2
attrs[key] = value
caption = _extract_figure_caption(content)
if caption:
attrs["caption"] = caption
return attrs


def _extract_figure_caption(content: str) -> str:
    """Return the tag-free text of the first ``<figcaption>`` in *content*.

    The caption body may span several lines. Any markup nested inside it is
    removed and the remaining text is stripped of surrounding whitespace.
    An empty string is returned when no ``<figcaption>`` element is found.
    """
    found = re.search(r"<figcaption[^>]*>(.*?)</figcaption>", content, flags=re.DOTALL)
    if found is None:
        return ""
    # Drop nested tags such as <strong>, keeping only their text content.
    text_only = re.sub(r"<[^>]+>", "", found.group(1))
    return text_only.strip()


def _parse_html_block(lines: list[str], start: int) -> tuple[Block, int]:
i = start + 1
while i < len(lines):
Expand Down
73 changes: 73 additions & 0 deletions confluence-mdx/tests/test_mdx_to_storage/test_emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,76 @@ def test_emit_callout_body_multiple_paragraphs():
assert "<p>First paragraph.</p>" in xhtml
assert "<p>Second paragraph.</p>" in xhtml
assert '<ac:structured-macro ac:name="info">' in xhtml


def test_emit_figure_to_ac_image_with_width():
    """A figure whose <img> has a width renders an ac:image with ac:width."""
    source = """<figure>
<img src="/images/path/sample.png" alt="Sample" width="700" data-layout="center">
</figure>
"""
    expected = (
        '<ac:image ac:align="center" ac:width="700">'
        '<ri:attachment ri:filename="sample.png"></ri:attachment>'
        "</ac:image>"
    )
    rendered = emit_document(parse_mdx(source))
    assert rendered == expected


def test_emit_figure_with_caption():
    """A <figcaption> is rendered as ac:caption with inline markup converted."""
    source = """<figure>
<img src="/images/path/sample.png" alt="Sample">
<figcaption>This is **caption**</figcaption>
</figure>
"""
    rendered = emit_document(parse_mdx(source))
    expected_fragments = (
        '<ac:image ac:align="center">',
        '<ri:attachment ri:filename="sample.png"></ri:attachment>',
        "<ac:caption><p>This is <strong>caption</strong></p></ac:caption>",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_emit_figure_without_src_is_skipped():
    """A figure block with no ``src`` attribute emits an empty string."""
    srcless_figure = Block(type="figure", content="<figure></figure>", attrs={})
    rendered = emit_block(srcless_figure)
    assert rendered == ""


def test_emit_figure_without_width_minimal():
    """A figure with only a src (no width, no caption) omits ac:width."""
    source = """<figure>
<img src="/images/sample.png" alt="Alt">
</figure>
"""
    expected = (
        '<ac:image ac:align="center">'
        '<ri:attachment ri:filename="sample.png"></ri:attachment>'
        "</ac:image>"
    )
    rendered = emit_document(parse_mdx(source))
    assert rendered == expected
    assert "ac:width" not in rendered


def test_emit_figure_in_mixed_document():
    """A figure placed between headings and paragraphs renders with them."""
    source = """---
title: "Page"
---

# Page

## Overview

Some text.

<figure>
<img src="/images/path/diagram.png" alt="Diagram" width="600">
<figcaption>Architecture diagram</figcaption>
</figure>

More text after image.
"""
    rendered = emit_document(parse_mdx(source))
    expected_fragments = (
        "<h1>Overview</h1>",
        "<p>Some text.</p>",
        '<ac:image ac:align="center" ac:width="600">',
        '<ri:attachment ri:filename="diagram.png">',
        "<ac:caption><p>Architecture diagram</p></ac:caption>",
        "<p>More text after image.</p>",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    # Neither the frontmatter nor the page title may appear in the output.
    assert "Page" not in rendered
23 changes: 23 additions & 0 deletions confluence-mdx/tests/test_mdx_to_storage/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,29 @@ def test_parse_figure_block_and_img_attrs():
}


def test_parse_figure_with_caption():
    """The parsed caption attribute holds the figcaption text without tags."""
    source = """<figure>
<img src="/images/sample.png" alt="Sample" width="700" data-layout="center">
<figcaption>Caption <strong>text</strong></figcaption>
</figure>
"""
    first_block = parse_mdx(source)[0]
    assert first_block.type == "figure"
    assert first_block.attrs["caption"] == "Caption text"


def test_parse_figure_self_closing_img():
    """A self-closing ``<img ... />`` still yields the src and width attrs."""
    source = """<figure>
<img src="/images/sample.png" alt="Sample" width="500" />
</figure>
"""
    first_block = parse_mdx(source)[0]
    assert first_block.type == "figure"
    parsed_attrs = first_block.attrs
    assert parsed_attrs["src"] == "/images/sample.png"
    assert parsed_attrs["width"] == "500"


def test_parse_paragraph_fallback():
blocks = parse_mdx("one\ntwo\n\n")
assert blocks[0].type == "paragraph"
Expand Down
Loading