, , ) is escaped & logged.
+ """
+ out, last = [], 0
+ for match in _HTML_TAG_RE.finditer(text):
+ # escape plain text before the tag-like match
+ out.append(_escape_angles(text[last:match.start()]))
+
+ tag_full = match.group(0) # matched groups from regex
+ tag_name = match.group(1).lower() if match.group(1) else "" # to check if it isn't whitelisted e.g.
+
+ if tag_name in _WHITELIST_TAGS:
+ out.append(tag_full) # keep
/
/
+ else:
+ _log_tag_escape(tag_full)
+ out.append(_escape_angles(tag_full)) # escape non-whitelisted tag-like text
+
+ last = match.end()
+
+ out.append(_escape_angles(text[last:]))
+ return "".join(out)
+
+def _escape_outside_inline_code(text: str) -> str:
+ """Within non-fenced areas, escape outside inline code spans.
+
+ Spans matched by ``_INLINE_CODE_RE`` are copied through unchanged. Every
+ other slice first has each ``_HEADING_SPACE_RE`` match replaced by its
+ group 1 followed by a single space, and is then passed through
+ ``_escape_preserving_hr_only``.
+ """
+ out, last = [], 0
+ for match in _INLINE_CODE_RE.finditer(text):
+ # fix headings in the plain-text slice, then escape angles (keeping
+ # whatever _escape_preserving_hr_only leaves intact - presumably <hr>; confirm)
+ chunk = text[last:match.start()]
+ chunk = _HEADING_SPACE_RE.sub(r"\1 ", chunk)
+ out.append(_escape_preserving_hr_only(chunk))
+ out.append(match.group(0)) # keep inline code as-is
+ last = match.end()
+ # tail: text after the last inline code span gets the same treatment
+ chunk = text[last:]
+ chunk = _HEADING_SPACE_RE.sub(r"\1 ", chunk)
+ out.append(_escape_preserving_hr_only(chunk))
+ return "".join(out)
+
+def escape_angle_brackets(markdown: str) -> str:
+    """Escape ``<`` and ``>`` everywhere except inside fenced/inline code.
+
+    Fenced blocks matched by ``_FENCE_RE`` are copied through verbatim. All
+    other text is handed to ``_escape_outside_inline_code``, which decides
+    what is escaped and what is kept. ``&lt;`` / ``&gt;`` entities already
+    present in the input come back out unchanged.
+    """
+    # Protect existing entities so we don't double-escape them: park the
+    # entity text (not the raw brackets) behind NUL-delimited sentinels that
+    # the escaping helpers will not touch. Swapping the raw "<" / ">" here
+    # would hide every bracket from the escaper and turn this function into
+    # a no-op.
+    LT, GT = "\x00LT\x00", "\x00GT\x00"
+    markdown = markdown.replace("&lt;", LT).replace("&gt;", GT)
+
+    out, last = [], 0
+    for match in _FENCE_RE.finditer(markdown):
+        out.append(_escape_outside_inline_code(markdown[last:match.start()]))  # non-fenced
+        out.append(match.group(0))  # keep fenced code intact
+        last = match.end()
+    # text after the last fenced block (or the whole input if there is none)
+    out.append(_escape_outside_inline_code(markdown[last:]))
+
+    result = "".join(out)
+    # Put the original entities back exactly as they were written.
+    return result.replace(LT, "&lt;").replace(GT, "&gt;")
+
# ============================================================================
# Conversion helpers
# ============================================================================
def mdx_heading(entry: Dict[str, Any]) -> str:
- """Create a level‑2 MDX heading from an API entry."""
+ """Create a level-2 MDX heading from an API entry.
+
+ Reads ``entry["version"]`` and ``entry["buildDate"]``. The build date is
+ parsed with ``API_DATE_FMT`` and rendered as ``YYYY/MM/DD``. The returned
+ heading line is followed by one blank line.
+ """
+ # Re-format the API build date for display in the heading.
date_str = datetime.strptime(entry["buildDate"], API_DATE_FMT).strftime("%Y/%m/%d")
return f"## {entry['version']} - {date_str}\n\n"
-
def mdx_block(entry: Dict[str, Any]) -> str:
"""Full MDX chunk for a single changelog entry (heading + body)."""
- return mdx_heading(entry) + entry["changelogMarkdown"]
+ # Run the raw changelog markdown through escape_angle_brackets before
+ # embedding it (presumably so MDX does not parse stray "<" / ">" as JSX - confirm).
+ safe_body = escape_angle_brackets(entry["changelogMarkdown"])
+ # Heading, escaped body, then a blank line after the body.
+ return mdx_heading(entry) + safe_body + "\n\n"
# ============================================================================
# Filesystem helpers
# ============================================================================
def output_path_for(branch: str, is_primary: bool) -> Path:
- """Return where the *whats‑new.mdx* for *branch* should live."""
+ """Return where the *whats-new.mdx* for *branch* should live."""
# We only need major.minor for the directory name – e.g. "6.2.1" → "6.2"
major_minor = ".".join(branch.split(".")[:2])
@@ -142,7 +220,6 @@ def output_path_for(branch: str, is_primary: bool) -> Path:
return PROJECT_ROOT / "versioned_docs" / f"version-{major_minor}" / "whats-new.mdx"
-
def write_whats_new_file(destination: Path, entries: List[Dict[str, Any]]) -> None:
"""Write an MDX file sorted by *buildDate* (newest first)."""
destination.parent.mkdir(parents=True, exist_ok=True)
@@ -156,7 +233,7 @@ def write_whats_new_file(destination: Path, entries: List[Dict[str, Any]]) -> No
destination.write_text(FRONT_MATTER + body, encoding="utf-8")
# ============================================================================
-# Command‑line interface
+# Command-line interface
# ============================================================================
def main() -> None:
@@ -168,14 +245,23 @@ def main() -> None:
requested_branches = sys.argv[1:]
for branch in requested_branches:
+ # reset log for this branch
+ _ESCAPED_TAG_EVENTS.clear()
+
is_primary = branch == primary_branch
changelog_entries = fetch_branch_entries(branch)
target_file = output_path_for(branch, is_primary)
write_whats_new_file(target_file, changelog_entries)
+
print(f"✅ Wrote {target_file.relative_to(PROJECT_ROOT)}")
- print("🏁 Finished.")
+ if _ESCAPED_TAG_EVENTS:
+ counts = Counter(_ESCAPED_TAG_EVENTS)
+ # print a concise per-branch summary for safe escapes
+ summary = ", ".join(f"{tag}×{n}" for tag, n in counts.most_common())
+ print(f" • Escaped non-whitelisted tag-like snippets: {summary}")
+ print("🏁 Finished.")
if __name__ == "__main__":
- main()
\ No newline at end of file
+ main()