Skip to content

Commit

Permalink
Fix unescaping of Markdown titles
Browse files Browse the repository at this point in the history
Currently this is a regression in unreleased code. Extensions such as 'inlinehilite' and 'smarty' were leaving `wzxhzdk:1` placeholders in page titles because of their use of htmlStash.
  • Loading branch information
oprypin committed Jul 18, 2023
1 parent d8bc90f commit c3d4ff7
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 13 deletions.
22 changes: 11 additions & 11 deletions mkdocs/structure/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
import logging
import posixpath
import warnings
from typing import TYPE_CHECKING, Any, Callable, Iterator, MutableMapping
from typing import TYPE_CHECKING, Any, Callable, Iterator, MutableMapping, Sequence
from urllib.parse import unquote as urlunquote
from urllib.parse import urljoin, urlsplit, urlunsplit

import markdown
import markdown.extensions
import markdown.postprocessors
import markdown.treeprocessors
from markdown.util import AMP_SUBSTITUTE

Expand All @@ -22,16 +20,12 @@
if TYPE_CHECKING:
from xml.etree import ElementTree as etree

import markdown.postprocessors

from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import File, Files
from mkdocs.structure.toc import TableOfContents

_unescape: Callable[[str], str]
try:
_unescape = markdown.treeprocessors.UnescapeTreeprocessor().unescape # type: ignore
except AttributeError:
_unescape = markdown.postprocessors.UnescapePostprocessor().run


log = logging.getLogger(__name__)

Expand Down Expand Up @@ -437,6 +431,7 @@ def _register(self, md: markdown.Markdown) -> None:

class _ExtractTitleTreeprocessor(markdown.treeprocessors.Treeprocessor):
title: str | None = None
postprocessors: Sequence[markdown.postprocessors.Postprocessor] = ()

def run(self, root: etree.Element) -> etree.Element:
for el in root:
Expand All @@ -446,13 +441,18 @@ def run(self, root: etree.Element) -> etree.Element:
el = copy.copy(el)
del el[-1]
# Extract the text only, recursively.
self.title = _unescape(''.join(el.itertext()))
title = ''.join(el.itertext())
# Unescape per Markdown implementation details.
for pp in self.postprocessors:
title = pp.run(title)
self.title = title
break
return root

def _register(self, md: markdown.Markdown) -> None:
self.postprocessors = tuple(md.postprocessors)
md.treeprocessors.register(
self,
"mkdocs_extract_title",
priority=1, # Close to the end.
priority=-1, # After the end.
)
5 changes: 3 additions & 2 deletions mkdocs/tests/structure/page_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def test_page_title_from_markdown_stripped_anchorlinks(self, docs_dir):

_FORMATTING_CONTENT = dedent(
'''
# Hello *beautiful* `world`
# \\*Hello --- *beautiful* `world`
Hi.
'''
Expand All @@ -361,11 +361,12 @@ def test_page_title_from_markdown_stripped_anchorlinks(self, docs_dir):
@tempdir(files={'testing_formatting.md': _FORMATTING_CONTENT})
def test_page_title_from_markdown_strip_formatting(self, docs_dir):
cfg = load_config()
cfg.markdown_extensions.append('smarty')
fl = File('testing_formatting.md', docs_dir, docs_dir, use_directory_urls=True)
pg = Page(None, fl, cfg)
pg.read_source(cfg)
pg.render(cfg, fl)
self.assertEqual(pg.title, 'Hello beautiful world')
self.assertEqual(pg.title, '*Hello — beautiful world')

_ATTRLIST_CONTENT = dedent(
'''
Expand Down

0 comments on commit c3d4ff7

Please sign in to comment.