Skip to content

Commit

Permalink
Pick up anchors from raw HTML - WITHOUT PROPOSAL
Browse files Browse the repository at this point in the history
  • Loading branch information
oprypin committed Nov 13, 2023
1 parent 04cca3b commit cf17645
Showing 1 changed file with 49 additions and 1 deletion.
50 changes: 49 additions & 1 deletion mkdocs/structure/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
from urllib.parse import urljoin, urlsplit, urlunsplit

import markdown
import markdown.htmlparser
import markdown.postprocessors
import markdown.treeprocessors
from markdown.extensions import md_in_html
from markdown.util import AMP_SUBSTITUTE

from mkdocs import utils
Expand Down Expand Up @@ -262,6 +264,14 @@ def render(self, config: MkDocsConfig, files: Files) -> None:
extension_configs=config['mdx_configs'] or {},
)

if type(md.preprocessors['html_block']) is markdown.preprocessors.HtmlBlockPreprocessor:
raw_html_processor = HtmlBlockPreprocessorModified(md)
elif type(md.preprocessors['html_block']) is md_in_html.HtmlBlockPreprocessor:
raw_html_processor = HtmlBlockPreprocessorExtraModified(md)
else:
raise RuntimeError("The user added an extension that replaced 'html_block'.")
md.preprocessors.register(raw_html_processor, 'html_block', 20)

relative_path_ext = _RelativePathTreeprocessor(self.file, files, config)
relative_path_ext._register(md)

Expand All @@ -271,7 +281,9 @@ def render(self, config: MkDocsConfig, files: Files) -> None:
self.content = md.convert(self.markdown)
self.toc = get_toc(getattr(md, 'toc_tokens', []))
self._title_from_render = extract_title_ext.title
self.present_anchor_ids = relative_path_ext.present_anchor_ids
self.present_anchor_ids = (
relative_path_ext.present_anchor_ids | raw_html_processor.present_anchor_ids
)
if log.getEffectiveLevel() > logging.DEBUG:
self.links_to_anchors = relative_path_ext.links_to_anchors

Expand Down Expand Up @@ -483,6 +495,42 @@ def _register(self, md: markdown.Markdown) -> None:
md.treeprocessors.register(self, "relpath", 0)


class HTMLExtractorModified(markdown.htmlparser.HTMLExtractor):
    """Raw-HTML extractor that also records the `id` attributes it sees.

    Behaves like the parent extractor, and in addition collects the value of
    every `id` attribute found on an opening or self-closing tag into
    `present_anchor_ids`.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Values of all `id` attributes encountered while parsing.
        self.present_anchor_ids: set[str] = set()

    def _record_anchor_ids(self, attrs) -> None:
        """Add the value of each `id` attribute in `attrs` to `present_anchor_ids`.

        Args:
            attrs: The `(name, value)` pairs passed to the tag handlers.
        """
        for key, value in attrs:
            # A value-less attribute (`<div id>`) is reported with a value of None;
            # that is not a usable anchor and must not end up in a set of strings.
            if key == 'id' and value is not None:
                self.present_anchor_ids.add(value)

    def handle_starttag(self, tag, attrs) -> None:
        """Record any `id` on an opening tag, then defer to the parent handler."""
        self._record_anchor_ids(attrs)
        return super().handle_starttag(tag, attrs)

    def handle_startendtag(self, tag, attrs) -> None:
        """Record any `id` on a self-closing tag, then defer to the parent handler."""
        # Self-closing tags such as `<a id="x"/>` are dispatched to this hook rather
        # than to `handle_starttag`, so they need to be inspected here as well.
        # Recording into a set keeps this harmless even if a parent implementation
        # also forwards to `handle_starttag`.
        self._record_anchor_ids(attrs)
        return super().handle_startendtag(tag, attrs)


class HTMLExtractorExtraModified(HTMLExtractorModified, md_in_html.HTMLExtractorExtra):
    """Combine the `id`-recording extractor with the `md_in_html` extractor.

    Adds no behavior of its own; it only mixes the two base classes together.
    """


class HtmlBlockPreprocessorModified(markdown.preprocessors.Preprocessor):
    """Preprocessor that runs the document through `HTMLExtractorClass`.

    After each `run`, `present_anchor_ids` holds the ids the extractor recorded.
    """

    # Extractor class instantiated by `run`; subclasses may swap it out.
    HTMLExtractorClass = HTMLExtractorModified

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Replaced with the extractor's result on every call to `run`.
        self.present_anchor_ids: set[str] = set()

    def run(self, lines: list[str]) -> list[str]:
        """Feed the joined `lines` to a fresh extractor and return its cleaned document.

        Args:
            lines: The lines of the document.

        Returns:
            The extractor's `cleandoc` pieces concatenated and split back into lines.
        """
        extractor = self.HTMLExtractorClass(self.md)
        extractor.feed('\n'.join(lines))
        extractor.close()

        # Keep the ids around so the caller can read them after conversion.
        self.present_anchor_ids = extractor.present_anchor_ids

        cleaned = ''.join(extractor.cleandoc)
        return cleaned.split('\n')


class HtmlBlockPreprocessorExtraModified(HtmlBlockPreprocessorModified):
    """Variant of `HtmlBlockPreprocessorModified` that uses `HTMLExtractorExtraModified`."""

    # Only the extractor class differs from the parent preprocessor.
    HTMLExtractorClass = HTMLExtractorExtraModified


class _ExtractTitleTreeprocessor(markdown.treeprocessors.Treeprocessor):
title: str | None = None
postprocessors: Sequence[markdown.postprocessors.Postprocessor] = ()
Expand Down

0 comments on commit cf17645

Please sign in to comment.