Skip to content

Commit

Permalink
fixup! feat: Add option to scan and register HTML anchors
Browse files Browse the repository at this point in the history
  • Loading branch information
oprypin committed Feb 23, 2024
1 parent 5eea2e0 commit 2fc4e3c
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 45 deletions.
3 changes: 1 addition & 2 deletions mkdocs.yml
Expand Up @@ -95,8 +95,7 @@ markdown_extensions:
permalink: "¤"

plugins:
- autorefs:
scan_anchors: true
- autorefs
- search
- markdown-exec
- gen-files:
Expand Down
11 changes: 1 addition & 10 deletions src/mkdocs_autorefs/plugin.py
Expand Up @@ -19,8 +19,6 @@
from urllib.parse import urlsplit

from markdown.extensions.attr_list import AttrListExtension
from mkdocs.config.base import Config
from mkdocs.config.config_options import Type
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import BasePlugin
from mkdocs.structure.pages import Page
Expand All @@ -41,14 +39,7 @@
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]


class AutorefsConfig(Config):
"""Configuration options for the Autorefs plugin."""

scan_anchors = Type(bool, default=False)
"""Whether to scan HTML pages for anchors defining references."""


class AutorefsPlugin(BasePlugin[AutorefsConfig]):
class AutorefsPlugin(BasePlugin):
"""An `mkdocs` plugin.
This plugin defines the following event hooks:
Expand Down
68 changes: 42 additions & 26 deletions src/mkdocs_autorefs/references.py
Expand Up @@ -4,7 +4,6 @@

import re
from html import escape, unescape
from itertools import zip_longest
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match, Tuple
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element
Expand Down Expand Up @@ -215,40 +214,57 @@ def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
"""
super().__init__(md)
self.plugin = plugin
self._slug = md.treeprocessors["toc"].slugify

def run(self, root: Element) -> None: # noqa: D102
if self.plugin.current_page is not None:
self._scan_anchors(root)
pending_anchors = _PendingAnchors(self.plugin, self.plugin.current_page)
self._scan_anchors(root, pending_anchors)
pending_anchors.flush()

def _scan_anchors(self, parent: Element) -> list[str]:
ids = []
# We iterate on pairs of elements, to check if the next element is a heading (alias feature).
for el, next_el in zip_longest(parent, parent[1:], fillvalue=Element("/")):
def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors) -> None:
for el in parent:
if el.tag == "a":
# We found an anchor. Record its id if it has one.
if anchor_id := el.get("id"):
if el.tail and el.tail.strip():
# If the anchor has a non-whitespace-only tail, it's not an alias:
# register it immediately.
self.plugin.register_anchor(self.plugin.current_page, anchor_id) # type: ignore[arg-type]
else:
# Else record its id and continue.
ids.append(anchor_id)
pending_anchors.append(anchor_id)
# Non-whitespace text after the element interrupts the chain, aliases can't apply.
if el.tail and el.tail.strip():
pending_anchors.flush()

elif el.tag == "p":
if ids := self._scan_anchors(el):
# Markdown anchors are always rendered as `a` tags within a `p` tag.
# Headings therefore appear after the `p` tag. Here the current element
# is a `p` tag and it contains at least one anchor with an id.
# We can check if the next element is a heading, and use its id as href.
href = (next_el.get("id") or self._slug(next_el.text or "")) if next_el.tag in self._htags else ""
for anchor_id in ids:
self.plugin.register_anchor(self.plugin.current_page, anchor_id, href) # type: ignore[arg-type]
ids.clear()
# A `p` tag is a no-op for our purposes, just recurse into it in the context
# of the current collection of anchors.
self._scan_anchors(el, pending_anchors)
# Non-whitespace text after the element interrupts the chain, aliases can't apply.
if el.tail and el.tail.strip():
pending_anchors.flush()

elif el.tag in self._htags:
# If the element is a heading, that turns the pending anchors into aliases.
pending_anchors.flush(el.get("id"))

else:
# Recurse into sub-elements.
ids = self._scan_anchors(el)
return ids
# But if it's some other interruption, flush anchors anyway as non-aliases.
pending_anchors.flush()
# Recurse into sub-elements, in a *separate* context.
self.run(el)


class _PendingAnchors:
    """Buffer of HTML anchor ids whose fate (alias of an upcoming heading, or standalone) is undecided.

    Ids are queued with `append` and later handed to the plugin's
    `register_anchor` in insertion order by `flush`.
    """

    def __init__(self, plugin: AutorefsPlugin, current_page: str):
        """Start with an empty buffer bound to a plugin and the page being processed."""
        self.anchors: list[str] = []
        self.current_page = current_page
        self.plugin = plugin

    def append(self, anchor: str) -> None:
        """Queue one anchor id; nothing is registered until `flush` is called."""
        self.anchors += [anchor]

    def flush(self, alias_to: str | None = None) -> None:
        """Register every queued anchor id, then empty the buffer.

        Arguments:
            alias_to: Passed through unchanged as the third argument of
                `register_anchor` for each queued id (`None` by default).
        """
        # The plugin is only looked up while there is something to register,
        # so flushing an empty buffer never touches it.
        for pending_id in self.anchors:
            self.plugin.register_anchor(self.current_page, pending_id, alias_to)
        # Empty the same list object in place rather than rebinding the attribute.
        del self.anchors[:]


class AutorefsExtension(Extension):
Expand Down
26 changes: 19 additions & 7 deletions tests/test_references.py
Expand Up @@ -233,7 +233,7 @@ def test_register_markdown_anchors() -> None:
"""Check that Markdown anchors are registered when enabled."""
plugin = AutorefsPlugin()
md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)])
plugin.current_page = ""
plugin.current_page = "page"
md.convert(
dedent(
"""
Expand All @@ -253,12 +253,24 @@ def test_register_markdown_anchors() -> None:
Text.
[](){#alias4}
## Heading baz
[](){#alias5}
[](){#alias6}
Decoy.
## Heading more
[](){#alias7}
""",
),
)
assert plugin._url_map["foo"] == "#heading-foo"
assert plugin._url_map["bar"] == "#bar"
assert plugin._url_map["alias1"] == "#heading-bar"
assert plugin._url_map["alias2"] == "#heading-bar"
assert plugin._url_map["alias3"] == "#alias3"
assert plugin._url_map["alias4"] == "#heading-baz"
assert plugin._url_map == {
"foo": "page#heading-foo",
"bar": "page#bar",
"alias1": "page#heading-bar",
"alias2": "page#heading-bar",
"alias3": "page#alias3",
"alias4": "page#heading-baz",
"alias5": "page#alias5",
"alias6": "page#alias6",
"alias7": "page#alias7",
}

0 comments on commit 2fc4e3c

Please sign in to comment.