Skip to content

Commit

Permalink
Extract title text also from img alt attribute
Browse files Browse the repository at this point in the history
  • Loading branch information
oprypin committed Jan 31, 2024
1 parent c529678 commit e1cbe5c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 13 deletions.
19 changes: 17 additions & 2 deletions mkdocs/structure/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import copy
import enum
import io
import logging
import posixpath
import warnings
Expand Down Expand Up @@ -548,6 +549,18 @@ def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None:
return super().handle_starttag(tag, attrs)


def _extract_text(el: etree.Element, buf: io.StringIO) -> None:
    """Append the plain-text content of *el* to *buf*, depth-first.

    An element with a non-empty ``alt`` attribute contributes only that
    attribute's value; its own text and its children are skipped. Otherwise
    the element's text is written, followed by each child's extracted text
    and that child's tail text, in document order. The tail of *el* itself
    is never written here; the caller that iterates over *el* as a child is
    the one that writes it.

    Args:
        el: Element whose text should be extracted.
        buf: Buffer that receives the text; it is written to in place.
    """
    if text := el.get('alt'):
        buf.write(text)
        return
    if text := el.text:
        buf.write(text)
    for child in el:
        _extract_text(child, buf)
        # The tail follows the child's closing tag, so it is emitted even
        # when the child itself was reduced to its `alt` text.
        if text := child.tail:
            buf.write(text)


class _ExtractTitleTreeprocessor(markdown.treeprocessors.Treeprocessor):
title: str | None = None
postprocessors: Sequence[markdown.postprocessors.Postprocessor] = ()
Expand All @@ -559,8 +572,10 @@ def run(self, root: etree.Element) -> etree.Element:
if len(el) > 0 and el[-1].tag == 'a' and not (el[-1].tail or '').strip():
el = copy.copy(el)
del el[-1]
# Extract the text only, recursively.
title = ''.join(el.itertext())
# Extract the text only.
buf = io.StringIO()
_extract_text(el, buf)
title = buf.getvalue()
# Unescape per Markdown implementation details.
for pp in self.postprocessors:
title = pp.run(title)
Expand Down
34 changes: 23 additions & 11 deletions mkdocs/tests/structure/page_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def test_page_title_from_markdown(self):
self.assertEqual(pg.parent, None)
self.assertEqual(pg.previous_page, None)
self.assertEqual(pg.title, 'Welcome to MkDocs')
pg.render(cfg, fl)
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, 'Welcome to MkDocs')

_SETEXT_CONTENT = dedent(
Expand All @@ -335,7 +335,7 @@ def test_page_title_from_setext_markdown(self, docs_dir):
self.assertIsNone(pg.title)
pg.read_source(cfg)
self.assertEqual(pg.title, 'Testing setext title')
pg.render(cfg, fl)
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, 'Welcome to MkDocs Setext')

@tempdir(files={'testing_setext_title.md': _SETEXT_CONTENT})
Expand All @@ -347,12 +347,12 @@ def test_page_title_from_markdown_stripped_anchorlinks(self, docs_dir):
fl = File('testing_setext_title.md', docs_dir, docs_dir, use_directory_urls=True)
pg = Page(None, fl, cfg)
pg.read_source(cfg)
pg.render(cfg, fl)
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, 'Welcome to MkDocs Setext')

_FORMATTING_CONTENT = dedent(
'''
# \\*Hello --- *beautiful* `world`
# \\*Hello --- *beautiful* `wor<dl>`
Hi.
'''
Expand All @@ -365,8 +365,8 @@ def test_page_title_from_markdown_strip_formatting(self, docs_dir):
fl = File('testing_formatting.md', docs_dir, docs_dir, use_directory_urls=True)
pg = Page(None, fl, cfg)
pg.read_source(cfg)
pg.render(cfg, fl)
self.assertEqual(pg.title, '*Hello — beautiful world')
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, '*Hello — beautiful wor<dl>')

_RAW_CONTENT = dedent(
'''
Expand All @@ -382,9 +382,21 @@ def test_page_title_from_markdown_strip_raw_tags(self, docs_dir):
fl = File('testing_raw_content.md', docs_dir, docs_dir, use_directory_urls=True)
pg = Page(None, fl, cfg)
pg.read_source(cfg)
pg.render(cfg, fl)
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, 'Hello world')

_IMAGE_CONTENT = '''# ![😄](hah.png)\n'''

@tempdir(files={'testing_image.md': _IMAGE_CONTENT, 'hah.png': ''})
def test_page_title_from_markdown_strip_image(self, docs_dir):
    """A heading that holds only an image yields the image's alt text as the page title."""
    config = load_config()
    # Both files are created up front, in the same order as they are later
    # handed to `Files`: the Markdown page first, then the image it links to.
    markdown_file, image_file = (
        File(name, docs_dir, docs_dir, use_directory_urls=True)
        for name in ('testing_image.md', 'hah.png')
    )
    page = Page(None, markdown_file, config)
    page.read_source(config)
    page.render(config, Files([markdown_file, image_file]))
    self.assertEqual(page.title, '😄')

_ATTRLIST_CONTENT = dedent(
'''
# Welcome to MkDocs Attr { #welcome }
Expand All @@ -409,7 +421,7 @@ def test_page_title_from_markdown_preserved_attr_list(self, docs_dir):
fl = File('testing_attr_list.md', docs_dir, docs_dir, use_directory_urls=True)
pg = Page(None, fl, cfg)
pg.read_source(cfg)
pg.render(cfg, fl)
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, 'Welcome to MkDocs Attr { #welcome }')

def test_page_title_from_meta(self):
Expand All @@ -435,7 +447,7 @@ def test_page_title_from_meta(self):
self.assertEqual(pg.previous_page, None)
self.assertEqual(pg.title, 'A Page Title')
self.assertEqual(pg.toc, [])
pg.render(cfg, fl)
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, 'A Page Title')

def test_page_title_from_filename(self):
Expand All @@ -460,7 +472,7 @@ def test_page_title_from_filename(self):
self.assertEqual(pg.parent, None)
self.assertEqual(pg.previous_page, None)
self.assertEqual(pg.title, 'Page title')
pg.render(cfg, fl)
pg.render(cfg, Files([fl]))
self.assertEqual(pg.title, 'Page title')

def test_page_title_from_capitalized_filename(self):
Expand Down Expand Up @@ -721,7 +733,7 @@ def test_page_render(self):
pg.read_source(cfg)
self.assertEqual(pg.content, None)
self.assertEqual(pg.toc, [])
pg.render(cfg, [fl])
pg.render(cfg, Files([fl]))
self.assertTrue(
pg.content.startswith('<h1 id="welcome-to-mkdocs">Welcome to MkDocs</h1>\n')
)
Expand Down

0 comments on commit e1cbe5c

Please sign in to comment.