Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Close #9016: linkcheck builder failed to check the anchors of github.com #9260

Merged
merged 4 commits into from Jun 3, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES
Expand Up @@ -42,6 +42,9 @@ Features added
text
* #9176: i18n: Emit a debug message if message catalog file not found under
:confval:`locale_dirs`
* #9016: linkcheck: failed to check the anchor of github.com
* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify
URIs before checking hyperlinks
tk0miya marked this conversation as resolved.
Show resolved Hide resolved
* #1874: py domain: Support union types using ``|`` in info-field-list
* #9097: Optimize the paralell build
* #9131: Add :confval:`nitpick_ignore_regex` to ignore nitpicky warnings using
Expand Down
8 changes: 8 additions & 0 deletions doc/extdev/appapi.rst
Expand Up @@ -384,6 +384,14 @@ Here is a more detailed list of these events.
.. versionchanged:: 1.3
The return value can now specify a template name.

.. event:: linkcheck-process-uri (app, uri)

Emitted when the linkcheck builder collects hyperlinks from document. *uri*
is a collected URI. The event handlers can modify the URI by returning a
string.

.. versionadded:: 4.1

.. event:: build-finished (app, exception)

Emitted when a build has finished, before Sphinx exits, usually used for
Expand Down
28 changes: 28 additions & 0 deletions sphinx/builders/linkcheck.py
Expand Up @@ -627,6 +627,10 @@ def run(self, **kwargs: Any) -> None:
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
Expand All @@ -636,12 +640,33 @@ def run(self, **kwargs: Any) -> None:
for imgnode in self.document.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

tk0miya marked this conversation as resolved.
Show resolved Hide resolved
lineno = get_node_line(imgnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info


def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:
"""Rewrite anchor name of the hyperlink to github.com

The hyperlink anchors in github.com are dynamically generated. This rewrites
them before checking and makes them comparable.
"""
if re.search('://github.com/', uri) and '#' in uri:
baseuri, anchor = uri.split('#', 1)
if anchor.startswith('user-content-'):
# Ignored when URI is already prefixed.
return None
else:
return f'{baseuri}#user-content-{anchor}'
else:
return None
tk0miya marked this conversation as resolved.
Show resolved Hide resolved


def setup(app: Sphinx) -> Dict[str, Any]:
app.add_builder(CheckExternalLinksBuilder)
app.add_post_transform(HyperlinkCollector)
Expand All @@ -658,6 +683,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)

app.add_event('linkcheck-process-uri')
app.connect('linkcheck-process-uri', rewrite_github_anchor)

return {
'version': 'builtin',
'parallel_read_safe': True,
Expand Down
2 changes: 2 additions & 0 deletions tests/roots/test-linkcheck/links.txt
Expand Up @@ -13,6 +13,8 @@ Some additional anchors to exercise ignore code
* `Complete nonsense <https://localhost:7777/doesnotexist>`_
* `Example valid local file <conf.py>`_
* `Example invalid local file <path/to/notfound>`_
* https://github.com/sphinx-doc/sphinx#documentation
* https://github.com/sphinx-doc/sphinx#user-content-testing

.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png
10 changes: 7 additions & 3 deletions tests/test_build_linkcheck.py
Expand Up @@ -65,8 +65,8 @@ def test_defaults_json(app):
"info"]:
assert attr in row

assert len(content.splitlines()) == 10
assert len(rows) == 10
assert len(content.splitlines()) == 12
assert len(rows) == 12
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
Expand All @@ -87,7 +87,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 18,
'lineno': 20,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
Expand All @@ -101,6 +101,10 @@ def test_defaults_json(app):
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
# The anchor of the URI for github.com is automatically modified
assert 'https://github.com/sphinx-doc/sphinx#documentation' not in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-documentation' in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-testing' in rowsby


@pytest.mark.sphinx(
Expand Down