Skip to content

Commit

Permalink
BUG: Better handle raw URLs and markdown links (#203)
Browse files Browse the repository at this point in the history
* Display link text correctly

In the generated documentation, link text placed in [] is no longer followed
by a left angle bracket, and the link redirects to the given website. Moreover,
the improved regular expression correctly handles URLs given in parentheses
and URLs that themselves contain parentheses, e.g.:

(https://example.com)
https://example.com/more_(examples)

* match code span fences of arbitrary length

* some touch-ups
  • Loading branch information
ButterflyBug committed Jun 26, 2020
1 parent 7f52493 commit 1de51a9
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 14 deletions.
20 changes: 18 additions & 2 deletions pdoc/html_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,25 @@ def doctests(text):
@staticmethod
def raw_urls(text):
"""Wrap URLs in Python-Markdown-compatible <angle brackets>."""
pattern = re.compile(r"""
(?P<code_span> # matches whole code span
(?<!`)(?P<fence>`+)(?!`) # a string of backticks
.*?
(?<!`)(?P=fence)(?!`))
|
(?P<markdown_link>\[.*?\]\(.*\)) # matches whole inline link
|
(?<![<\"\']) # does not start with <, ", '
(?P<url>(?:http|ftp)s?:// # url with protocol
[^>\s()]+ # url part before any (, )
(?:\([^>\s)]*\))* # optionally url part within parentheses
[^>\s)]* # url part after any )
)""", re.VERBOSE)

with _fenced_code_blocks_hidden(text) as result:
result[0] = re.sub(r'(?<![<"\'])(\s*)((?:http|ftp)s?://[^>)\s]+)(\s*)',
r'\1<\2>\3', result[0])
result[0] = pattern.sub(
lambda m: ('<' + m.group('url') + '>') if m.group('url') else m.group(),
result[0])
text = result[0]
return text

Expand Down
43 changes: 31 additions & 12 deletions pdoc/test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1363,29 +1363,48 @@ def test_reST_include(self):

def test_urls(self):
text = """Beautiful Soup
http://www.foo.bar
http://www.foo.bar?q="foo"
<a href="https://travis-ci.org/cs01/pygdbmi"><img src="https://foo" /></a>
<https://foo.bar>
Work [like this](http://foo/) and [like that].
[like that]: ftp://bar
data:text/plain;base64,SGVsbG8sIFdvcmxkIQ%3D%3D
```
http://url.com
```"""
```
[https://google.com](https://google.com)
[https://en.wikipedia.org/wiki/Orange_(software)](https://en.wikipedia.org/wiki/Orange_(software))
[Check https://google.com here](https://google.com)
`https://google.com`
http://www.foo.bar
http://www.foo.bar?q="foo"
https://en.wikipedia.org/wiki/Orange_(software)
(https://google.com)
(http://foo and http://bar)
text ``x ` http://foo`` http://bar `http://foo`
"""

expected = """<p>Beautiful Soup
<a href="http://www.foo.bar">http://www.foo.bar</a>
<a href="http://www.foo.bar?q=&quot;foo&quot;">http://www.foo.bar?q="foo"</a>
<a href="https://travis-ci.org/cs01/pygdbmi"><img src="https://foo" /></a>
<a href="https://foo.bar">https://foo.bar</a></p>
<p>Work <a href="http://foo/">like this</a> and <a href="ftp://bar">like that</a>.</p>
<a href="https://foo.bar">https://foo.bar</a>
Work <a href="http://foo/">like this</a> and <a href="ftp://bar">like that</a>.</p>
<p>data:text/plain;base64,SGVsbG8sIFdvcmxkIQ%3D%3D</p>
<pre><code>http://url.com
</code></pre>"""
</code></pre>
<p><a href="https://google.com">https://google.com</a>
<a href="https://en.wikipedia.org/wiki/Orange_(software)">\
https://en.wikipedia.org/wiki/Orange_(software)</a>
<a href="https://google.com">Check https://google.com here</a>
<code>https://google.com</code></p>
<p><a href="http://www.foo.bar">http://www.foo.bar</a>
<a href="http://www.foo.bar?q=&quot;foo&quot;">http://www.foo.bar?q="foo"</a>
<a href="https://en.wikipedia.org/wiki/Orange_(software)">\
https://en.wikipedia.org/wiki/Orange_(software)</a>
(<a href="https://google.com">https://google.com</a>)
(<a href="http://foo">http://foo</a> and <a href="http://bar">http://bar</a>)
text <code>x ` http://foo</code> <a href="http://bar">http://bar</a> <code>http://foo</code></p>"""

html = to_html(text)
self.assertEqual(html, expected)

Expand Down

0 comments on commit 1de51a9

Please sign in to comment.