New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more gentle with non-http looking links #377

Merged
merged 1 commit into from Oct 10, 2018
Jump to file or symbol
Failed to load files and symbols.
+26 −6
Diff settings

Always

Just for now

@@ -100,3 +100,18 @@ def test_linkify_urls_with_fragments(requestsmock):
'rel="nofollow">https://www.youtobe.com/watch#anchor'
"</a>"
) in html
def test_linkify_urls_not_http():
    """Check how comment rendering treats URLs whose scheme is not HTTP(S).

    The mailto: address must come out as an anchor carrying only an href,
    while the ftp://, ssh:// and file:// URLs must still show up as bare
    text. The tel: number is part of the input, but nothing is asserted
    about how it is rendered.
    """
    comment = (
        "\n"
        "Email me mailto:mail@example.com\n"
        "Or call me on tel:123456789\n"
        "Or you can just go to ftp://archive.example.com\n"
        "But SSH is better ssh://root@git.example.com\n"
        "Then open file:///tmp/foo.txt\n"
    )
    rendered = utils.render_comment_text(comment)

    mailto_anchor = '<a href="mailto:mail@example.com">mailto:mail@example.com</a>'
    assert mailto_anchor in rendered

    # A scheme still preceded by a space means the URL did not become a link.
    for scheme in ("ftp", "ssh", "file"):
        assert " " + scheme + "://" in rendered
View
@@ -80,21 +80,26 @@ def render_comment_text(text):
html = bleach.clean(text)
def custom_nofollow_maker(attrs, new=False):
href_key = (None, u"href")
href_key = (None, "href")
if href_key not in attrs:
return attrs
if attrs[href_key].startswith(u"mailto:"):
href = attrs[href_key]
if href.startswith("mailto:") or href.startswith("tel:"):
# Leave untouched
return attrs
if not (href.startswith("http:") or href.startswith("https:")):
# Bail if it's not an HTTP URL, such as ssh:// or ftp://
return None
p = urlparse(attrs[href_key])
p = urlparse(href)
if p.netloc not in settings.NOFOLLOW_EXCEPTIONS:
# Before we add the `rel="nofollow"` let's first check that this is a
# valid domain at all.
root_url = p.scheme + "://" + p.netloc
try:
response = requests.head(root_url)
response = requests.head(root_url, timeout=5)
if response.status_code == 301:
redirect_p = urlparse(response.headers["location"])
# If the only difference is that it redirects to https instead
@@ -104,12 +109,12 @@ def custom_nofollow_maker(attrs, new=False):
and p.scheme == "http"
and p.netloc == redirect_p.netloc
):
attrs[href_key] = attrs[href_key].replace("http://", "https://")
attrs[href_key] = href.replace("http://", "https://")
except ConnectionError:
return None
rel_key = (None, u"rel")
rel_key = (None, "rel")
rel_values = [val for val in attrs.get(rel_key, "").split(" ") if val]
if "nofollow" not in [rel_val.lower() for rel_val in rel_values]:
rel_values.append("nofollow")
ProTip! Use n and p to navigate between commits in a pull request.