New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more gentle with non-http looking links #377

Merged 1 commit on Oct 10, 2018
Jump to file or symbol
Failed to load files and symbols.
+26 −6
Diff settings


Just for now

@@ -100,3 +100,18 @@ def test_linkify_urls_with_fragments(requestsmock):
) in html
def test_linkify_urls_not_http():
text = """
Email me
Or call me on tel:123456789
Or you can just go to
But SSH is better ssh://
Then open file:///tmp/foo.txt
html = utils.render_comment_text(text)
assert '<a href=""></a>' in html
assert " ftp://" in html # that it does not become a link.
assert " ssh://" in html # that it does not become a link.
assert " file://" in html # that it does not become a link.
@@ -80,21 +80,26 @@ def render_comment_text(text):
html = bleach.clean(text)
def custom_nofollow_maker(attrs, new=False):
href_key = (None, u"href")
href_key = (None, "href")
if href_key not in attrs:
return attrs
if attrs[href_key].startswith(u"mailto:"):
href = attrs[href_key]
if href.startswith("mailto:") or href.startswith("tel:"):
# Leave untouched
return attrs
if not (href.startswith("http:") or href.startswith("https:")):
# Bail if it's not a HTTP URL, such as ssh:// or ftp://
return None
p = urlparse(attrs[href_key])
p = urlparse(href)
if p.netloc not in settings.NOFOLLOW_EXCEPTIONS:
# Before we add the `rel="nofollow"` let's first check that this is a
# valid domain at all.
root_url = p.scheme + "://" + p.netloc
response = requests.head(root_url)
response = requests.head(root_url, timeout=5)
if response.status_code == 301:
redirect_p = urlparse(response.headers["location"])
# If the only difference is that it redirects to https instead
@@ -104,12 +109,12 @@ def custom_nofollow_maker(attrs, new=False):
and p.scheme == "http"
and p.netloc == redirect_p.netloc
attrs[href_key] = attrs[href_key].replace("http://", "https://")
attrs[href_key] = href.replace("http://", "https://")
except ConnectionError:
return None
rel_key = (None, u"rel")
rel_key = (None, "rel")
rel_values = [val for val in attrs.get(rel_key, "").split(" ") if val]
if "nofollow" not in [rel_val.lower() for rel_val in rel_values]:
ProTip! Use n and p to navigate between commits in a pull request.