New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

work around https://github.com/mozilla/bleach/issues/374 #376

Merged
merged 1 commit into from Oct 10, 2018
Jump to file or symbol
Failed to load files and symbols.
+61 −44
Diff settings

Always

Just for now

@@ -78,3 +78,25 @@ def test_linkify_only_valid_domains(requestsmock):
assert '<a href="http://google.com" rel="nofollow">google.com</a>' in html
# Did dare with this one.
assert '<a href="https://github.com" rel="nofollow">github.com</a>' in html
def test_linkify_urls_with_ampersands(requestsmock):
    """A bare URL with a query-string ``&`` is linkified with it rendered as ``&amp;``.

    The expected anchor carries ``rel="nofollow"`` and uses the same escaped
    URL for both the ``href`` attribute and the visible link text.
    """
    # Register a canned HEAD response for the site root so the test does not
    # depend on the real network.
    requestsmock.head("https://www.youtobe.com", text="Works", status_code=200)

    rendered = utils.render_comment_text(
        "link: https://www.youtobe.com/watch?v=2rGuXYAQb8s&feature=share"
    )

    expected_anchor = (
        '<a href="https://www.youtobe.com/watch?v=2rGuXYAQb8s&amp;feature=share" '
        'rel="nofollow">https://www.youtobe.com/watch?v=2rGuXYAQb8s&amp;feature=share'
        "</a>"
    )
    assert expected_anchor in rendered
def test_linkify_urls_with_fragments(requestsmock):
    """A bare URL ending in a ``#fragment`` is linkified with the fragment intact.

    The expected anchor carries ``rel="nofollow"`` and keeps ``#anchor`` in
    both the ``href`` attribute and the visible link text.
    """
    # Register a canned HEAD response for the site root so the test does not
    # depend on the real network.
    requestsmock.head("https://www.youtobe.com", text="Works", status_code=200)

    rendered = utils.render_comment_text(
        "link: https://www.youtobe.com/watch#anchor"
    )

    expected_anchor = (
        '<a href="https://www.youtobe.com/watch#anchor" '
        'rel="nofollow">https://www.youtobe.com/watch#anchor'
        "</a>"
    )
    assert expected_anchor in rendered
View
@@ -8,8 +8,7 @@
from urllib.parse import urlencode, urlparse
from html import escape
from bleach.sanitizer import Cleaner
from bleach.linkifier import LinkifyFilter
import bleach
import requests
from requests.exceptions import ConnectionError
import zope.structuredtext
@@ -76,53 +75,49 @@ def valid_email(value):
whitespace_start_regex = re.compile(r"^\n*(\s+)", re.M)
def custom_nofollow_maker(attrs, new=False):
href_key = (None, u"href")
def render_comment_text(text):
if href_key not in attrs:
return attrs
html = bleach.clean(text)
if attrs[href_key].startswith(u"mailto:"):
return attrs
def custom_nofollow_maker(attrs, new=False):
href_key = (None, u"href")
p = urlparse(attrs[href_key])
if p.netloc not in settings.NOFOLLOW_EXCEPTIONS:
# Before we add the `rel="nofollow"` let's first check that this is a
# valid domain at all.
root_url = p.scheme + "://" + p.netloc
try:
response = requests.head(root_url)
if response.status_code == 301:
redirect_p = urlparse(response.headers["location"])
# If the only difference is that it redirects to https instead
# of http, then amend the href.
if (
redirect_p.scheme == "https"
and p.scheme == "http"
and p.netloc == redirect_p.netloc
):
attrs[href_key] = attrs[href_key].replace("http://", "https://")
except ConnectionError:
return None
rel_key = (None, u"rel")
rel_values = [val for val in attrs.get(rel_key, "").split(" ") if val]
if "nofollow" not in [rel_val.lower() for rel_val in rel_values]:
rel_values.append("nofollow")
attrs[rel_key] = " ".join(rel_values)
return attrs
# Module-level sanitizer shared by callers: it is built with an empty tag
# allow-list and runs link detection as a filter inside the cleaning pass,
# with `custom_nofollow_maker` as the per-link callback.
# NOTE(review): presumably this single-pass setup is the workaround for
# mozilla/bleach issue 374 -- confirm against that issue.
cleaner = Cleaner(
    filters=[
        functools.partial(LinkifyFilter, callbacks=[custom_nofollow_maker]),
    ],
    tags=[],
)
if href_key not in attrs:
return attrs
if attrs[href_key].startswith(u"mailto:"):
return attrs
def render_comment_text(text):
html = cleaner.clean(text)
p = urlparse(attrs[href_key])
if p.netloc not in settings.NOFOLLOW_EXCEPTIONS:
# Before we add the `rel="nofollow"` let's first check that this is a
# valid domain at all.
root_url = p.scheme + "://" + p.netloc
try:
response = requests.head(root_url)
if response.status_code == 301:
redirect_p = urlparse(response.headers["location"])
# If the only difference is that it redirects to https instead
# of http, then amend the href.
if (
redirect_p.scheme == "https"
and p.scheme == "http"
and p.netloc == redirect_p.netloc
):
attrs[href_key] = attrs[href_key].replace("http://", "https://")
except ConnectionError:
return None
rel_key = (None, u"rel")
rel_values = [val for val in attrs.get(rel_key, "").split(" ") if val]
if "nofollow" not in [rel_val.lower() for rel_val in rel_values]:
rel_values.append("nofollow")
attrs[rel_key] = " ".join(rel_values)
return attrs
html = bleach.linkify(html, callbacks=[custom_nofollow_maker])
# So you can write comments with code with left indentation whitespace
def subber(m):
ProTip! Use n and p to navigate between commits in a pull request.