Skip to content

Commit

Permalink
sanitizer: escape HTML comments
Browse files Browse the repository at this point in the history
fixes: bug 1689399 / GHSA-vv2x-vrpj-qqpq
  • Loading branch information
Greg Guthe committed Feb 1, 2021
1 parent c045a8b commit 1334134
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 0 deletions.
1 change: 1 addition & 0 deletions bleach/html5lib_shim.py
Expand Up @@ -48,6 +48,7 @@
HTMLInputStream,
) # noqa: E402 module level import not at top of file
from bleach._vendor.html5lib.serializer import (
escape,
HTMLSerializer,
) # noqa: E402 module level import not at top of file
from bleach._vendor.html5lib._tokenizer import (
Expand Down
4 changes: 4 additions & 0 deletions bleach/sanitizer.py
Expand Up @@ -371,6 +371,10 @@ def sanitize_token(self, token):

elif token_type == "Comment":
if not self.strip_html_comments:
# call xml.sax.saxutils.escape (via html5lib_shim) to escape &, <, and > in addition to " and '
token["data"] = html5lib_shim.escape(
token["data"], entities={'"': "&quot;", "'": "&#x27;"}
)
return token
else:
return None
Expand Down
47 changes: 47 additions & 0 deletions tests/test_clean.py
Expand Up @@ -739,6 +739,53 @@ def test_namespace_rc_data_element_strip_false(
)


@pytest.mark.parametrize(
    "namespace_tag, end_tag, data, expected",
    [
        (
            "math",
            "p",
            "<math></p><style><!--</style><img src/onerror=alert(1)>",
            "<math><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
        ),
        (
            "math",
            "br",
            "<math></br><style><!--</style><img src/onerror=alert(1)>",
            "<math><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
        ),
        (
            "svg",
            "p",
            "<svg></p><style><!--</style><img src/onerror=alert(1)>",
            "<svg><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
        ),
        (
            "svg",
            "br",
            "<svg></br><style><!--</style><img src/onerror=alert(1)>",
            "<svg><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
        ),
    ],
)
def test_html_comments_escaped(namespace_tag, end_tag, data, expected):
    """Check that retained HTML comments have their contents escaped.

    Regression test for bug 1689399 / GHSA-vv2x-vrpj-qqpq.

    Each case opens a foreign-content root (``math`` or ``svg``), adds a bare
    ``</p>`` or ``</br>`` end tag, and then hides markup inside a comment in
    a ``style`` element. With ``strip_comments=False`` the comment is kept,
    so its ``<`` and ``>`` must come back as ``&lt;`` and ``&gt;``.

    Background on the payload:

    * ``p`` and ``br`` can be written as just an end tag
      (e.g. ``</p>`` == ``<p></p>``).
    * In browsers, ``img`` and other tags break out of the svg or math
      namespace (e.g. ``<svg><img></svg>`` == ``<svg></svg><img>``).
    * ``style`` does not break out
      (e.g. ``<svg><style></svg>`` == ``<svg><style></style></svg>``).
    * The breaking tag ejects trailing elements
      (e.g. ``<svg><img><style></style></svg>`` ==
      ``<svg></svg><img><style></style>``).

    The ejected elements can trigger XSS.
    """
    allowed_tags = [namespace_tag, end_tag, "style"]
    cleaned = clean(data, tags=allowed_tags, strip_comments=False)
    assert cleaned == expected


def get_ids_and_tests():
"""Retrieves regression tests from data/ directory
Expand Down

0 comments on commit 1334134

Please sign in to comment.