Skip to content

Commit

Permalink
fix bug 1621692
Browse files Browse the repository at this point in the history
  • Loading branch information
Greg Guthe authored and g-k committed Mar 17, 2020
1 parent 0d88dd8 commit e4e9e21
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
13 changes: 12 additions & 1 deletion bleach/html5lib_shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,18 @@ def next_possible_entity(text):


class BleachHTMLSerializer(HTMLSerializer):
"""HTMLSerializer that undoes & -> &amp; in attributes"""
"""HTMLSerializer that undoes & -> &amp; in attributes and sets
escape_rcdata to True
"""

# per the HTMLSerializer.__init__ docstring:
#
# Whether to escape characters that need to be
# escaped within normal elements within rcdata elements such as
# style.
#
escape_rcdata = True

def escape_base_amp(self, stoken):
"""Escapes just bare & in HTML attribute values"""
# First, undo escaping of &. We need to do this because html5lib's
Expand Down
27 changes: 25 additions & 2 deletions tests/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from bleach import clean
from bleach.html5lib_shim import Filter
from bleach.sanitizer import Cleaner

from bleach._vendor.html5lib.constants import rcdataElements

def test_clean_idempotent():
"""Make sure that applying the filter twice doesn't change anything."""
Expand Down Expand Up @@ -787,7 +787,7 @@ def test_nonexistent_namespace():
(
raw_tag,
"<noscript><%s></noscript><img src=x onerror=alert(1) />" % raw_tag,
"<noscript><%s></noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
"<noscript>&lt;%s&gt;</noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
)
for raw_tag in _raw_tags
],
Expand All @@ -797,6 +797,29 @@ def test_noscript_rawtag_(raw_tag, data, expected):
assert clean(data, tags=["noscript", raw_tag]) == expected


@pytest.mark.parametrize(
    "namespace_tag, rc_data_element_tag, data, expected",
    [
        (
            namespace_tag,
            rc_data_element_tag,
            "<%s><%s><img src=x onerror=alert(1)>"
            % (namespace_tag, rc_data_element_tag),
            "<%s><%s>&lt;img src=x onerror=alert(1)&gt;</%s></%s>"
            % (
                namespace_tag,
                rc_data_element_tag,
                rc_data_element_tag,
                namespace_tag,
            ),
        )
        for namespace_tag in ["math", "svg"]
        # https://dev.w3.org/html5/html-author/#rcdata-elements
        # https://html.spec.whatwg.org/index.html#parsing-html-fragments
        # in html5lib: 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', and 'noscript'
        #
        # sorted() pins the order in which cases are generated. If
        # rcdataElements is a set-like container of strings (presumably a
        # frozenset -- confirm against html5lib.constants), its iteration
        # order can change between interpreter runs, which makes the test
        # order unstable and can trip up parallel collection.
        for rc_data_element_tag in sorted(rcdataElements)
    ],
)
def test_namespace_rc_data_element_strip_false(
    namespace_tag, rc_data_element_tag, data, expected
):
    """Check that markup inside a namespaced rcdata element is escaped.

    Each case nests an rcdata element inside a ``math`` or ``svg`` tag,
    leaves both unclosed, and puts an ``<img onerror=...>`` payload inside.
    With both tags allowed and ``strip=False``, ``clean`` must return the
    payload with ``<`` and ``>`` escaped and with both elements closed.
    """
    # refs: bug 1621692 / GHSA-m6xf-fq7q-8743
    #
    # browsers will pull the img out of the namespace and rc data tag resulting in XSS
    allowed_tags = [namespace_tag, rc_data_element_tag]
    assert clean(data, tags=allowed_tags, strip=False) == expected


def get_ids_and_tests():
"""Retrieves regression tests from data/ directory
Expand Down

0 comments on commit e4e9e21

Please sign in to comment.