diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30a6faf..222b475 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,7 +47,6 @@ jobs: - run: bundle exec rake jruby: - continue-on-error: true # nokogiri on jruby has different behavior strategy: fail-fast: false matrix: diff --git a/.rubocop.yml b/.rubocop.yml index fa00f06..b96841e 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -343,3 +343,6 @@ Minitest/SkipEnsure: Minitest/UnreachableAssertion: Enabled: true + +Minitest/NoAssertions: + Enabled: true diff --git a/CHANGELOG.md b/CHANGELOG.md index d9cd3b6..80b52e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## next / unreleased + +* `SafeListSanitizer` allows `time` tag and `lang` attribute by default. + + *Mike Dalessio* + +* `Rails::Html::XPATHS_TO_REMOVE` has been removed. It's not necessary with the existing sanitizers, + and should have been a private constant all along anyway. + + *Mike Dalessio* + + ## 1.5.0 / 2023-01-20 * `SafeListSanitizer`, `PermitScrubber`, and `TargetScrubber` now all support pruning of unsafe tags. diff --git a/lib/rails/html/sanitizer.rb b/lib/rails/html/sanitizer.rb index dc50430..531ff0d 100644 --- a/lib/rails/html/sanitizer.rb +++ b/lib/rails/html/sanitizer.rb @@ -2,8 +2,6 @@ module Rails module Html - XPATHS_TO_REMOVE = %w{.//script .//form comment()} - class Sanitizer # :nodoc: def sanitize(html, options = {}) raise NotImplementedError, "subclasses must implement sanitize method." 
@@ -33,7 +31,6 @@ def sanitize(html, options = {}) loofah_fragment = Loofah.fragment(html) - remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE) loofah_fragment.scrub!(TextOnlyScrubber.new) properly_encode(loofah_fragment, encoding: "UTF-8") @@ -106,10 +103,65 @@ class << self attr_accessor :allowed_tags attr_accessor :allowed_attributes end - self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr - acronym a img blockquote del ins)) - self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) + self.allowed_tags = Set.new([ + "a", + "abbr", + "acronym", + "address", + "b", + "big", + "blockquote", + "br", + "cite", + "code", + "dd", + "del", + "dfn", + "div", + "dl", + "dt", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "hr", + "i", + "img", + "ins", + "kbd", + "li", + "ol", + "p", + "pre", + "samp", + "small", + "span", + "strong", + "sub", + "sup", + "time", + "tt", + "ul", + "var", + ]) + self.allowed_attributes = Set.new([ + "abbr", + "alt", + "cite", + "class", + "datetime", + "height", + "href", + "lang", + "name", + "src", + "title", + "width", + "xml:lang", + ]) def initialize(prune: false) @permit_scrubber = PermitScrubber.new(prune: prune) @@ -129,7 +181,6 @@ def sanitize(html, options = {}) @permit_scrubber.attributes = allowed_attributes(options) loofah_fragment.scrub!(@permit_scrubber) else - remove_xpaths(loofah_fragment, XPATHS_TO_REMOVE) loofah_fragment.scrub!(:strip) end diff --git a/test/sanitizer_test.rb b/test/sanitizer_test.rb index 41b274e..dae5001 100644 --- a/test/sanitizer_test.rb +++ b/test/sanitizer_test.rb @@ -6,9 +6,22 @@ puts Nokogiri::VERSION_INFO +# +# NOTE that many of these tests contain multiple acceptable results. +# +# In some cases, this is because of how the HTML4 parser's recovery behavior changed in libxml2 +# 2.9.14 and 2.10.0. 
For more details, see: +# +# - https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5 +# - https://gitlab.gnome.org/GNOME/libxml2/-/issues/380 +# +# In other cases, multiple acceptable results are provided because Nokogiri's vendored libxml2 is +# patched to entity-escape server-side includes (aka "SSI", aka `<!--...-->`). +# +# In many other cases, it's because the parser used by Nokogiri on JRuby (xerces+nekohtml) parses +# slightly differently than libxml2 in edge cases. +# class SanitizersTest < Minitest::Test - include Rails::Dom::Testing::Assertions::DomAssertions - def test_sanitizer_sanitize_raises_not_implemented_error assert_raises NotImplementedError do Rails::Html::Sanitizer.new.sanitize("") @@ -20,7 +33,16 @@ def test_sanitize_nested_script end def test_sanitize_nested_script_in_style - assert_equal '<script>alert("XSS");</script>', safe_list_sanitize('alert("XSS");/', tags: %w(em)) + input = 'alert("XSS");/' + result = safe_list_sanitize(input, tags: %w(em)) + acceptable_results = [ + # libxml2 + %{<script>alert("XSS");</script>}, + # xerces+neko. unavoidable double-escaping, see loofah/docs/2022-10-decision-on-cdata-nodes.md + %{&lt;script&gt;alert(\"XSS\");&lt;&lt;/style&gt;/script&gt;}, + ] + + assert_includes(acceptable_results, result) end class XpathRemovalTestSanitizer < Rails::Html::Sanitizer @@ -56,8 +78,15 @@ def test_remove_xpaths_called_with_enumerable_xpaths def test_strip_tags_with_quote input = '<" hi' - expected = libxml_2_9_14_recovery_lt? ? 
%{<" hi} : %{ hi} - assert_equal(expected, full_sanitize(input)) + result = full_sanitize(input) + acceptable_results = [ + # libxml2 >= 2.9.14 and xerces+neko + %{<" hi}, + # other libxml2 + %{ hi}, + ] + + assert_includes(acceptable_results, result) end def test_strip_invalid_html @@ -72,27 +101,54 @@ def test_strip_nested_tags def test_strip_tags_multiline expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n} - input = %{This is <b>a <a href="" target="_blank">test</a></b>.\n\n\n\n

It no longer contains any HTML.

\n} + input = %{

This is a test.

\n\n\n\n

It no longer contains any HTML.

\n} assert_equal expected, full_sanitize(input) end def test_remove_unclosed_tags input = "This is <-- not\n a comment here." - expected = libxml_2_9_14_recovery_lt? ? %{This is <-- not\n a comment here.} : %{This is } - assert_equal(expected, full_sanitize(input)) + result = full_sanitize(input) + acceptable_results = [ + # libxml2 >= 2.9.14 and xerces+neko + %{This is <-- not\n a comment here.}, + # other libxml2 + %{This is }, + ] + + assert_includes(acceptable_results, result) end def test_strip_cdata input = "This has a ]]> here." - expected = libxml_2_9_14_recovery_lt_bang? ? %{This has a <![CDATA[]]> here.} : %{This has a ]]> here.} - assert_equal(expected, full_sanitize(input)) + result = full_sanitize(input) + acceptable_results = [ + # libxml2 = 2.9.14 + %{This has a <![CDATA[]]> here.}, + # other libxml2 + %{This has a ]]> here.}, + # xerces+neko + %{This has a here.}, + ] + + assert_includes(acceptable_results, result) end def test_strip_unclosed_cdata input = "This has an unclosed ]] here..." - expected = libxml_2_9_14_recovery_lt_bang? ? 
%{This has an unclosed <![CDATA[]] here...} : %{This has an unclosed ]] here...} - assert_equal(expected, full_sanitize(input)) + + result = safe_list_sanitize(input) + + acceptable_results = [ + # libxml2 = 2.9.14 + %{This has an unclosed <![CDATA[]] here...}, + # other libxml2 + %{This has an unclosed ]] here...}, + # xerces+neko + %{This has an unclosed } + ] + + assert_includes(acceptable_results, result) end def test_strip_blank_string @@ -168,7 +224,20 @@ def test_sanitize_form end def test_sanitize_plaintext - assert_sanitized "<span>foo</span></plaintext>", "<span>foo</span>" + # note that the `plaintext` tag has been deprecated since HTML 2 + # https://developer.mozilla.org/en-US/docs/Web/HTML/Element/plaintext + input = "<plaintext><span>foo</span></plaintext>" + result = safe_list_sanitize(input) + acceptable_results = [ + # libxml2 + "<span>foo</span>", + # xerces+nekohtml-unit + "&lt;span&gt;foo&lt;/span&gt;&lt;/plaintext&gt;", + # xerces+cyberneko + "&lt;span&gt;foo&lt;/span&gt;" + ] + + assert_includes(acceptable_results, result) end def test_sanitize_script @@ -187,16 +256,7 @@ def test_sanitize_javascript_href def test_sanitize_image_src raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>} - assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>} - end - - tags = Loofah::HTML5::SafeList::ALLOWED_ELEMENTS - %w(script form) - tags.each do |tag_name| - define_method "test_should_allow_#{tag_name}_tag" do - scope_allowed_tags(tags) do - assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end) - end - end + assert_sanitized raw, %{src="javascript:bang" <img width="5">foo, <span>bar</span>} end def test_should_allow_anchors @@ -206,8 +266,20 @@ def test_should_allow_anchors def test_video_poster_sanitization scope_allowed_tags(%w(video)) do 
scope_allowed_attributes %w(src poster) do - assert_sanitized %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), %(<video src="videofile.ogg" poster="posterimage.jpg"></video>) - assert_sanitized %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), %(<video src="videofile.ogg"></video>) + expected = if RUBY_PLATFORM == "java" + # xerces+nekohtml alphabetizes the attributes! FML. + %(<video poster="posterimage.jpg" src="videofile.ogg"></video>) + else + %(<video src="videofile.ogg" poster="posterimage.jpg"></video>) + end + assert_sanitized( + %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), + expected, + ) + assert_sanitized( + %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), + %(<video src="videofile.ogg"></video>), + ) end end end @@ -219,16 +291,33 @@ def test_allow_colons_in_path_component %w(src width height alt).each do |img_attr| define_method "test_should_allow_image_#{img_attr}_attribute" do - assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />) + assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo">) end end + def test_lang_and_xml_lang + # https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes + # + # 3.2.6.2 The lang and xml:lang attributes + # + # ... Authors must not use the lang attribute in the XML namespace on HTML elements in HTML + # documents. To ease migration to and from XML, authors may specify an attribute in no namespace + # with no prefix and with the literal localname "xml:lang" on HTML elements in HTML documents, + # but such attributes must only be specified if a lang attribute in no namespace is also + # specified, and both attributes must have the same value when compared in an ASCII + # case-insensitive manner. 
+ input = expected = "<div lang=\"en\" xml:lang=\"en\">foo</div>" + assert_sanitized(input, expected) + end + def test_should_handle_non_html assert_sanitized "abc" end def test_should_handle_blank_text - [nil, "", " "].each { |blank| assert_sanitized blank } + assert_nil(safe_list_sanitize(nil)) + assert_equal("", safe_list_sanitize("")) + assert_equal(" ", safe_list_sanitize(" ")) end def test_setting_allowed_tags_affects_sanitization @@ -287,6 +376,7 @@ def test_should_allow_custom_tags_with_custom_attributes def test_scrub_style_if_style_attribute_option_is_passed input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>' actual = safe_list_sanitize(input, attributes: %w(style)) + assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual) end @@ -313,28 +403,30 @@ def scrubber.scrub(node); node.name = "h1"; end def test_should_accept_loofah_inheriting_scrubber scrubber = Loofah::Scrubber.new - def scrubber.scrub(node); node.name = "h1"; end + def scrubber.scrub(node); node.replace("<h1>#{node.inner_html}</h1>"); end html = "<script>hello!</script>" assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber) end def test_should_accept_loofah_scrubber_that_wraps_a_block - scrubber = Loofah::Scrubber.new { |node| node.name = "h1" } + scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") } html = "<script>hello!</script>" assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber) end def test_custom_scrubber_takes_precedence_over_other_options - scrubber = Loofah::Scrubber.new { |node| node.name = "h1" } + scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") } html = "<script>hello!</script>" assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ["foo"]) end - [%w(img src), %w(a href)].each do |(tag, attr)| - define_method 
"test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do - assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>) - end + def test_should_strip_src_attribute_in_img_with_bad_protocols + assert_sanitized %(<img src="javascript:bang" title="1">), %(<img title="1">) + end + + def test_should_strip_href_attribute_in_a_with_bad_protocols + assert_sanitized %(<a href="javascript:bang" title="1">boo</a>), %(<a title="1">boo</a>) end def test_should_block_script_tag @@ -366,7 +458,16 @@ def test_should_not_fall_for_xss_image_hack_with_uppercase_tags end def test_should_sanitize_tag_broken_up_by_null - assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "" + input = %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>) + result = safe_list_sanitize(input) + acceptable_results = [ + # libxml2 + "", + # xerces+neko + 'alert("XSS")', + ] + + assert_includes(acceptable_results, result) end def test_should_sanitize_invalid_script_tag @@ -375,7 +476,19 @@ def test_should_sanitize_invalid_script_tag def test_should_sanitize_script_tag_with_multiple_open_brackets assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;" - assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), "" + end + + def test_should_sanitize_script_tag_with_multiple_open_brackets_2 + input = %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a) + result = safe_list_sanitize(input) + acceptable_results = [ + # libxml2 + "", + # xerces+neko + "&lt;a", + ] + + assert_includes(acceptable_results, result) end def test_should_sanitize_unclosed_script @@ -392,7 +505,10 @@ def test_should_not_fall_for_ridiculous_hack end def test_should_sanitize_attributes - assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="#{CGI.escapeHTML "'><script>alert()</script>"}">blah</span>) + assert_sanitized( + %(<SPAN title="'><script>alert()</script>">blah</SPAN>), + %(<span 
title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>), + ) end def test_should_sanitize_illegal_style_properties @@ -421,11 +537,11 @@ def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags end def test_should_sanitize_invalid_tag_names_in_single_tags - assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />") + assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img>") end def test_should_sanitize_img_dynsrc_lowsrc - assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />") + assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img>") end def test_should_sanitize_div_background_image_unicode_encoded @@ -446,6 +562,7 @@ def test_should_allow_div_background_image_unicode_encoded_safe_functions convert_to_css_hex("rgb(255,0,0)", true), ].each do |propval| raw = "background-image:" + propval + assert_includes(sanitize_css(raw), "background-image") end end @@ -461,19 +578,38 @@ def test_should_sanitize_across_newlines end def test_should_sanitize_img_vbscript - assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), "<img />" + assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), "<img>" end def test_should_sanitize_cdata_section input = "<![CDATA[<span>section</span>]]>" - expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;} - assert_sanitized(input, expected) + result = safe_list_sanitize(input) + acceptable_results = [ + # libxml2 = 2.9.14 + %{&lt;![CDATA[<span>section</span>]]&gt;}, + # other libxml2 + %{section]]&gt;}, + # xerces+neko + "", + ] + + assert_includes(acceptable_results, result) end def test_should_sanitize_unterminated_cdata_section input = "<![CDATA[<span>neverending..." - expected = libxml_2_9_14_recovery_lt_bang? ? 
%{&lt;![CDATA[<span>neverending...</span>} : %{neverending...} - assert_sanitized(input, expected) + result = safe_list_sanitize(input) + + acceptable_results = [ + # libxml2 = 2.9.14 + %{&lt;![CDATA[<span>neverending...</span>}, + # other libxml2 + %{neverending...}, + # xerces+neko + "" + ] + + assert_includes(acceptable_results, result) end def test_should_not_mangle_urls_with_ampersand @@ -481,7 +617,8 @@ def test_should_not_mangle_urls_with_ampersand end def test_should_sanitize_neverending_attribute - assert_sanitized "<span class=\"\\", "<span class=\"\\\">" + # note that assert_dom_equal chokes in this case! so avoid using assert_sanitized + assert_equal("<span class=\"\\\"></span>", safe_list_sanitize("<span class=\"\\\">")) end [ @@ -491,13 +628,13 @@ def test_should_sanitize_neverending_attribute %(<a href="javascript&#x003A;alert('XSS');">) ].each_with_index do |enc_hack, i| define_method "test_x03a_handling_#{i + 1}" do - assert_sanitized enc_hack, "<a>" + assert_sanitized enc_hack, "<a></a>" end end def test_x03a_legitimate - assert_sanitized %(<a href="http&#x3a;//legit">), %(<a href="http://legit">) - assert_sanitized %(<a href="http&#x3A;//legit">), %(<a href="http://legit">) + assert_sanitized %(<a href="http&#x3a;//legit">asdf</a>), %(<a href="http://legit">asdf</a>) + assert_sanitized %(<a href="http&#x3A;//legit">asdf</a>), %(<a href="http://legit">asdf</a>) end def test_sanitize_ascii_8bit_string @@ -517,6 +654,31 @@ def test_allow_data_attribute_if_requested assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ["data-foo"]) end + # https://developer.mozilla.org/en-US/docs/Glossary/Void_element + VOID_ELEMENTS = %w[area base br col embed hr img input keygen link meta param source track wbr] + + %w(strong em b i p code pre tt samp kbd var sub + sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr + acronym a img blockquote del ins time).each do |tag_name| + define_method 
"test_default_safelist_should_allow_#{tag_name}" do + if VOID_ELEMENTS.include?(tag_name) + assert_sanitized("<#{tag_name}>") + else + assert_sanitized("<#{tag_name}>foo</#{tag_name}>") + end + end + end + + def test_datetime_attribute + assert_sanitized("<time datetime=\"2023-01-01\">Today</time>") + end + + def test_abbr_attribute + scope_allowed_tags(%w(table tr th td)) do + assert_sanitized(%(<table><tr><td abbr="UK">United Kingdom</td></tr></table>)) + end + end + def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer skip if RUBY_VERSION < "2.3" @@ -525,11 +687,14 @@ def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer text = safe_list_sanitize(html) acceptable_results = [ - # nokogiri w/vendored+patched libxml2 + # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch) %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>}, - # nokogiri w/ system libxml2 + # system libxml2 %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, + # xerces+neko + %{<a href="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>} ] + assert_includes(acceptable_results, text) end @@ -541,11 +706,14 @@ def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer text = safe_list_sanitize(html) acceptable_results = [ - # nokogiri w/vendored+patched libxml2 + # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch) %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>}, - # nokogiri w/system libxml2 + # system libxml2 %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, + # xerces+neko + %{<a src="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>} ] + assert_includes(acceptable_results, text) end @@ -557,11 +725,14 @@ def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer text = safe_list_sanitize(html) acceptable_results = [ - # nokogiri w/vendored+patched libxml2 + # nokogiri's vendored+patched 
libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch) %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>}, - # nokogiri w/system libxml2 + # system libxml2 %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, + # xerces+neko + %{<a name="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>} ] + assert_includes(acceptable_results, text) end @@ -573,16 +744,28 @@ def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer text = safe_list_sanitize(html, attributes: ["action"]) acceptable_results = [ - # nokogiri w/vendored+patched libxml2 + # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch) %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>}, - # nokogiri w/system libxml2 + # system libxml2 %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>}, + # xerces+neko + %{<a action="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}, ] + assert_includes(acceptable_results, text) end def test_exclude_node_type_processing_instructions - assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><?div content><b>text</b>")) + input = "<div>text</div><?div content><b>text</b>" + result = safe_list_sanitize(input) + acceptable_results = [ + # jruby cyberneko (nokogiri < 1.14.0) + "<div>text</div>", + # everything else + "<div>text</div><b>text</b>", + ] + + assert_includes(acceptable_results, result) end def test_exclude_node_type_comment @@ -687,7 +870,9 @@ def test_combination_of_math_and_style_with_img_payload actual = safe_list_sanitize(input, tags: tags) assert_equal(expected, actual) + end + def test_combination_of_math_and_style_with_img_payload_2 input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"] expected = "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>" actual = safe_list_sanitize(input, tags: tags) @@ -701,7 +886,9 @@ def 
test_combination_of_svg_and_style_with_img_payload actual = safe_list_sanitize(input, tags: tags) assert_equal(expected, actual) + end + def test_combination_of_svg_and_style_with_img_payload_2 input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"] expected = "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>" actual = safe_list_sanitize(input, tags: tags) @@ -727,11 +914,7 @@ def safe_list_sanitize(input, options = {}) end def assert_sanitized(input, expected = nil) - if input - assert_dom_equal expected || input, safe_list_sanitize(input) - else - assert_nil safe_list_sanitize(input) - end + assert_equal((expected || input), safe_list_sanitize(input)) end def sanitize_css(input) @@ -764,15 +947,4 @@ def convert_to_css_hex(string, escape_parens = false) end end.join end - - def libxml_2_9_14_recovery_lt? - # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5 - Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14") - end - - def libxml_2_9_14_recovery_lt_bang? 
- # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5 - # then reverted in 2.10.0, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/380 - Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?("= 2.9.14") - end end diff --git a/test/scrubbers_test.rb b/test/scrubbers_test.rb index 279d0dd..2fc156c 100644 --- a/test/scrubbers_test.rb +++ b/test/scrubbers_test.rb @@ -5,8 +5,12 @@ class ScrubberTest < Minitest::Test protected + def scrub_fragment(html) + Loofah.scrub_fragment(html, @scrubber).to_s + end + def assert_scrubbed(html, expected = html) - output = Loofah.scrub_fragment(html, @scrubber).to_s + output = scrub_fragment(html) assert_equal expected, output end @@ -47,8 +51,17 @@ def test_default_scrub_removes_comments end def test_default_scrub_removes_processing_instructions - assert_scrubbed("<div>one</div><?div two><span>three</span>", - "<div>one</div><span>three</span>") + input = "<div>one</div><?div two><span>three</span>" + result = scrub_fragment(input) + + acceptable_results = [ + # jruby cyberneko (nokogiri < 1.14.0) + "<div>one</div>", + # everything else + "<div>one</div><span>three</span>", + ] + + assert_includes(acceptable_results, result) end def test_default_attributes_removal_behavior