Skip to content

Commit

Permalink
test: cleanup
Browse files (browse the repository at this point in the history)
  • Loading branch information
flavorjones committed Apr 6, 2021
1 parent 2dc7dd7 commit 8f6c714
Show file tree
Hide file tree
Showing 22 changed files with 525 additions and 540 deletions.
42 changes: 21 additions & 21 deletions test/html/sax/test_push_parser.rb
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: true

require "helper"

Expand All @@ -12,38 +13,37 @@ def setup
end

def test_end_document_called
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<p id="asdfasdf">
<!-- This is a comment -->
Paragraph 1
</p>
eoxml
assert ! @parser.document.end_document_called
assert(!@parser.document.end_document_called)
@parser.finish
assert @parser.document.end_document_called
assert(@parser.document.end_document_called)
end

def test_start_element
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><head><body><p id="asdfasdf">
eoxml

assert_equal [["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
@parser.document.start_elements
@parser.<<(<<-eoxml)
<!-- This is a comment -->
Paragraph 1
assert_equal([["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
@parser.document.start_elements)

@parser.<<(<<~eoxml)
<!-- This is a comment -->
Paragraph 1
</p></body></html>
eoxml
assert_equal [' This is a comment '], @parser.document.comments
assert_equal([' This is a comment '], @parser.document.comments)
@parser.finish
end


def test_chevron_partial_html
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<p id="asdfasdf">
eoxml

Expand All @@ -52,34 +52,34 @@ def test_chevron_partial_html
Paragraph 1
</p>
eoxml
assert_equal [' This is a comment '], @parser.document.comments
assert_equal([' This is a comment '], @parser.document.comments)
@parser.finish
end

def test_chevron
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<p id="asdfasdf">
<!-- This is a comment -->
Paragraph 1
</p>
eoxml
@parser.finish
assert_equal [' This is a comment '], @parser.document.comments
assert_equal([' This is a comment '], @parser.document.comments)
end

def test_default_options
assert_equal 0, @parser.options
assert_equal(0, @parser.options)
end

def test_broken_encoding
skip("ultra hard to fix for pure Java version") if Nokogiri.jruby?
skip_unless_libxml2("ultra hard to fix for pure Java version")
@parser.options |= XML::ParseOptions::RECOVER
# This is ISO_8859-1:
@parser.<< "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
@parser.finish
assert(@parser.document.errors.size >= 1)
assert_equal "Gau\337", @parser.document.data.join
assert_equal [["r"], ["body"], ["html"]], @parser.document.end_elements
assert_equal("Gau\337", @parser.document.data.join)
assert_equal([["r"], ["body"], ["html"]], @parser.document.end_elements)
end
end
end
Expand Down
124 changes: 62 additions & 62 deletions test/html/test_attributes.rb
Expand Up @@ -3,80 +3,80 @@
module Nokogiri
module HTML
class TestAttr < Nokogiri::TestCase
unless Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?
#
# libxml2 >= 2.9.2 fails to escape comments within some attributes. It
# wants to ensure these comments can be treated as "server-side includes",
# but as a result fails to ensure that serialization is well-formed,
# resulting in an opportunity for XSS injection of code into a final
# re-parsed document (presumably in a browser).
#
# the offending commit is:
#
# https://github.com/GNOME/libxml2/commit/960f0e2
#
# we'll test this by parsing the HTML, serializing it, then
# re-parsing it to ensure there isn't any ambiguity in the output
# that might allow code injection into a browser consuming
# "sanitized" output.
#
# complaints have been made upstream about this behavior, notably at
#
# https://bugzilla.gnome.org/show_bug.cgi?id=769760
#
# and multiple CVEs have been declared and fixed in downstream
# libraries as a result; a list is being kept up to date here:
#
# https://github.com/flavorjones/loofah/issues/144
#
[
#
# libxml2 >= 2.9.2 fails to escape comments within some attributes. It
# wants to ensure these comments can be treated as "server-side includes",
# but as a result fails to ensure that serialization is well-formed,
# resulting in an opportunity for XSS injection of code into a final
# re-parsed document (presumably in a browser).
# these tags and attributes are determined by the code at:
#
# the offending commit is:
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
#
# https://github.com/GNOME/libxml2/commit/960f0e2
{tag: "a", attr: "href"},
{tag: "div", attr: "href"},
{tag: "a", attr: "action"},
{tag: "div", attr: "action"},
{tag: "a", attr: "src"},
{tag: "div", attr: "src"},
{tag: "a", attr: "name"},
#
# we'll test this by parsing the HTML, serializing it, then
# re-parsing it to ensure there isn't any ambiguity in the output
# that might allow code injection into a browser consuming
# "sanitized" output.
# note that div+name is _not_ affected by the libxml2 issue.
# but we test it anyway to ensure our logic isn't modifying
# attributes that don't need modifying.
#
# complaints have been made upstream about this behavior, notably at
#
# https://bugzilla.gnome.org/show_bug.cgi?id=769760
#
# and multiple CVEs have been declared and fixed in downstream
# libraries as a result; a list is being kept up to date here:
#
# https://github.com/flavorjones/loofah/issues/144
#
[
#
# these tags and attributes are determined by the code at:
#
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
#
{tag: "a", attr: "href"},
{tag: "div", attr: "href"},
{tag: "a", attr: "action"},
{tag: "div", attr: "action"},
{tag: "a", attr: "src"},
{tag: "div", attr: "src"},
{tag: "a", attr: "name"},
#
# note that div+name is _not_ affected by the libxml2 issue.
# but we test it anyway to ensure our logic isn't modifying
# attributes that don't need modifying.
#
{tag: "div", attr: "name", unescaped: true},
].each do |config|
{tag: "div", attr: "name", unescaped: true},
].each do |config|

define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=unsafevalue()>-->le.com'>test</#{config[:tag]}>}
define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

reparsed = HTML.fragment(HTML.fragment(html).to_html)
attributes = reparsed.at_css(config[:tag]).attribute_nodes
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=unsafevalue()>-->le.com'>test</#{config[:tag]}>}

assert_equal [config[:attr]], attributes.collect(&:name)
if Nokogiri::VersionInfo.instance.libxml2?
if config[:unescaped]
#
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
# assert that this attribute's serialization is unaffected.
#
assert_equal %{examp<!--" unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
else
#
# let's match the behavior in libxml < 2.9.2.
# test that this attribute's serialization is well-formed and sanitized.
#
assert_equal %{examp<!--%22%20unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
end
reparsed = HTML.fragment(HTML.fragment(html).to_html)
attributes = reparsed.at_css(config[:tag]).attribute_nodes

assert_equal [config[:attr]], attributes.collect(&:name)
if Nokogiri::VersionInfo.instance.libxml2?
if config[:unescaped]
#
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
# assert that this attribute's serialization is unaffected.
#
assert_equal %{examp<!--" unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
else
#
# yay for consistency in javaland. move along, nothing to see here.
# let's match the behavior in libxml < 2.9.2.
# test that this attribute's serialization is well-formed and sanitized.
#
assert_equal %{examp<!--%22 unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
assert_equal %{examp<!--%22%20unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
end
else
#
# yay for consistency in javaland. move along, nothing to see here.
#
assert_equal %{examp<!--%22 unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
end
end
end
Expand Down
42 changes: 23 additions & 19 deletions test/html/test_attributes_properly_escaped.rb
Expand Up @@ -3,31 +3,35 @@
module Nokogiri
module HTML
class TestAttributesProperlyEscaped < Nokogiri::TestCase
unless Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

def test_attribute_macros_are_escaped
html = "<p><i for=\"&{<test>}\"></i></p>"
document = Nokogiri::HTML::Document.new
nodes = document.parse(html)
def test_attribute_macros_are_escaped
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

assert_equal("<p><i for=\"&amp;{&lt;test&gt;}\"></i></p>", nodes[0].to_s)
end
html = "<p><i for=\"&{<test>}\"></i></p>"
document = Nokogiri::HTML::Document.new
nodes = document.parse(html)

def test_libxml_escapes_server_side_includes
original_html = %(<p><a href='<!--"><test>-->'></a></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s
assert_equal("<p><i for=\"&amp;{&lt;test&gt;}\"></i></p>", nodes[0].to_s)
end

def test_libxml_escapes_server_side_includes
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

original_html = %(<p><a href='<!--"><test>-->'></a></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s

assert_match(/!--%22&gt;&lt;test&gt;/, html)
end

assert_match(/!--%22&gt;&lt;test&gt;/, html)
end
def test_libxml_escapes_server_side_includes_without_nested_quotes
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

def test_libxml_escapes_server_side_includes_without_nested_quotes
original_html = %(<p><i for="<!--<test>-->"></i></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s
original_html = %(<p><i for="<!--<test>-->"></i></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s

assert_match(/&lt;!--&lt;test&gt;/, html)
end
assert_match(/&lt;!--&lt;test&gt;/, html)
end
end
end
Expand Down
17 changes: 8 additions & 9 deletions test/namespaces/test_namespaces_in_parsed_doc.rb
Expand Up @@ -63,16 +63,15 @@ def test_parsed_namespace_count
end

def test_namespaces_under_memory_pressure_issue1155
skip("JRuby doesn't do GC.") if Nokogiri.jruby?
refute_valgrind_errors do
# see https://github.com/sparklemotion/nokogiri/issues/1155 for background
filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
doc = Nokogiri::XML File.open(filename)

# this test is here to emit warnings when run under valgrind
# see https://github.com/sparklemotion/nokogiri/issues/1155 for background
filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
doc = Nokogiri::XML File.open(filename)

# bizarrely, can't repro without the call to #to_a
doc.xpath('//namespace::*').to_a.each do |ns|
ns.inspect
# bizarrely, can't repro without the call to #to_a
doc.xpath('//namespace::*').to_a.each do |ns|
ns.inspect
end
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion test/test_nokogiri.rb
Expand Up @@ -2,7 +2,7 @@

class TestNokogiri < Nokogiri::TestCase
def test_libxml_iconv
skip "this constant is only set in the C extension when libxml2 is used" if !Nokogiri.uses_libxml?
skip_unless_libxml2("this constant is only set in the C extension when libxml2 is used")
assert Nokogiri.const_defined?(:LIBXML_ICONV_ENABLED)
end

Expand Down
8 changes: 4 additions & 4 deletions test/test_version.rb
Expand Up @@ -29,17 +29,17 @@ def test_version_info_basics
end

def test_version_info_for_xerces
skip("xerces is only used for JRuby") unless Nokogiri.jruby?
skip_unless_jruby("xerces is only used for JRuby")
assert_equal(Nokogiri::XERCES_VERSION, version_info["other_libraries"]["xerces"])
end

def test_version_info_for_nekohtml
skip("nekohtml is only used for JRuby") unless Nokogiri.jruby?
skip_unless_jruby("nekohtml is only used for JRuby")
assert_equal(Nokogiri::NEKO_VERSION, version_info["other_libraries"]["nekohtml"])
end

def test_version_info_for_libxml
skip("libxml2 is only used for CRuby") unless Nokogiri.uses_libxml?
skip_unless_libxml2("libxml2 is only used for CRuby")

if Nokogiri::VersionInfo.instance.libxml2_using_packaged?
assert_equal("packaged", version_info["libxml"]["source"])
Expand All @@ -66,7 +66,7 @@ def test_version_info_for_libxml
end

def test_version_info_for_libxslt
skip("libxslt is only used for CRuby") unless Nokogiri.uses_libxml?
skip_unless_libxml2("libxslt is only used for CRuby")

if Nokogiri::VersionInfo.instance.libxml2_using_packaged?
assert_equal("packaged", version_info["libxslt"]["source"])
Expand Down

0 comments on commit 8f6c714

Please sign in to comment.