From bf38d811d7bf3e0743e03f709c553f756acdd8c3 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 5 Jul 2023 01:23:27 -0400 Subject: [PATCH] prefactor: clean up XML::Schema tests (cherry picked from commit 11bb4d7057f96fec6bbe8bb42eec36a2ed1ffd71) --- test/xml/test_schema.rb | 351 ++++++++++++++++++++-------------------- 1 file changed, 175 insertions(+), 176 deletions(-) diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index 3b31d134d7..7500622d28 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -2,58 +2,29 @@ require "helper" -module Nokogiri - module XML - class TestSchema < Nokogiri::TestCase - def setup - super - assert(@xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))) - end - - def test_issue_1985_schema_parse_modifying_underlying_document - skip_unless_libxml2("Pure Java version doesn't have this bug") - - # This is a test for a workaround for a bug in LibXML2: - # - # https://gitlab.gnome.org/GNOME/libxml2/issues/148 - # - # Schema creation can modify the original document -- removal of blank text nodes -- which - # results in dangling pointers. - # - # If no nodes have been exposed, then it should be fine to create a schema. If nodes have - # been exposed to Ruby, then we need to make sure they won't be freed out from under us. - doc = <<~EOF - - - - - EOF - - # This is OK, no nodes have been exposed - xsd_doc = Nokogiri::XML(doc) - assert(Nokogiri::XML::Schema.from_document(xsd_doc)) +class TestNokogiriXMLSchema < Nokogiri::TestCase + describe Nokogiri::XML::Schema do + let(:xsd) { Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE)) } - # This is not OK, nodes have been exposed to Ruby - xsd_doc = Nokogiri::XML(doc) - child = xsd_doc.root.children.find(&:blank?) # Find a blank node that would be freed without the fix - - Nokogiri::XML::Schema.from_document(xsd_doc) - assert(child.to_s) # This will raise a valgrind error if the node was freed + describe "construction" do + it ".new" do + assert(xsd = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE))) + assert_instance_of(Nokogiri::XML::Schema, xsd) end - def test_schema_read_memory + it ".read_memory" do xsd = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE)) assert_instance_of(Nokogiri::XML::Schema, xsd) end - def test_schema_from_document + it ".from_document" do doc = Nokogiri::XML(File.open(PO_SCHEMA_FILE)) assert(doc) xsd = Nokogiri::XML::Schema.from_document(doc) assert_instance_of(Nokogiri::XML::Schema, xsd) end - def test_invalid_schema_do_not_raise_exceptions + it "invalid_schema_do_not_raise_exceptions" do xsd = Nokogiri::XML::Schema.new(<<~EOF) @@ -86,7 +57,7 @@ def test_invalid_schema_do_not_raise_exceptions end end - def test_schema_from_document_node + it ".from_document accepts a node, but warns about it" do doc = Nokogiri::XML(File.open(PO_SCHEMA_FILE)) assert(doc) xsd = nil @@ -97,7 +68,7 @@ def test_schema_from_document_node assert_instance_of(Nokogiri::XML::Schema, xsd) end - def test_schema_validates_with_relative_paths + it "schema_validates_with_relative_paths" do xsd = File.join(ASSETS_DIR, "foo", "foo.xsd") xml = File.join(ASSETS_DIR, "valid_bar.xml") doc = Nokogiri::XML(File.open(xsd)) @@ -107,17 +78,12 @@ def test_schema_validates_with_relative_paths assert(xsd.valid?(doc)) end - def test_parse_with_memory - assert_instance_of(Nokogiri::XML::Schema, @xsd) - assert_equal(0, @xsd.errors.length) - end - - def test_new - assert(xsd = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE))) + it "parse_with_memory" do assert_instance_of(Nokogiri::XML::Schema, xsd) + assert_equal(0, xsd.errors.length) end - def test_schema_method_with_parse_options + it "schema_method_with_parse_options" do schema = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE)) assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options) @@ -125,7 +91,7 @@ def test_schema_method_with_parse_options assert_equal(Nokogiri::XML::ParseOptions.new.recover, schema.parse_options) end - def test_schema_new_with_parse_options + it "schema_new_with_parse_options" do schema = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE)) assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options) @@ -133,7 +99,7 @@ def test_schema_new_with_parse_options assert_equal(Nokogiri::XML::ParseOptions.new.recover, schema.parse_options) end - def test_schema_from_document_with_parse_options + it "schema_from_document_with_parse_options" do schema = Nokogiri::XML::Schema.from_document(Nokogiri::XML::Document.parse(File.read(PO_SCHEMA_FILE))) assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options) @@ -144,7 +110,7 @@ def test_schema_from_document_with_parse_options assert_equal(Nokogiri::XML::ParseOptions.new.recover, schema.parse_options) end - def test_schema_read_memory_with_parse_options + it "schema_read_memory_with_parse_options" do schema = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE)) assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options) @@ -152,7 +118,7 @@ def test_schema_read_memory_with_parse_options assert_equal(Nokogiri::XML::ParseOptions.new.recover, schema.parse_options) end - def test_parse_with_io + it "parse_with_io" do xsd = nil File.open(PO_SCHEMA_FILE, "rb") do |f| assert(xsd = Nokogiri::XML::Schema(f)) @@ -160,33 +126,35 @@ def test_parse_with_io assert_equal(0, xsd.errors.length) end - def test_parse_with_errors - xml = File.read(PO_SCHEMA_FILE).sub(/name="/, "name=") + it "parse_with_errors" do + xml = File.read(PO_SCHEMA_FILE).sub('name="', "name=") assert_raises(Nokogiri::XML::SyntaxError) do Nokogiri::XML::Schema(xml) end end + end - def test_validate_document + describe "validation" do + it "validate_document" do doc = Nokogiri::XML(File.read(PO_XML_FILE)) - assert(errors = @xsd.validate(doc)) + assert(errors = xsd.validate(doc)) assert_equal(0, errors.length) end - def test_validate_file - assert(errors = @xsd.validate(PO_XML_FILE)) + it "validate_file" do + assert(errors = xsd.validate(PO_XML_FILE)) assert_equal(0, errors.length) end - def test_validate_invalid_document + it "validate_invalid_document" do doc = Nokogiri::XML(File.read(PO_XML_FILE)) doc.css("city").unlink - assert(errors = @xsd.validate(doc)) + assert(errors = xsd.validate(doc)) assert_equal(2, errors.length) end - def test_validate_invalid_file + it "validate_invalid_file" do tempfile = Tempfile.new("xml") doc = Nokogiri::XML(File.read(PO_XML_FILE)) @@ -194,156 +162,187 @@ def test_validate_invalid_file tempfile.write(doc.to_xml) tempfile.close - assert(errors = @xsd.validate(tempfile.path)) + assert(errors = xsd.validate(tempfile.path)) assert_equal(2, errors.length) end - def test_validate_non_document + it "validate_non_document" do string = File.read(PO_XML_FILE) - assert_raises(ArgumentError) { @xsd.validate(string) } + assert_raises(ArgumentError) { xsd.validate(string) } end - def test_validate_empty_document + it "validate_empty_document" do doc = Nokogiri::XML("") - assert(errors = @xsd.validate(doc)) + assert(errors = xsd.validate(doc)) pending_if("https://github.com/sparklemotion/nokogiri/issues/783", Nokogiri.jruby?) do assert_equal(1, errors.length) end end - def test_valid? + it "valid?" do valid_doc = Nokogiri::XML(File.read(PO_XML_FILE)) invalid_doc = Nokogiri::XML( File.read(PO_XML_FILE).gsub(%r{[^<]*}, ""), ) - assert(@xsd.valid?(valid_doc)) - refute(@xsd.valid?(invalid_doc)) + assert(xsd.valid?(valid_doc)) + refute(xsd.valid?(invalid_doc)) end + end - def test_xsd_with_dtd - Dir.chdir(File.join(ASSETS_DIR, "saml")) do - # works - Nokogiri::XML::Schema(File.read("xmldsig_schema.xsd")) - # was not working - Nokogiri::XML::Schema(File.read("saml20protocol_schema.xsd")) - end + it "xsd_with_dtd" do + Dir.chdir(File.join(ASSETS_DIR, "saml")) do + # works + Nokogiri::XML::Schema(File.read("xmldsig_schema.xsd")) + # was not working + Nokogiri::XML::Schema(File.read("saml20protocol_schema.xsd")) end + end + + it "xsd_import_with_no_systemid" do + # https://github.com/sparklemotion/nokogiri/pull/2296 + xsd = <<~EOF + + + + + EOF + Nokogiri::XML::Schema(xsd) # assert_nothing_raised + end + + it "issue_1985_schema_parse_modifying_underlying_document" do + skip_unless_libxml2("Pure Java version doesn't have this bug") + + # This is a test for a workaround for a bug in LibXML2: + # + # https://gitlab.gnome.org/GNOME/libxml2/issues/148 + # + # Schema creation can modify the original document -- removal of blank text nodes -- which + # results in dangling pointers. + # + # If no nodes have been exposed, then it should be fine to create a schema. If nodes have + # been exposed to Ruby, then we need to make sure they won't be freed out from under us. + doc = <<~EOF + + + + + EOF + + # This is OK, no nodes have been exposed + xsd_doc = Nokogiri::XML(doc) + assert(Nokogiri::XML::Schema.from_document(xsd_doc)) + + # This is not OK, nodes have been exposed to Ruby + xsd_doc = Nokogiri::XML(doc) + child = xsd_doc.root.children.find(&:blank?) # Find a blank node that would be freed without the fix + + Nokogiri::XML::Schema.from_document(xsd_doc) + assert(child.to_s) # This will raise a valgrind error if the node was freed + end - def test_xsd_import_with_no_systemid - # https://github.com/sparklemotion/nokogiri/pull/2296 - xsd = <<~EOF - - - + describe "CVE-2020-26247" do + # https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-vr8q-g5c7-m54m + let(:schema) do + <<~EOSCHEMA + + - EOF - Nokogiri::XML::Schema(xsd) # assert_nothing_raised + EOSCHEMA end - describe "CVE-2020-26247" do - # https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-vr8q-g5c7-m54m - let(:schema) do - <<~EOSCHEMA - - - - EOSCHEMA + if Nokogiri.uses_libxml? + describe "with default parse options" do + it "XML::Schema parsing does not attempt to access external DTDs" do + doc = Nokogiri::XML::Schema.new(schema) + errors = doc.errors.map(&:to_s) + assert_equal( + 1, + errors.grep(/ERROR: Attempt to load network entity/).length, + "Should see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", + ) + assert_empty( + errors.grep(/WARNING: failed to load HTTP resource/), + "Should not see xmlIO.c:xmlCheckHTTPInput() raising 'failed to load HTTP resource'", + ) + assert_empty( + errors.grep(/WARNING: failed to load external entity/), + "Should not see xmlIO.c:xmlDefaultExternalEntityLoader() raising 'failed to load external entity'", + ) + end + + it "XML::Schema parsing of memory does not attempt to access external DTDs" do + doc = Nokogiri::XML::Schema.read_memory(schema) + errors = doc.errors.map(&:to_s) + assert_equal( + 1, + errors.grep(/ERROR: Attempt to load network entity/).length, + "Should see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", + ) + assert_empty( + errors.grep(/WARNING: failed to load HTTP resource/), + "Should not see xmlIO.c:xmlCheckHTTPInput() raising 'failed to load HTTP resource'", + ) + assert_empty( + errors.grep(/WARNING: failed to load external entity/), + "Should not see xmlIO.c:xmlDefaultExternalEntityLoader() raising 'failed to load external entity'", + ) + end end - if Nokogiri.uses_libxml? - describe "with default parse options" do - it "XML::Schema parsing does not attempt to access external DTDs" do - doc = Nokogiri::XML::Schema.new(schema) - errors = doc.errors.map(&:to_s) - assert_equal( - 1, - errors.grep(/ERROR: Attempt to load network entity/).length, - "Should see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", - ) - assert_empty( - errors.grep(/WARNING: failed to load HTTP resource/), - "Should not see xmlIO.c:xmlCheckHTTPInput() raising 'failed to load HTTP resource'", - ) - assert_empty( - errors.grep(/WARNING: failed to load external entity/), - "Should not see xmlIO.c:xmlDefaultExternalEntityLoader() raising 'failed to load external entity'", - ) - end - - it "XML::Schema parsing of memory does not attempt to access external DTDs" do - doc = Nokogiri::XML::Schema.read_memory(schema) - errors = doc.errors.map(&:to_s) - assert_equal( - 1, - errors.grep(/ERROR: Attempt to load network entity/).length, - "Should see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", - ) - assert_empty( - errors.grep(/WARNING: failed to load HTTP resource/), - "Should not see xmlIO.c:xmlCheckHTTPInput() raising 'failed to load HTTP resource'", - ) - assert_empty( - errors.grep(/WARNING: failed to load external entity/), - "Should not see xmlIO.c:xmlDefaultExternalEntityLoader() raising 'failed to load external entity'", - ) - end + describe "with NONET turned off" do + it "XML::Schema parsing attempts to access external DTDs" do + doc = Nokogiri::XML::Schema.new(schema, Nokogiri::XML::ParseOptions.new.nononet) + errors = doc.errors.map(&:to_s) + assert_equal( + 0, + errors.grep(/ERROR: Attempt to load network entity/).length, + "Should not see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", + ) + assert_equal(1, errors.grep(/WARNING: failed to load HTTP resource|WARNING: failed to load external entity/).length) end - describe "with NONET turned off" do - it "XML::Schema parsing attempts to access external DTDs" do - doc = Nokogiri::XML::Schema.new(schema, Nokogiri::XML::ParseOptions.new.nononet) - errors = doc.errors.map(&:to_s) - assert_equal( - 0, - errors.grep(/ERROR: Attempt to load network entity/).length, - "Should not see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", - ) - assert_equal(1, errors.grep(/WARNING: failed to load HTTP resource|WARNING: failed to load external entity/).length) - end - - it "XML::Schema parsing of memory attempts to access external DTDs" do - doc = Nokogiri::XML::Schema.read_memory(schema, Nokogiri::XML::ParseOptions.new.nononet) - errors = doc.errors.map(&:to_s) - assert_equal( - 0, - errors.grep(/ERROR: Attempt to load network entity/).length, - "Should not see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", - ) - assert_equal(1, errors.grep(/WARNING: failed to load HTTP resource|WARNING: failed to load external entity/).length) - end + it "XML::Schema parsing of memory attempts to access external DTDs" do + doc = Nokogiri::XML::Schema.read_memory(schema, Nokogiri::XML::ParseOptions.new.nononet) + errors = doc.errors.map(&:to_s) + assert_equal( + 0, + errors.grep(/ERROR: Attempt to load network entity/).length, + "Should not see xmlIO.c:xmlNoNetExternalEntityLoader() raising XML_IO_NETWORK_ATTEMPT", + ) + assert_equal(1, errors.grep(/WARNING: failed to load HTTP resource|WARNING: failed to load external entity/).length) end end + end - if Nokogiri.jruby? - describe "with default parse options" do - it "XML::Schema parsing does not attempt to access external DTDs" do - doc = Nokogiri::XML::Schema.new(schema) - assert_equal 1, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length - end - - it "XML::Schema parsing of memory does not attempt to access external DTDs" do - doc = Nokogiri::XML::Schema.read_memory(schema) - assert_equal 1, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length - end + if Nokogiri.jruby? + describe "with default parse options" do + it "XML::Schema parsing does not attempt to access external DTDs" do + doc = Nokogiri::XML::Schema.new(schema) + assert_equal 1, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length end - describe "with NONET turned off" do - it "XML::Schema parsing attempts to access external DTDs" do - doc = Nokogiri::XML::Schema.new(schema, Nokogiri::XML::ParseOptions.new.nononet) - assert_equal 0, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length - end + it "XML::Schema parsing of memory does not attempt to access external DTDs" do + doc = Nokogiri::XML::Schema.read_memory(schema) + assert_equal 1, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length + end + end + + describe "with NONET turned off" do + it "XML::Schema parsing attempts to access external DTDs" do + doc = Nokogiri::XML::Schema.new(schema, Nokogiri::XML::ParseOptions.new.nononet) + assert_equal 0, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length + end - it "XML::Schema parsing of memory attempts to access external DTDs" do - doc = Nokogiri::XML::Schema.read_memory(schema, Nokogiri::XML::ParseOptions.new.nononet) - assert_equal 0, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length - end + it "XML::Schema parsing of memory attempts to access external DTDs" do + doc = Nokogiri::XML::Schema.read_memory(schema, Nokogiri::XML::ParseOptions.new.nononet) + assert_equal 0, doc.errors.map(&:to_s).grep(/WARNING: Attempt to load network entity/).length end end end