Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Bugfixes, speed improvements and code cleanup for Nokogiri's and LibX…

…ML's XmlMini backend

[#3641]

Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
  • Loading branch information...
commit 12f6fd0f2687f083bc23ad63fdc82c7e65cb8984 1 parent 54bc485
@wvanbergen wvanbergen authored jeremy committed
View
106 activesupport/lib/active_support/xml_mini/libxml.rb
@@ -9,15 +9,12 @@ module XmlMini_LibXML #:nodoc:
# string::
# XML Document string to parse
def parse(string)
- LibXML::XML.default_keep_blanks = false
-
if string.blank?
{}
else
LibXML::XML::Parser.string(string.strip).parse.to_hash
end
end
-
end
end
@@ -30,101 +27,44 @@ def to_hash
end
module Node
- CONTENT_ROOT = '__content__'
- LIB_XML_LIMIT = 30000000 # Hardcoded LibXML limit
+ CONTENT_ROOT = '__content__'.freeze
# Convert XML document to hash
#
# hash::
# Hash to merge the converted element into.
def to_hash(hash={})
- if text?
- raise LibXML::XML::Error if content.length >= LIB_XML_LIMIT
- hash[CONTENT_ROOT] = content
- else
- sub_hash = insert_name_into_hash(hash, name)
- attributes_to_hash(sub_hash)
- if array?
- children_array_to_hash(sub_hash)
- elsif yaml?
- children_yaml_to_hash(sub_hash)
- else
- children_to_hash(sub_hash)
- end
+ node_hash = {}
+
+ # Insert node hash into parent hash correctly.
+ case hash[name]
+ when Array then hash[name] << node_hash
+ when Hash then hash[name] = [hash[name], node_hash]
+ when nil then hash[name] = node_hash
+ else raise "Unexpected error during hash insertion!"
end
- hash
- end
- protected
-
- # Insert name into hash
- #
- # hash::
- # Hash to merge the converted element into.
- # name::
- # name to to merge into hash
- def insert_name_into_hash(hash, name)
- sub_hash = {}
- if hash[name]
- if !hash[name].kind_of? Array
- hash[name] = [hash[name]]
- end
- hash[name] << sub_hash
- else
- hash[name] = sub_hash
+ # Handle child elements
+ each_child do |c|
+ if c.element?
+ c.to_hash(node_hash)
+ elsif c.text? || c.cdata?
+ node_hash[CONTENT_ROOT] ||= ''
+ node_hash[CONTENT_ROOT] << c.content
end
- sub_hash
end
- # Insert children into hash
- #
- # hash::
- # Hash to merge the children into.
- def children_to_hash(hash={})
- each { |child| child.to_hash(hash) }
- attributes_to_hash(hash)
- hash
- end
- # Convert xml attributes to hash
- #
- # hash::
- # Hash to merge the attributes into
- def attributes_to_hash(hash={})
- each_attr { |attr| hash[attr.name] = attr.value }
- hash
+ # Remove content node if it is blank
+ if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
+ node_hash.delete(CONTENT_ROOT)
end
- # Convert array into hash
- #
- # hash::
- # Hash to merge the array into
- def children_array_to_hash(hash={})
- hash[child.name] = map do |child|
- returning({}) { |sub_hash| child.children_to_hash(sub_hash) }
- end
- hash
- end
-
- # Convert yaml into hash
- #
- # hash::
- # Hash to merge the yaml into
- def children_yaml_to_hash(hash = {})
- hash[CONTENT_ROOT] = content unless content.blank?
- hash
- end
-
- # Check if child is of type array
- def array?
- child? && child.next? && child.name == child.next.name
- end
-
- # Check if child is of type yaml
- def yaml?
- attributes.collect{|x| x.value}.include?('yaml')
- end
+ # Handle attributes
+ each_attr { |a| node_hash[a.name] = a.value }
+ hash
+ end
end
end
end
View
47 activesupport/lib/active_support/xml_mini/nokogiri.rb
@@ -12,7 +12,7 @@ def parse(string)
if string.blank?
{}
else
- doc = Nokogiri::XML(string) { |cfg| cfg.noblanks }
+ doc = Nokogiri::XML(string)
raise doc.errors.first if doc.errors.length > 0
doc.to_hash
end
@@ -26,40 +26,43 @@ def to_hash
end
module Node
- CONTENT_ROOT = '__content__'
+ CONTENT_ROOT = '__content__'.freeze
# Convert XML document to hash
#
# hash::
# Hash to merge the converted element into.
def to_hash(hash = {})
- attributes = attributes_as_hash
- if hash[name]
- hash[name] = [hash[name]].flatten
- hash[name] << attributes
- else
- hash[name] ||= attributes
+ node_hash = {}
+
+ # Insert node hash into parent hash correctly.
+ case hash[name]
+ when Array then hash[name] << node_hash
+ when Hash then hash[name] = [hash[name], node_hash]
+ when nil then hash[name] = node_hash
+ else raise "Unexpected error during hash insertion!"
end
- children.each { |child|
- next if child.blank? && 'file' != self['type']
+ # Handle child elements
+ children.each do |c|
+ if c.element?
+ c.to_hash(node_hash)
+ elsif c.text? || c.cdata?
+ node_hash[CONTENT_ROOT] ||= ''
+ node_hash[CONTENT_ROOT] << c.content
+ end
+ end
- if child.text? || child.cdata?
- (attributes[CONTENT_ROOT] ||= '') << child.content
- next
- end
+ # Remove content node if it is blank and there are child tags
+ if node_hash.length > 1 && node_hash[CONTENT_ROOT].blank?
+ node_hash.delete(CONTENT_ROOT)
+ end
- child.to_hash attributes
- }
+ # Handle attributes
+ attribute_nodes.each { |a| node_hash[a.node_name] = a.value }
hash
end
-
- def attributes_as_hash
- Hash[*(attribute_nodes.map { |node|
- [node.node_name, node.value]
- }.flatten)]
- end
end
end
View
204 activesupport/test/xml_mini/libxml_engine_test.rb
@@ -0,0 +1,204 @@
+require 'abstract_unit'
+require 'active_support/xml_mini'
+
+begin
+ gem 'libxml-ruby'
+rescue Gem::LoadError
+ # Skip nokogiri tests
+else
+
+require 'libxml'
+
+class NokogiriEngineTest < Test::Unit::TestCase
+ include ActiveSupport
+
+ def setup
+ @default_backend = XmlMini.backend
+ XmlMini.backend = 'LibXML'
+ end
+
+ def teardown
+ XmlMini.backend = @default_backend
+ end
+
+ def test_file_from_xml
+ hash = Hash.from_xml(<<-eoxml)
+ <blog>
+ <logo type="file" name="logo.png" content_type="image/png">
+ </logo>
+ </blog>
+ eoxml
+ assert hash.has_key?('blog')
+ assert hash['blog'].has_key?('logo')
+
+ file = hash['blog']['logo']
+ assert_equal 'logo.png', file.original_filename
+ assert_equal 'image/png', file.content_type
+ end
+
+ def test_exception_thrown_on_expansion_attack
+ assert_raise LibXML::XML::Error do
+ attack_xml = <<-EOT
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;">
+ <!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;">
+ <!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
+ ]>
+ <member>
+ &a;
+ </member>
+ EOT
+ Hash.from_xml(attack_xml)
+ end
+ end
+
+ def test_setting_nokogiri_as_backend
+ XmlMini.backend = 'LibXML'
+ assert_equal XmlMini_LibXML, XmlMini.backend
+ end
+
+ def test_blank_returns_empty_hash
+ assert_equal({}, XmlMini.parse(nil))
+ assert_equal({}, XmlMini.parse(''))
+ end
+
+ def test_array_type_makes_an_array
+ assert_equal_rexml(<<-eoxml)
+ <blog>
+ <posts type="array">
+ <post>a post</post>
+ <post>another post</post>
+ </posts>
+ </blog>
+ eoxml
+ end
+
+ def test_one_node_document_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products/>
+ eoxml
+ end
+
+ def test_one_node_with_attributes_document_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products foo="bar"/>
+ eoxml
+ end
+
+ def test_products_node_with_book_node_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ <book name="awesome" id="12345" />
+ </products>
+ eoxml
+ end
+
+ def test_products_node_with_two_book_nodes_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ <book name="awesome" id="12345" />
+ <book name="america" id="67890" />
+ </products>
+ eoxml
+ end
+
+ def test_single_node_with_content_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ hello world
+ </products>
+ eoxml
+ end
+
+ def test_children_with_children
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <book name="america" id="67890" />
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ hello everyone
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_non_adjacent_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ good
+ <products>
+ hello everyone
+ </products>
+ morning
+ </root>
+ eoxml
+ end
+
+ def test_children_with_simple_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <![CDATA[cdatablock]]>
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_multiple_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_text_and_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ hello <![CDATA[cdatablock]]>
+ morning
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_blank_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products> </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_blank_text_and_attribute
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products type="file"> </products>
+ </root>
+ eoxml
+ end
+
+ private
+ def assert_equal_rexml(xml)
+ hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
+ assert_equal(hash, XmlMini.parse(xml))
+ end
+end
+
+end
View
42 activesupport/test/xml_mini/nokogiri_engine_test.rb
@@ -147,17 +147,53 @@ def test_children_with_non_adjacent_text
eoxml
end
- def test_children_with_cdata
+ def test_children_with_simple_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <![CDATA[cdatablock]]>
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_multiple_cdata
assert_equal_rexml(<<-eoxml)
<root>
<products>
- hello <![CDATA[everyone]]>
- morning
+ <![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
</products>
</root>
eoxml
end
+ def test_children_with_text_and_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ hello <![CDATA[cdatablock]]>
+ morning
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_blank_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products> </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_blank_text_and_attribute
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products type="file"> </products>
+ </root>
+ eoxml
+ end
+
private
def assert_equal_rexml(xml)
hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
Please sign in to comment.
Something went wrong with that request. Please try again.