Skip to content

Commit

Permalink
Added two SAX-based backends for XmlMini, using both LibXML and Nokogiri.
Browse files Browse the repository at this point in the history

[#3636]

Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
  • Loading branch information
wvanbergen authored and jeremy committed Jan 1, 2010
1 parent 12f6fd0 commit 37c5159
Show file tree
Hide file tree
Showing 5 changed files with 550 additions and 3 deletions.
74 changes: 74 additions & 0 deletions activesupport/lib/active_support/xml_mini/libxmlsax.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
require 'libxml'

# = XmlMini LibXML implementation using a SAX-based parser
module ActiveSupport
module XmlMini_LibXMLSAX
extend self

# Class that will build the hash while the XML document
# is being parsed using SAX events.
class HashBuilder

include LibXML::XML::SaxParser::Callbacks

CONTENT_KEY = '__content__'.freeze
HASH_SIZE_KEY = '__hash_size__'.freeze

attr_reader :hash

# Returns the hash on top of the parse stack, i.e. the hash of the element
# whose events are currently being processed (nil when the stack is empty).
def current_hash
  @hash_stack[-1]
end

# SAX callback fired at the start of the document.
# Creates the root hash (seeded with an empty content buffer) and starts the
# parse stack with that same hash as its only entry.
def on_start_document
  root = { CONTENT_KEY => '' }
  @hash = root
  @hash_stack = [root]
end

# SAX callback fired at the end of the document.
# Pops the last hash off the parse stack, exposes it through +hash+ and
# strips the content buffer that was added when the document started.
def on_end_document
  finished = @hash_stack.pop
  @hash = finished
  finished.delete(CONTENT_KEY)
end

# SAX callback fired for every opening tag.
#
# Builds the hash for the new element, attaches it to the current (parent)
# hash under +name+ and pushes it on the parse stack so that subsequent
# events are applied to it.
#
# name  - the element name.
# attrs - the element's attributes as a Hash (defaults to no attributes).
#         The hash is only read, never modified.
def on_start_element(name, attrs = {})
  # The content buffer is appended to in place by on_characters, so it must
  # be a mutable string of its own rather than a (possibly frozen) literal.
  new_hash = { CONTENT_KEY => ''.dup }.merge!(attrs)

  # Remember how large the hash is once this bookkeeping entry is included;
  # on_end_element compares against it to detect added child entries.
  new_hash[HASH_SIZE_KEY] = new_hash.size + 1

  # First occurrence is stored directly; a repeated element name turns the
  # entry into an array of hashes.
  case current_hash[name]
  when Array then current_hash[name] << new_hash
  when Hash  then current_hash[name] = [current_hash[name], new_hash]
  when nil   then current_hash[name] = new_hash
  end

  @hash_stack.push(new_hash)
end

# SAX callback fired for every closing tag.
#
# Removes the size bookkeeping entry from the current hash, drops the
# content buffer when it carries no information, and pops the element off
# the parse stack. The buffer is dropped when it is the empty string, or
# when it is blank and entries were added to the hash since the element
# was opened.
def on_end_element(name)
  element = current_hash

  # The size is read before the bookkeeping key is deleted, so it is
  # compared on the same footing as the value stored by on_start_element.
  size_now = element.length
  size_when_opened = element.delete(HASH_SIZE_KEY)
  gained_entries = size_now > size_when_opened

  text = element[CONTENT_KEY]
  if (gained_entries && text.blank?) || text == ''
    element.delete(CONTENT_KEY)
  end

  @hash_stack.pop
end

# SAX callback fired for character data.
# Appends +string+ in place to the content buffer of the current hash and
# returns that buffer.
def on_characters(string)
  buffer = current_hash[CONTENT_KEY]
  buffer << string
end

alias_method :on_cdata_block, :on_characters
end

attr_accessor :document_class
self.document_class = HashBuilder

# Parses +string+ as XML with LibXML's SAX parser and returns the hash
# assembled by an instance of +document_class+.
#
# string - the XML source; blank input yields an empty hash without
#          touching the parser.
def parse(string)
  if string.blank?
    {}
  else
    # Installs LibXML's QUIET_HANDLER as the error handler before parsing
    # (presumably to keep parser diagnostics off the console - confirm).
    LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER)

    sax_parser = LibXML::XML::SaxParser.string(string)
    builder = document_class.new

    sax_parser.callbacks = builder
    sax_parser.parse
    builder.hash
  end
end
end
end
73 changes: 73 additions & 0 deletions activesupport/lib/active_support/xml_mini/nokogirisax.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
require 'nokogiri'

# = XmlMini Nokogiri implementation using a SAX-based parser
module ActiveSupport
module XmlMini_NokogiriSAX
extend self

# Class that will build the hash while the XML document
# is being parsed using SAX events.
class HashBuilder < Nokogiri::XML::SAX::Document

CONTENT_KEY = '__content__'.freeze
HASH_SIZE_KEY = '__hash_size__'.freeze

attr_reader :hash

# The hash currently being filled: the top entry of the parse stack
# (nil when the stack is empty).
def current_hash
  @hash_stack[-1]
end

# SAX callback fired at the start of the document.
# Resets the result to an empty hash and makes it the sole entry of the
# parse stack.
def start_document
  @hash_stack = [@hash = {}]
end

# SAX callback fired at the end of the document.
# Raises a RuntimeError if anything other than the root hash is still on
# the parse stack.
def end_document
  return if @hash_stack.size <= 1

  raise "Parse stack not empty!"
end

# SAX callback fired when the parser reports an error.
# Turns the report into an exception by raising +error_message+, so a parse
# error aborts parsing instead of being ignored.
def error(error_message)
  raise(error_message)
end

# SAX callback fired for every opening tag.
#
# Builds the hash for the new element, attaches it to the current (parent)
# hash under +name+ and pushes it on the parse stack so that subsequent
# events are applied to it.
#
# name  - the element name.
# attrs - the element's attributes, either as a flat list
#         (["k1", "v1", "k2", "v2"]) or as a list of [name, value] pairs
#         ([["k1", "v1"], ["k2", "v2"]]). The array is only read; the
#         caller's copy is left intact.
def start_element(name, attrs = [])
  # The content buffer is appended to in place by +characters+, so it must
  # be a mutable string of its own rather than a (possibly frozen) literal.
  new_hash = { CONTENT_KEY => ''.dup }

  # Flattening normalises both attribute layouts to k, v, k, v, ... and
  # works on a copy, so the parser's array is not consumed.
  attrs.flatten.each_slice(2) { |key, value| new_hash[key] = value }

  # Remember how large the hash is once this bookkeeping entry is included;
  # end_element compares against it to detect added child entries.
  new_hash[HASH_SIZE_KEY] = new_hash.size + 1

  # First occurrence is stored directly; a repeated element name turns the
  # entry into an array of hashes.
  case current_hash[name]
  when Array then current_hash[name] << new_hash
  when Hash  then current_hash[name] = [current_hash[name], new_hash]
  when nil   then current_hash[name] = new_hash
  end

  @hash_stack.push(new_hash)
end

# SAX callback fired for every closing tag.
#
# Deletes the size bookkeeping entry from the current hash, discards the
# content buffer when it holds nothing useful, and pops the element off the
# parse stack. The buffer is discarded when it is the empty string, or when
# it is blank and the hash has grown since the element was opened.
def end_element(name)
  element = current_hash

  # Receiver first, argument second: the length is taken while the
  # bookkeeping key is still present, then the key is removed.
  has_grown = element.length > element.delete(HASH_SIZE_KEY)
  content = element[CONTENT_KEY]

  element.delete(CONTENT_KEY) if (has_grown && content.blank?) || content == ''

  @hash_stack.pop
end

# SAX callback fired for character data.
# Appends +string+ in place to the content buffer of the current hash and
# returns that buffer.
def characters(string)
  text_buffer = current_hash[CONTENT_KEY]
  text_buffer << string
end

alias_method :cdata_block, :characters
end

attr_accessor :document_class
self.document_class = HashBuilder

# Parses +string+ as XML with Nokogiri's SAX parser and returns the hash
# assembled by an instance of +document_class+.
#
# string - the XML source; blank input yields an empty hash without
#          touching the parser.
def parse(string)
  if string.blank?
    {}
  else
    builder = document_class.new
    Nokogiri::XML::SAX::Parser.new(builder).parse(string)
    builder.hash
  end
end
end
end
8 changes: 5 additions & 3 deletions activesupport/test/core_ext/hash_ext_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -982,9 +982,11 @@ def test_array_values_are_not_sorted

def test_expansion_count_is_limited
expected = {
'ActiveSupport::XmlMini_REXML' => 'RuntimeError',
'ActiveSupport::XmlMini_Nokogiri' => 'Nokogiri::XML::SyntaxError',
'ActiveSupport::XmlMini_LibXML' => 'LibXML::XML::Error',
'ActiveSupport::XmlMini_REXML' => 'RuntimeError',
'ActiveSupport::XmlMini_Nokogiri' => 'Nokogiri::XML::SyntaxError',
'ActiveSupport::XmlMini_NokogiriSAX' => 'RuntimeError',
'ActiveSupport::XmlMini_LibXML' => 'LibXML::XML::Error',
'ActiveSupport::XmlMini_LibXMLSAX' => 'LibXML::XML::Error',
}[ActiveSupport::XmlMini.backend.name].constantize

assert_raise expected do
Expand Down
181 changes: 181 additions & 0 deletions activesupport/test/xml_mini/libxmlsax_engine_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
require 'abstract_unit'
require 'active_support/xml_mini'
require 'active_support/core_ext/hash/conversions'

begin
require 'libxml'
rescue LoadError
# Skip libxml tests
else

# Tests for the LibXMLSAX XmlMini backend. The class sits in the +else+
# branch of a begin/rescue around <tt>require 'libxml'</tt>, so it is only
# defined when that require succeeds.
#
# Most tests feed an XML snippet to +assert_equal_rexml+, which checks that
# the backend selected in +setup+ produces the same hash as the REXML backend.
class LibXMLSAXEngineTest < Test::Unit::TestCase
include ActiveSupport

# Remember the backend that was active and switch to LibXMLSAX for the test.
def setup
@default_backend = XmlMini.backend
XmlMini.backend = 'LibXMLSAX'
end

# Restore the backend that was active before the test ran.
def teardown
XmlMini.backend = @default_backend
end

# The document declares a chain of entities a..g in which each of a..f
# expands to ten references to the next one; parsing it through
# Hash.from_xml is expected to raise LibXML::XML::Error.
def test_exception_thrown_on_expansion_attack
assert_raise LibXML::XML::Error do
attack_xml = <<-EOT
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
<!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
<!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
<!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;">
<!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;">
<!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
]>
<member>
&a;
</member>
EOT

Hash.from_xml(attack_xml)
end
end

# Assigning the backend by name resolves to the XmlMini_LibXMLSAX module.
def test_setting_libxml_as_backend
XmlMini.backend = 'LibXMLSAX'
assert_equal XmlMini_LibXMLSAX, XmlMini.backend
end

# nil and the empty string both parse to an empty hash.
def test_blank_returns_empty_hash
assert_equal({}, XmlMini.parse(nil))
assert_equal({}, XmlMini.parse(''))
end

def test_array_type_makes_an_array
assert_equal_rexml(<<-eoxml)
<blog>
<posts type="array">
<post>a post</post>
<post>another post</post>
</posts>
</blog>
eoxml
end

def test_one_node_document_as_hash
assert_equal_rexml(<<-eoxml)
<products/>
eoxml
end

def test_one_node_with_attributes_document_as_hash
assert_equal_rexml(<<-eoxml)
<products foo="bar"/>
eoxml
end

def test_products_node_with_book_node_as_hash
assert_equal_rexml(<<-eoxml)
<products>
<book name="awesome" id="12345" />
</products>
eoxml
end

def test_products_node_with_two_book_nodes_as_hash
assert_equal_rexml(<<-eoxml)
<products>
<book name="awesome" id="12345" />
<book name="america" id="67890" />
</products>
eoxml
end

def test_single_node_with_content_as_hash
assert_equal_rexml(<<-eoxml)
<products>
hello world
</products>
eoxml
end

def test_children_with_children
assert_equal_rexml(<<-eoxml)
<root>
<products>
<book name="america" id="67890" />
</products>
</root>
eoxml
end

def test_children_with_text
assert_equal_rexml(<<-eoxml)
<root>
<products>
hello everyone
</products>
</root>
eoxml
end

def test_children_with_non_adjacent_text
assert_equal_rexml(<<-eoxml)
<root>
good
<products>
hello everyone
</products>
morning
</root>
eoxml
end

def test_children_with_simple_cdata
assert_equal_rexml(<<-eoxml)
<root>
<products>
<![CDATA[cdatablock]]>
</products>
</root>
eoxml
end

def test_children_with_multiple_cdata
assert_equal_rexml(<<-eoxml)
<root>
<products>
<![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
</products>
</root>
eoxml
end

def test_children_with_text_and_cdata
assert_equal_rexml(<<-eoxml)
<root>
<products>
hello <![CDATA[cdatablock]]>
morning
</products>
</root>
eoxml
end

def test_children_with_blank_text
assert_equal_rexml(<<-eoxml)
<root>
<products> </products>
</root>
eoxml
end

private
# Parses +xml+ once with the REXML backend and once with the currently
# configured backend, and asserts that both produce the same hash.
def assert_equal_rexml(xml)
hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
assert_equal(hash, XmlMini.parse(xml))
end
end

end
Loading

0 comments on commit 37c5159

Please sign in to comment.