Permalink
Browse files

Added SAX-based parser for XmlMini, using Nokogiri.

Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
  • Loading branch information...
wvanbergen authored and jeremy committed Jan 1, 2010
1 parent 34b03ce commit d7f9b9fd244228d3503d7d37ac2f07365d54df3c
@@ -0,0 +1,82 @@
+require 'nokogiri'
+
+# = XmlMini Nokogiri implementation using a SAX-based parser
+module ActiveSupport
+ module XmlMini_NokogiriSAX
+ extend self
+
+ # Class that will build the hash while the XML document
+ # is being parsed using SAX events.
+ class HashBuilder < Nokogiri::XML::SAX::Document
+
+ CONTENT_KEY = '__content__'.freeze
+ HASH_SIZE_KEY = '__hash_size__'.freeze
+
+ attr_reader :hash
+
+ def current_hash
+ @hash_stack.last
+ end
+
+ def start_document
+ @hash = {}
+ @hash_stack = [@hash]
+ end
+
+ def end_document
+ raise "Parse stack not empty!" if @hash_stack.size > 1
+ end
+
+ def error(error_message)
+ raise Nokogiri::XML::SyntaxError, error_message
+ end
+
+ def start_element(name, attrs = [])
+ new_hash = { CONTENT_KEY => '' }
+ new_hash[attrs.shift] = attrs.shift while attrs.length > 0
+ new_hash[HASH_SIZE_KEY] = new_hash.size + 1
+
+ case current_hash[name]
+ when Array then current_hash[name] << new_hash
+ when Hash then current_hash[name] = [current_hash[name], new_hash]
+ when nil then current_hash[name] = new_hash
+ end
+
+ @hash_stack.push(new_hash)
+ end
+
+ def end_element(name)
+ if current_hash.length > current_hash.delete(HASH_SIZE_KEY) && current_hash[CONTENT_KEY].blank? || current_hash[CONTENT_KEY] == ''
+ current_hash.delete(CONTENT_KEY)
+ end
+ @hash_stack.pop
+ end
+
+ def characters(string)
+ current_hash[CONTENT_KEY] << string
+ end
+
+ alias_method :cdata_block, :characters
+ end
+
+ attr_accessor :document_class
+ self.document_class = HashBuilder
+
+ def parse(data)
+ if !data.respond_to?(:read)
+ data = StringIO.new(data || '')
+ end
+
+ char = data.getc
+ if char.nil?
+ {}
+ else
+ data.ungetc(char)
+ document = self.document_class.new
+ parser = Nokogiri::XML::SAX::Parser.new(document)
+ parser.parse(data)
+ document.hash
+ end
+ end
+ end
+end
@@ -0,0 +1,216 @@
+require 'abstract_unit'
+require 'active_support/xml_mini'
+require 'active_support/core_ext/hash/conversions'
+
+begin
+ require 'nokogiri'
+rescue LoadError
+ # Skip nokogiri tests
+else
+
+class NokogiriEngineTest < Test::Unit::TestCase
+ include ActiveSupport
+
+ def setup
+ @default_backend = XmlMini.backend
+ XmlMini.backend = 'NokogiriSAX'
+ end
+
+ def teardown
+ XmlMini.backend = @default_backend
+ end
+
+ def test_file_from_xml
+ hash = Hash.from_xml(<<-eoxml)
+ <blog>
+ <logo type="file" name="logo.png" content_type="image/png">
+ </logo>
+ </blog>
+ eoxml
+ assert hash.has_key?('blog')
+ assert hash['blog'].has_key?('logo')
+
+ file = hash['blog']['logo']
+ assert_equal 'logo.png', file.original_filename
+ assert_equal 'image/png', file.content_type
+ end
+
+ def test_exception_thrown_on_expansion_attack
+ assert_raise Nokogiri::XML::SyntaxError do
+ attack_xml = <<-EOT
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;">
+ <!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;">
+ <!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
+ ]>
+ <member>
+ &a;
+ </member>
+ EOT
+ Hash.from_xml(attack_xml)
+ end
+ end
+
+ def test_setting_nokogiri_as_backend
+ XmlMini.backend = 'Nokogiri'
+ assert_equal XmlMini_Nokogiri, XmlMini.backend
+ end
+
+ def test_blank_returns_empty_hash
+ assert_equal({}, XmlMini.parse(nil))
+ assert_equal({}, XmlMini.parse(''))
+ end
+
+ def test_array_type_makes_an_array
+ assert_equal_rexml(<<-eoxml)
+ <blog>
+ <posts type="array">
+ <post>a post</post>
+ <post>another post</post>
+ </posts>
+ </blog>
+ eoxml
+ end
+
+ def test_one_node_document_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products/>
+ eoxml
+ end
+
+ def test_one_node_with_attributes_document_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products foo="bar"/>
+ eoxml
+ end
+
+ def test_products_node_with_book_node_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ <book name="awesome" id="12345" />
+ </products>
+ eoxml
+ end
+
+ def test_products_node_with_two_book_nodes_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ <book name="awesome" id="12345" />
+ <book name="america" id="67890" />
+ </products>
+ eoxml
+ end
+
+ def test_single_node_with_content_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ hello world
+ </products>
+ eoxml
+ end
+
+ def test_children_with_children
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <book name="america" id="67890" />
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ hello everyone
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_non_adjacent_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ good
+ <products>
+ hello everyone
+ </products>
+ morning
+ </root>
+ eoxml
+ end
+
+ def test_parse_from_io
+ io = StringIO.new(<<-eoxml)
+ <root>
+ good
+ <products>
+ hello everyone
+ </products>
+ morning
+ </root>
+ eoxml
+ XmlMini.parse(io)
+ end
+
+ def test_children_with_simple_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <![CDATA[cdatablock]]>
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_multiple_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_text_and_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ hello <![CDATA[cdatablock]]>
+ morning
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_blank_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products> </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_blank_text_and_attribute
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products type="file"> </products>
+ </root>
+ eoxml
+ end
+
+ private
+ def assert_equal_rexml(xml)
+ hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
+ assert_equal(hash, XmlMini.parse(xml))
+ end
+end
+
+end

0 comments on commit d7f9b9f

Please sign in to comment.