-
Notifications
You must be signed in to change notification settings - Fork 21.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added two SAX-based backends for XmlMini, using both LibXML and Nokog…
…iri. [#3636] Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
- Loading branch information
1 parent
12f6fd0
commit 37c5159
Showing
5 changed files
with
550 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
require 'libxml' | ||
|
||
# = XmlMini LibXML implementation using a SAX-based parser | ||
module ActiveSupport | ||
module XmlMini_LibXMLSAX | ||
extend self | ||
|
||
# Class that will build the hash while the XML document | ||
# is being parsed using SAX events. | ||
class HashBuilder | ||
|
||
include LibXML::XML::SaxParser::Callbacks | ||
|
||
CONTENT_KEY = '__content__'.freeze | ||
HASH_SIZE_KEY = '__hash_size__'.freeze | ||
|
||
attr_reader :hash | ||
|
||
def current_hash | ||
@hash_stack.last | ||
end | ||
|
||
def on_start_document | ||
@hash = { CONTENT_KEY => '' } | ||
@hash_stack = [@hash] | ||
end | ||
|
||
def on_end_document | ||
@hash = @hash_stack.pop | ||
@hash.delete(CONTENT_KEY) | ||
end | ||
|
||
def on_start_element(name, attrs = {}) | ||
new_hash = { CONTENT_KEY => '' }.merge(attrs) | ||
new_hash[HASH_SIZE_KEY] = new_hash.size + 1 | ||
|
||
case current_hash[name] | ||
when Array then current_hash[name] << new_hash | ||
when Hash then current_hash[name] = [current_hash[name], new_hash] | ||
when nil then current_hash[name] = new_hash | ||
end | ||
|
||
@hash_stack.push(new_hash) | ||
end | ||
|
||
def on_end_element(name) | ||
if current_hash.length > current_hash.delete(HASH_SIZE_KEY) && current_hash[CONTENT_KEY].blank? || current_hash[CONTENT_KEY] == '' | ||
current_hash.delete(CONTENT_KEY) | ||
end | ||
@hash_stack.pop | ||
end | ||
|
||
def on_characters(string) | ||
current_hash[CONTENT_KEY] << string | ||
end | ||
|
||
alias_method :on_cdata_block, :on_characters | ||
end | ||
|
||
attr_accessor :document_class | ||
self.document_class = HashBuilder | ||
|
||
def parse(string) | ||
return {} if string.blank? | ||
LibXML::XML::Error.set_handler(&LibXML::XML::Error::QUIET_HANDLER) | ||
parser = LibXML::XML::SaxParser.string(string) | ||
document = self.document_class.new | ||
|
||
parser.callbacks = document | ||
parser.parse | ||
document.hash | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
require 'nokogiri' | ||
|
||
# = XmlMini Nokogiri implementation using a SAX-based parser | ||
module ActiveSupport | ||
module XmlMini_NokogiriSAX | ||
extend self | ||
|
||
# Class that will build the hash while the XML document | ||
# is being parsed using SAX events. | ||
class HashBuilder < Nokogiri::XML::SAX::Document | ||
|
||
CONTENT_KEY = '__content__'.freeze | ||
HASH_SIZE_KEY = '__hash_size__'.freeze | ||
|
||
attr_reader :hash | ||
|
||
def current_hash | ||
@hash_stack.last | ||
end | ||
|
||
def start_document | ||
@hash = {} | ||
@hash_stack = [@hash] | ||
end | ||
|
||
def end_document | ||
raise "Parse stack not empty!" if @hash_stack.size > 1 | ||
end | ||
|
||
def error(error_message) | ||
raise error_message | ||
end | ||
|
||
def start_element(name, attrs = []) | ||
new_hash = { CONTENT_KEY => '' } | ||
new_hash[attrs.shift] = attrs.shift while attrs.length > 0 | ||
new_hash[HASH_SIZE_KEY] = new_hash.size + 1 | ||
|
||
case current_hash[name] | ||
when Array then current_hash[name] << new_hash | ||
when Hash then current_hash[name] = [current_hash[name], new_hash] | ||
when nil then current_hash[name] = new_hash | ||
end | ||
|
||
@hash_stack.push(new_hash) | ||
end | ||
|
||
def end_element(name) | ||
if current_hash.length > current_hash.delete(HASH_SIZE_KEY) && current_hash[CONTENT_KEY].blank? || current_hash[CONTENT_KEY] == '' | ||
current_hash.delete(CONTENT_KEY) | ||
end | ||
@hash_stack.pop | ||
end | ||
|
||
def characters(string) | ||
current_hash[CONTENT_KEY] << string | ||
end | ||
|
||
alias_method :cdata_block, :characters | ||
end | ||
|
||
attr_accessor :document_class | ||
self.document_class = HashBuilder | ||
|
||
def parse(string) | ||
return {} if string.blank? | ||
document = self.document_class.new | ||
parser = Nokogiri::XML::SAX::Parser.new(document) | ||
parser.parse(string) | ||
document.hash | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
require 'abstract_unit' | ||
require 'active_support/xml_mini' | ||
require 'active_support/core_ext/hash/conversions' | ||
|
||
begin | ||
require 'libxml' | ||
rescue LoadError | ||
# Skip libxml tests | ||
else | ||
|
||
class LibXMLSAXEngineTest < Test::Unit::TestCase | ||
include ActiveSupport | ||
|
||
def setup | ||
@default_backend = XmlMini.backend | ||
XmlMini.backend = 'LibXMLSAX' | ||
end | ||
|
||
def teardown | ||
XmlMini.backend = @default_backend | ||
end | ||
|
||
def test_exception_thrown_on_expansion_attack | ||
assert_raise LibXML::XML::Error do | ||
attack_xml = <<-EOT | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE member [ | ||
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> | ||
<!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> | ||
<!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> | ||
<!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> | ||
<!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;"> | ||
<!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;"> | ||
<!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"> | ||
]> | ||
<member> | ||
&a; | ||
</member> | ||
EOT | ||
|
||
Hash.from_xml(attack_xml) | ||
end | ||
end | ||
|
||
def test_setting_libxml_as_backend | ||
XmlMini.backend = 'LibXMLSAX' | ||
assert_equal XmlMini_LibXMLSAX, XmlMini.backend | ||
end | ||
|
||
def test_blank_returns_empty_hash | ||
assert_equal({}, XmlMini.parse(nil)) | ||
assert_equal({}, XmlMini.parse('')) | ||
end | ||
|
||
def test_array_type_makes_an_array | ||
assert_equal_rexml(<<-eoxml) | ||
<blog> | ||
<posts type="array"> | ||
<post>a post</post> | ||
<post>another post</post> | ||
</posts> | ||
</blog> | ||
eoxml | ||
end | ||
|
||
def test_one_node_document_as_hash | ||
assert_equal_rexml(<<-eoxml) | ||
<products/> | ||
eoxml | ||
end | ||
|
||
def test_one_node_with_attributes_document_as_hash | ||
assert_equal_rexml(<<-eoxml) | ||
<products foo="bar"/> | ||
eoxml | ||
end | ||
|
||
def test_products_node_with_book_node_as_hash | ||
assert_equal_rexml(<<-eoxml) | ||
<products> | ||
<book name="awesome" id="12345" /> | ||
</products> | ||
eoxml | ||
end | ||
|
||
def test_products_node_with_two_book_nodes_as_hash | ||
assert_equal_rexml(<<-eoxml) | ||
<products> | ||
<book name="awesome" id="12345" /> | ||
<book name="america" id="67890" /> | ||
</products> | ||
eoxml | ||
end | ||
|
||
def test_single_node_with_content_as_hash | ||
assert_equal_rexml(<<-eoxml) | ||
<products> | ||
hello world | ||
</products> | ||
eoxml | ||
end | ||
|
||
def test_children_with_children | ||
assert_equal_rexml(<<-eoxml) | ||
<root> | ||
<products> | ||
<book name="america" id="67890" /> | ||
</products> | ||
</root> | ||
eoxml | ||
end | ||
|
||
def test_children_with_text | ||
assert_equal_rexml(<<-eoxml) | ||
<root> | ||
<products> | ||
hello everyone | ||
</products> | ||
</root> | ||
eoxml | ||
end | ||
|
||
def test_children_with_non_adjacent_text | ||
assert_equal_rexml(<<-eoxml) | ||
<root> | ||
good | ||
<products> | ||
hello everyone | ||
</products> | ||
morning | ||
</root> | ||
eoxml | ||
end | ||
|
||
def test_children_with_simple_cdata | ||
assert_equal_rexml(<<-eoxml) | ||
<root> | ||
<products> | ||
<![CDATA[cdatablock]]> | ||
</products> | ||
</root> | ||
eoxml | ||
end | ||
|
||
def test_children_with_multiple_cdata | ||
assert_equal_rexml(<<-eoxml) | ||
<root> | ||
<products> | ||
<![CDATA[cdatablock1]]><![CDATA[cdatablock2]]> | ||
</products> | ||
</root> | ||
eoxml | ||
end | ||
|
||
def test_children_with_text_and_cdata | ||
assert_equal_rexml(<<-eoxml) | ||
<root> | ||
<products> | ||
hello <![CDATA[cdatablock]]> | ||
morning | ||
</products> | ||
</root> | ||
eoxml | ||
end | ||
|
||
def test_children_with_blank_text | ||
assert_equal_rexml(<<-eoxml) | ||
<root> | ||
<products> </products> | ||
</root> | ||
eoxml | ||
end | ||
|
||
private | ||
def assert_equal_rexml(xml) | ||
hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) } | ||
assert_equal(hash, XmlMini.parse(xml)) | ||
end | ||
end | ||
|
||
end |
Oops, something went wrong.