-
Notifications
You must be signed in to change notification settings - Fork 21.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added SAX-based parser for XmlMini, using Nokogiri.
Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
- Loading branch information
1 parent
34b03ce
commit d7f9b9f
Showing
2 changed files
with
298 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
require 'nokogiri'
require 'stringio'
|
||
# = XmlMini Nokogiri implementation using a SAX-based parser | ||
module ActiveSupport
  # XmlMini backend that turns XML into nested hashes by reacting to
  # Nokogiri SAX events instead of building a DOM first.
  module XmlMini_NokogiriSAX
    extend self

    # SAX handler that builds the result hash while the XML document is
    # being parsed.
    #
    # Every element becomes a hash holding its attributes, its text under
    # CONTENT_KEY and, only while the element is still open, a bookkeeping
    # entry under HASH_SIZE_KEY that is used to detect whether child
    # elements were added.
    class HashBuilder < Nokogiri::XML::SAX::Document
      CONTENT_KEY   = '__content__'.freeze
      HASH_SIZE_KEY = '__hash_size__'.freeze

      # The hash built so far; complete once the document has been parsed.
      attr_reader :hash

      # Hash of the element that is currently open (the root hash when no
      # element is open).
      def current_hash
        @hash_stack.last
      end

      # Resets the builder state at the start of a document.
      def start_document
        @hash = {}
        @hash_stack = [@hash]
      end

      # Raises if an element was opened but never closed.
      def end_document
        raise "Parse stack not empty!" if @hash_stack.size > 1
      end

      # Turns parser errors into exceptions instead of ignoring them.
      def error(error_message)
        raise Nokogiri::XML::SyntaxError, error_message
      end

      # Opens a new element hash and attaches it to its parent.
      #
      # +attrs+ may be either a flat <tt>[name, value, name, value, ...]</tt>
      # array or an assoc list of <tt>[name, value]</tt> pairs. The array
      # handed in by the parser is never modified.
      def start_element(name, attrs = [])
        new_hash = { CONTENT_KEY => '' }
        if attrs.first.is_a?(Array)
          attrs.each { |key, value| new_hash[key] = value }
        else
          attrs.each_slice(2) { |key, value| new_hash[key] = value }
        end
        # Size the hash will have once this marker is stored; any larger
        # size seen in end_element means child elements were added.
        new_hash[HASH_SIZE_KEY] = new_hash.size + 1

        # Repeated sibling names are collected into an array.
        case current_hash[name]
        when Array then current_hash[name] << new_hash
        when Hash  then current_hash[name] = [current_hash[name], new_hash]
        when nil   then current_hash[name] = new_hash
        end

        @hash_stack.push(new_hash)
      end

      # Closes the current element, removing the bookkeeping entry and
      # dropping content that is empty, or blank when the element has
      # child elements.
      def end_element(name)
        element = @hash_stack.pop
        size_without_children = element.delete(HASH_SIZE_KEY)
        # The marker was just removed, hence the +1 when comparing sizes.
        has_children = element.length + 1 > size_without_children
        content = element[CONTENT_KEY]

        if content == '' || (has_children && content.blank?)
          element.delete(CONTENT_KEY)
        end
      end

      # Appends text to the content of the current element.
      def characters(string)
        current_hash[CONTENT_KEY] << string
      end

      # CDATA sections are treated exactly like plain text.
      alias_method :cdata_block, :characters
    end

    # Class of the SAX document handler instantiated for each parse.
    attr_accessor :document_class
    self.document_class = HashBuilder

    # Parses XML into a hash.
    #
    # data:: an object responding to +read+, a String, or nil
    #
    # Returns an empty hash when the input is nil or empty; otherwise the
    # hash built by +document_class+.
    def parse(data)
      data = StringIO.new(data || '') unless data.respond_to?(:read)

      # Peek at the first character to detect empty input without
      # consuming anything the parser needs.
      char = data.getc
      return {} if char.nil?

      data.ungetc(char)
      document = document_class.new
      Nokogiri::XML::SAX::Parser.new(document).parse(data)
      document.hash
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
require 'abstract_unit' | ||
require 'active_support/xml_mini' | ||
require 'active_support/core_ext/hash/conversions' | ||
|
||
begin | ||
require 'nokogiri' | ||
rescue LoadError | ||
# Skip nokogiri tests | ||
else | ||
|
||
# Exercises the NokogiriSAX XmlMini backend, mostly by checking that it
# produces the same hash as the REXML backend for the same document.
#
# NOTE(review): the class is named NokogiriEngineTest although it tests the
# SAX backend; confirm this does not collide with a test class of the same
# name for the DOM-based Nokogiri backend.
class NokogiriEngineTest < Test::Unit::TestCase
  include ActiveSupport

  def setup
    @default_backend = XmlMini.backend
    XmlMini.backend = 'NokogiriSAX'
  end

  def teardown
    XmlMini.backend = @default_backend
  end

  def test_file_from_xml
    hash = Hash.from_xml(<<-eoxml)
      <blog>
        <logo type="file" name="logo.png" content_type="image/png">
        </logo>
      </blog>
    eoxml
    assert hash.has_key?('blog')
    assert hash['blog'].has_key?('logo')

    file = hash['blog']['logo']
    assert_equal 'logo.png', file.original_filename
    assert_equal 'image/png', file.content_type
  end

  def test_exception_thrown_on_expansion_attack
    assert_raise Nokogiri::XML::SyntaxError do
      attack_xml = <<-EOT
      <?xml version="1.0" encoding="UTF-8"?>
      <!DOCTYPE member [
        <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
        <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
        <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
        <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
        <!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;">
        <!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;">
        <!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
      ]>
      <member>
      &a;
      </member>
      EOT
      Hash.from_xml(attack_xml)
    end
  end

  # Checks that the SAX backend (the one under test here) can be selected
  # by name and resolves to its module.
  def test_setting_nokogiri_as_backend
    XmlMini.backend = 'NokogiriSAX'
    assert_equal XmlMini_NokogiriSAX, XmlMini.backend
  end

  def test_blank_returns_empty_hash
    assert_equal({}, XmlMini.parse(nil))
    assert_equal({}, XmlMini.parse(''))
  end

  def test_array_type_makes_an_array
    assert_equal_rexml(<<-eoxml)
      <blog>
        <posts type="array">
          <post>a post</post>
          <post>another post</post>
        </posts>
      </blog>
    eoxml
  end

  def test_one_node_document_as_hash
    assert_equal_rexml(<<-eoxml)
    <products/>
    eoxml
  end

  def test_one_node_with_attributes_document_as_hash
    assert_equal_rexml(<<-eoxml)
    <products foo="bar"/>
    eoxml
  end

  def test_products_node_with_book_node_as_hash
    assert_equal_rexml(<<-eoxml)
    <products>
      <book name="awesome" id="12345" />
    </products>
    eoxml
  end

  def test_products_node_with_two_book_nodes_as_hash
    assert_equal_rexml(<<-eoxml)
    <products>
      <book name="awesome" id="12345" />
      <book name="america" id="67890" />
    </products>
    eoxml
  end

  def test_single_node_with_content_as_hash
    assert_equal_rexml(<<-eoxml)
      <products>
        hello world
      </products>
    eoxml
  end

  def test_children_with_children
    assert_equal_rexml(<<-eoxml)
    <root>
      <products>
        <book name="america" id="67890" />
      </products>
    </root>
    eoxml
  end

  def test_children_with_text
    assert_equal_rexml(<<-eoxml)
    <root>
      <products>
        hello everyone
      </products>
    </root>
    eoxml
  end

  def test_children_with_non_adjacent_text
    assert_equal_rexml(<<-eoxml)
    <root>
      good
      <products>
        hello everyone
      </products>
      morning
    </root>
    eoxml
  end

  # Parsing from an IO must give the same result as the REXML backend gives
  # for the same document passed as a string.
  def test_parse_from_io
    xml = <<-eoxml
    <root>
      good
      <products>
        hello everyone
      </products>
      morning
    </root>
    eoxml
    expected = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
    assert_equal(expected, XmlMini.parse(StringIO.new(xml)))
  end

  def test_children_with_simple_cdata
    assert_equal_rexml(<<-eoxml)
    <root>
      <products>
        <![CDATA[cdatablock]]>
      </products>
    </root>
    eoxml
  end

  def test_children_with_multiple_cdata
    assert_equal_rexml(<<-eoxml)
    <root>
      <products>
        <![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
      </products>
    </root>
    eoxml
  end

  def test_children_with_text_and_cdata
    assert_equal_rexml(<<-eoxml)
    <root>
      <products>
        hello <![CDATA[cdatablock]]>
        morning
      </products>
    </root>
    eoxml
  end

  def test_children_with_blank_text
    assert_equal_rexml(<<-eoxml)
    <root>
      <products>   </products>
    </root>
    eoxml
  end

  def test_children_with_blank_text_and_attribute
    assert_equal_rexml(<<-eoxml)
    <root>
      <products type="file">   </products>
    </root>
    eoxml
  end

  private
    # Asserts that the backend selected in setup parses +xml+ into the same
    # hash as the REXML backend.
    def assert_equal_rexml(xml)
      hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
      assert_equal(hash, XmlMini.parse(xml))
    end
end
|
||
end |