Permalink
Browse files

Add more specs to go along with article

  • Loading branch information...
1 parent 132c975 commit d1715013e25360ab81275cf867f14e18d1ce2627 @nicksieger committed Nov 13, 2009
Showing with 164 additions and 3 deletions.
  1. +3 −1 parsers/javadom/parse.rb
  2. +1 −1 spec/driver_helper.rb
  3. +94 −0 spec/javadom_spec.rb
  4. +32 −0 spec/nokogiri_spec.rb
  5. +34 −1 spec/rexml_spec.rb
View
@@ -2,7 +2,9 @@ class Harness
module JavaDOM
class Parse
def prepare_input(xml_stream)
- @parser = Java::JavaxXmlParsers::DocumentBuilderFactory.newInstance.newDocumentBuilder
+ factory = javax.xml.parsers.DocumentBuilderFactory.newInstance
+ factory.namespace_aware = true
+ @parser = factory.newDocumentBuilder
xml_stream
end
View
@@ -63,7 +63,7 @@ def parse_the_same_as(expected)
class ShouldBeAnArrayOfStrings
def matches?(target)
@target = target
- Array === target && target.all? {|x| String === x}
+ Array === target && target.size > 0 && target.all? {|x| String === x}
end
def failure_message
View
@@ -36,5 +36,99 @@ def ns_context.getNamespaceURI(prefix)
titles = nodes.map {|e| e.node_value}
titles.should be_an_array_of_strings
end
+
+ it "should parse the titles by xpathing against a pre-parsed document" do
+ # Reuse some code just to get a Java DOM
+ parser = DriverHelper::SpecDriver.new(Harness::JavaDOM::Parse.new)
+ parser.prepare
+ document = parser.run
+
+ nodes = xpath.evaluate("//atom:entry/atom:title/text()",
+ document, javax.xml.xpath.XPathConstants::NODESET)
+ titles = []
+ 0.upto(nodes.length-1) do |i|
+ titles << nodes.item(i).node_value
+ end
+
+ titles.should be_an_array_of_strings
+ end
+
+ module org::w3c::dom::Node
+ def traverse(&blk)
+ blk.call(self)
+ child_nodes.each do |e|
+ e.traverse(&blk)
+ end
+ end
+ end
+
+ it "should parse the titles by walking a DOM" do
+ # Reuse some code just to get a Java DOM
+ parser = DriverHelper::SpecDriver.new(Harness::JavaDOM::Parse.new)
+ parser.prepare
+ document = parser.run
+
+ titles = []
+ document.traverse do |elem|
+ titles << elem.text_content if elem.node_name == "title"
+ end
+
+ titles.should be_an_array_of_strings
+ end
+
+ it "should grab titles by stream parsing" do
+ factory = javax.xml.stream.XMLInputFactory.newInstance
+ reader = factory.createXMLStreamReader(xml_stream.to_inputstream)
+ titles = []
+ text = ''
+ grab_text = false
+ while reader.has_next
+ case reader.next
+ when javax.xml.stream.XMLStreamConstants::START_ELEMENT
+ grab_text = true if reader.local_name == "title"
+ when javax.xml.stream.XMLStreamConstants::CHARACTERS
+ text << reader.text if grab_text
+ when javax.xml.stream.XMLStreamConstants::END_ELEMENT
+ if reader.local_name == "title"
+ titles << text
+ text = ''
+ grab_text = false
+ end
+ end
+ end
+
+ titles.should be_an_array_of_strings
+ end
+
+ it "should grab titles by reading events" do
+ factory = javax.xml.stream.XMLInputFactory.newInstance
+ raw_reader = factory.createXMLEventReader(xml_stream.to_inputstream)
+
+ inside_title = false
+ reader = factory.createFilteredReader(raw_reader) do |event|
+ keep = true
+ if event.start_element? && event.as_start_element.name.local_part == "title"
+ inside_title = true
+ elsif event.end_element? && event.as_end_element.name.local_part == "title"
+ inside_title = false
+ elsif !inside_title || !event.characters?
+ keep = false
+ end
+ keep
+ end
+
+ titles = []
+ text = ''
+ while reader.has_next
+ event = reader.next_event
+ if event.end_element?
+ titles << text
+ text = ''
+ end
+ text << event.as_characters.data if event.characters?
+ end
+
+ titles.should be_an_array_of_strings
+ end
end
end
View
@@ -24,4 +24,36 @@
titles = elements.map {|e| e.to_s}
titles.should be_an_array_of_strings
end
+
+ it "should walk the DOM to find the titles" do
+ document = Nokogiri::XML(xml_stream)
+ titles = []
+ document.root.traverse do |elem|
+ titles << elem.content if elem.name == "title"
+ end
+
+ titles.should be_an_array_of_strings
+ end
+
+ it "should grab titles by pull parsing" do
+ reader = Nokogiri::XML::Reader(xml_stream)
+ titles = []
+ text = ''
+ grab_text = false
+ reader.each do |elem|
+ if elem.name == "title"
+ if elem.node_type == 1 # start element?
+ grab_text = true
+ else # elem.node_type == 15 # end element?
+ titles << text
+ text = ''
+ grab_text = false
+ end
+ elsif grab_text && elem.node_type == 3 # text?
+ text << elem.value
+ end
+ end
+
+ titles.should be_an_array_of_strings
+ end
end
View
@@ -1,5 +1,5 @@
require File.dirname(__FILE__) + '/spec_helper'
-
+require 'rexml/parsers/pullparser'
describe REXML do
it "should parse the titles out of an Atom document" do
@@ -19,4 +19,37 @@
titles.should be_an_array_of_strings
end
+
+ it "should walk the DOM to find the titles" do
+ document = REXML::Document.new(xml_stream)
+ titles = []
+ document.root.each_recursive do |elem|
+ titles << elem.text.to_s if elem.name == "title"
+ end
+
+ titles.should be_an_array_of_strings
+ end
+
+ it "should grab titles by pull parsing" do
+ parser = REXML::Parsers::PullParser.new(xml_stream)
+ titles = []
+ text = ''
+ grab_text = false
+ parser.each do |event|
+ case event.event_type
+ when :start_element
+ grab_text = true if event[0] == "title"
+ when :text
+ text << event[1] if grab_text
+ when :end_element
+ if event[0] == "title"
+ titles << text
+ text = ''
+ grab_text = false
+ end
+ end
+ end
+
+ titles.should be_an_array_of_strings
+ end
end

0 comments on commit d171501

Please sign in to comment.