Permalink
Browse files

JavaDOM => JAXP

  • Loading branch information...
1 parent d171501 commit 64a81fe42f21d1d66c945c92c4101b0bb8708151 @nicksieger committed Nov 13, 2009
View
@@ -31,17 +31,27 @@ task :clean do
rm_f URLs.keys
end
-desc "Fetch new data"
-task :check_data => URLs.keys do
+def check_objectspace
if defined?(JRUBY_VERSION)
require 'jruby'
fail "Re-run JRuby with -X+O to enable ObjectSpace (needed for Nokogiri)" unless JRuby.objectspace
end
end
+desc "Fetch new data"
+task :check_data => URLs.keys do
+end
+
namespace :bench do
def run_file(f)
Harness.run_parser(f =~ %r{parsers/(.*)\.rb} && $1, URLs.keys.sort, ENV['N'] && ENV['N'].to_i)
+ rescue => e
+ puts e.message
+ if e.message =~ /objectspace/
+ check_objectspace
+ else
+ raise
+ end
end
desc "Run the benchmarks on all parsers."
@@ -2,7 +2,7 @@
class Harness
module Hpricot
- class AtomEntries
+ class XPath
def prepare_input(xml_stream)
xml_stream
end
@@ -16,6 +16,6 @@ def perform(xml_input)
end
def self.parser
- Harness::Hpricot::AtomEntries.new
+ Harness::Hpricot::XPath.new
end
end
@@ -1,5 +1,5 @@
class Harness
- module JavaDOM
+ module JAXP
class Count
def prepare_input(xml_stream)
@xpath = javax.xml.xpath.XPathFactory.newInstance.newXPath
@@ -16,6 +16,6 @@ def perform(xml_input)
end
def self.parser
- Harness::JavaDOM::Count.new
+ Harness::JAXP::Count.new if defined?(JRUBY_VERSION)
end
end
View
@@ -0,0 +1,28 @@
+require File.dirname(__FILE__) + '/helpers'
+
+class Harness
+ module JAXP
+ class DOM
+ def prepare_input(xml_stream)
+ factory = javax.xml.parsers.DocumentBuilderFactory.newInstance
+ factory.namespace_aware = true
+ @parser = factory.newDocumentBuilder
+ xml_stream
+ end
+
+ def perform(xml_input)
+ xml_input.rewind if xml_input.respond_to?(:rewind)
+ document = @parser.parse(xml_input.to_inputstream)
+ titles = []
+ document.traverse do |elem|
+ titles << elem.text_content if elem.node_name == "title"
+ end
+ titles
+ end
+ end
+ end
+
+ def self.parser
+ Harness::JAXP::DOM.new if defined?(JRUBY_VERSION)
+ end
+end
@@ -0,0 +1,19 @@
+if defined?(JRUBY_VERSION)
+module org::w3c::dom::NodeList
+ include Enumerable
+ def each
+ 0.upto(length - 1) do |i|
+ yield item(i)
+ end
+ end
+end
+
+module org::w3c::dom::Node
+ def traverse(&blk)
+ blk.call(self)
+ child_nodes.each do |e|
+ e.traverse(&blk)
+ end
+ end
+end
+end
@@ -1,5 +1,5 @@
class Harness
- module JavaDOM
+ module JAXP
class Parse
def prepare_input(xml_stream)
factory = javax.xml.parsers.DocumentBuilderFactory.newInstance
@@ -16,6 +16,6 @@ def perform(xml_input)
end
def self.parser
- Harness::JavaDOM::Parse.new if defined?(JRUBY_VERSION)
+ Harness::JAXP::Parse.new if defined?(JRUBY_VERSION)
end
end
@@ -0,0 +1,39 @@
+require File.dirname(__FILE__) + '/helpers'
+
+class Harness
+ module JAXP
+ class Stream
+ def prepare_input(xml_stream)
+ factory = javax.xml.stream.XMLInputFactory.newInstance
+ @reader = factory.createXMLStreamReader(xml_stream.to_inputstream)
+ xml_stream
+ end
+
+ def perform(xml_input)
+ xml_input.rewind if xml_input.respond_to?(:rewind)
+ titles = []
+ text = ''
+ grab_text = false
+ while @reader.has_next
+ case @reader.next
+ when javax.xml.stream.XMLStreamConstants::START_ELEMENT
+ grab_text = true if @reader.local_name == "title"
+ when javax.xml.stream.XMLStreamConstants::CHARACTERS
+ text << @reader.text if grab_text
+ when javax.xml.stream.XMLStreamConstants::END_ELEMENT
+ if @reader.local_name == "title"
+ titles << text
+ text = ''
+ grab_text = false
+ end
+ end
+ end
+ titles
+ end
+ end
+ end
+
+ def self.parser
+ Harness::JAXP::Stream.new if defined?(JRUBY_VERSION)
+ end
+end
@@ -1,15 +1,8 @@
-module org::w3c::dom::NodeList
- include Enumerable
- def each
- 0.upto(length - 1) do |i|
- yield item(i)
- end
- end
-end
+require File.dirname(__FILE__) + '/helpers'
class Harness
- module JavaDOM
- class AtomEntries
+ module JAXP
+ class XPath
def prepare_input(xml_stream)
@xpath = javax.xml.xpath.XPathFactory.newInstance.newXPath
ns_context = Object.new
@@ -31,6 +24,6 @@ def perform(xml_input)
end
def self.parser
- Harness::JavaDOM::AtomEntries.new
+ Harness::JAXP::XPath.new if defined?(JRUBY_VERSION)
end
end
@@ -0,0 +1,24 @@
+require 'nokogiri'
+
+class Harness
+ module Nokogiri
+ class DOM
+ def prepare_input(xml_stream)
+ xml_stream
+ end
+
+ def perform(xml_input)
+ xml_input.rewind if xml_input.respond_to?(:rewind)
+ document = ::Nokogiri::XML(xml_input)
+ titles = []
+ document.root.traverse do |elem|
+ titles << elem.content if elem.name == "title"
+ end
+ end
+ end
+ end
+
+ def self.parser
+ Harness::Nokogiri::DOM.new
+ end
+end
@@ -0,0 +1,36 @@
+require 'nokogiri'
+
+class Harness
+ module Nokogiri
+ class XPath
+ def prepare_input(xml_stream)
+ xml_stream
+ end
+
+ def perform(xml_input)
+ xml_input.rewind if xml_input.respond_to?(:rewind)
+ reader = ::Nokogiri::XML::Reader(xml_input)
+ titles = []
+ text = ''
+ grab_text = false
+ reader.each do |elem|
+ if elem.name == "title"
+ if elem.node_type == 1 # start element?
+ grab_text = true
+ else # elem.node_type == 15 # end element?
+ titles << text
+ text = ''
+ grab_text = false
+ end
+ elsif grab_text && elem.node_type == 3 # text?
+ text << elem.value
+ end
+ end
+ end
+ end
+ end
+
+ def self.parser
+ Harness::Nokogiri::XPath.new
+ end
+end
@@ -2,7 +2,7 @@
class Harness
module Nokogiri
- class AtomEntries
+ class XPath
def prepare_input(xml_stream)
xml_stream
end
@@ -16,6 +16,6 @@ def perform(xml_input)
end
def self.parser
- Harness::Nokogiri::AtomEntries.new
+ Harness::Nokogiri::XPath.new
end
end
@@ -3,7 +3,7 @@
class Harness
module REXML
- class AtomEntries
+ class XPath
def prepare_input(xml_stream)
xml_stream
end
@@ -18,6 +18,6 @@ def perform(xml_input)
end
def self.parser
- Harness::REXML::AtomEntries.new
+ Harness::REXML::XPath.new
end
end
@@ -4,8 +4,8 @@
it_should_parse_the_same_as(Harness::REXML::Count)
end
-describe Harness::Hpricot::AtomEntries do
- it_should_parse_the_same_as(Harness::REXML::AtomEntries)
+describe Harness::Hpricot::XPath do
+ it_should_parse_the_same_as(Harness::REXML::XPath)
end
describe Hpricot do
@@ -1,11 +1,11 @@
require File.dirname(__FILE__) + '/spec_helper'
-if defined?(Harness::JavaDOM)
- describe Harness::JavaDOM::AtomEntries do
- it_should_parse_the_same_as(Harness::REXML::AtomEntries)
+if defined?(Harness::JAXP)
+ describe Harness::JAXP::XPath do
+ it_should_parse_the_same_as(Harness::REXML::XPath)
end
- describe Harness::JavaDOM::Count do
+ describe Harness::JAXP::Count do
it_should_parse_the_same_as(Harness::REXML::Count)
end
@@ -39,7 +39,7 @@ def ns_context.getNamespaceURI(prefix)
it "should parse the titles by xpathing against a pre-parsed document" do
# Reuse some code just to get a Java DOM
- parser = DriverHelper::SpecDriver.new(Harness::JavaDOM::Parse.new)
+ parser = DriverHelper::SpecDriver.new(Harness::JAXP::Parse.new)
parser.prepare
document = parser.run
@@ -53,18 +53,9 @@ def ns_context.getNamespaceURI(prefix)
titles.should be_an_array_of_strings
end
- module org::w3c::dom::Node
- def traverse(&blk)
- blk.call(self)
- child_nodes.each do |e|
- e.traverse(&blk)
- end
- end
- end
-
it "should parse the titles by walking a DOM" do
# Reuse some code just to get a Java DOM
- parser = DriverHelper::SpecDriver.new(Harness::JavaDOM::Parse.new)
+ parser = DriverHelper::SpecDriver.new(Harness::JAXP::Parse.new)
parser.prepare
document = parser.run
@@ -4,8 +4,8 @@
it_should_parse_the_same_as(Harness::REXML::Count)
end
-describe Harness::Nokogiri::AtomEntries do
- it_should_parse_the_same_as(Harness::REXML::AtomEntries)
+describe Harness::Nokogiri::XPath do
+ it_should_parse_the_same_as(Harness::REXML::XPath)
end
describe Nokogiri do

0 comments on commit 64a81fe

Please sign in to comment.