Permalink
Browse files

Initial commit

  • Loading branch information...
0 parents commit 1e925b420d4bd28b6551743d006505ec92766ccd @nicksieger committed Nov 10, 2009
@@ -0,0 +1 @@
+data
@@ -0,0 +1,69 @@
+
+# Maps each local data file path (also used as a rake file-task name)
+# to the remote URL that the ".xml" rule downloads it from.
+URLs = {
+ "data/google-news.xml" => "http://news.google.com/?output=atom",
+ "data/twitter-search.xml" => "http://search.twitter.com/search.atom?lang=en&q=xml&rpp=100",
+# Leave out Twitter timeline for now, as we can build benchmarks
+# around assumption of atom-formatted documents
+# "data/twitter-timeline.xml" => "http://twitter.com/statuses/public_timeline.xml"
+}
+
+# Load the benchmark harness (the Harness class used by the bench tasks).
+require './harness/harness'
+# Put ./parsers on the load path; the harness loads parser files by name.
+$LOAD_PATH << "./parsers"
+
+# Directory task for "data", a prerequisite of the ".xml" download rule.
+directory "data"
+
+rule ".xml" => "data" do |t|
+ fail "Don't know URL where I can fetch #{t.name}!" unless URLs[t.name]
+ require 'net/http'
+ url = URI.parse(URLs[t.name])
+ puts "fetching #{url}..."
+ res = Net::HTTP.start(url.host, url.port) do |http|
+ http.get(url.request_uri)
+ end
+ res.error! unless Net::HTTPSuccess === res
+ File.open(t.name, "w") do |f|
+ f << res.body
+ end
+end
+
+desc "Clean cached data and any output files"
+task :clean do
+ rm_f URLs.keys
+end
+
+desc "Fetch new data"
+task :fetch_data => URLs.keys
+
+namespace :bench do
+ def run_file(f)
+ Harness.run_parser(f =~ %r{parsers/(.*)\.rb} && $1, URLs.keys.sort, ENV['N'] && ENV['N'].to_i)
+ end
+
+ desc "Run the benchmarks on all parsers."
+ task :all => :fetch_data do
+ FileList['parsers/**/*.rb'].each {|f| run_file(f) }
+ end
+
+ Dir['parsers/*'].each do |dir|
+ if File.directory?(dir)
+ basename = File.basename(dir)
+ desc "Run the #{basename} parser benchmarks."
+ task basename => :fetch_data do
+ FileList["#{dir}/**/*.rb"].each {|f| run_file(f) }
+ end
+ end
+ end
+end
+
+task :bench do
+ fail "specify parser with PARSERS=parsers/somefile.rb" unless ENV["PARSERS"]
+ FileList[ENV["PARSERS"]].each {|f| run_file(f) }
+end
+
+task :default do
+ puts "XML Parser benchmarks. Available tasks:"
+ Rake.application.options.show_tasks = true
+ Rake.application.options.full_description = false
+ Rake.application.options.show_task_pattern = //
+ Rake.application.display_tasks_and_comments
+end
@@ -0,0 +1,104 @@
+require 'benchmark'
+
+class Harness
+ module Parser
+ # Convert a Ruby stream into a possibly more efficient
+ # representation for parsing. For example, Java XML parsers are
+ # more likley to work with Java input streams or Java strings. The
+ # benchmark is for measuring parsing speed, not input conversion
+ # speed.
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ # Parse the XML input as created by #prepare_input and return the
+ # document or object representation, when applicable. The result
+ # will be handed to XPathSearch#search when performing the xpath
+ # search benchmark.
+ def parse(xml_input)
+ end
+ end
+
+ module XPathSearch
+ # Given a document object (result from Parse#parse) and an xpath
+ # expression, perform the work of the search on the document.
+ def search(document, xpath)
+ end
+ end
+
+ class Driver
+ attr_reader :label, :parser
+ def initialize(label, parser)
+ @label = label
+ @parser = parser
+ end
+
+ def prepare(*args)
+ @input = @parser.prepare_input(args[0])
+ end
+
+ def run
+ @parser.parse(@input)
+ end
+ end
+
+ # Default number of iterations.
+ DEFAULT_ITERATIONS = 100
+
+ def initialize(driver, num_iterations)
+ @driver = driver
+ @num_iterations = num_iterations
+ end
+
+ def run_bench(*args)
+ Benchmark.bmbm do |x|
+ args.each do |arg|
+ begin
+ @driver.prepare(*([arg].flatten))
+ x.report(@driver.label + ": " + arg.name) { @num_iterations.times { @driver.run } }
+ rescue => e
+ puts e.message, *e.backtrace
+ end
+ end
+ end
+ end
+
+ def runnable?
+ @driver.parser
+ end
+
+ # Redefine this method in specific parser code to create a parser.
+ #
+ # class Harness
+ # def self.parser
+ # # bootstrap your parser instance here that includes
+ # # or duck-types Harness::Parser
+ # end
+ # end
+ def self.parser
+ end
+
+ def self.create_harness(parser_name, num_iterations = DEFAULT_ITERATIONS)
+ load "#{parser_name}.rb"
+ driver = Driver.new(parser_name, parser)
+ new(driver, num_iterations)
+ end
+
+ def self.run_parser(parser, files, n)
+ docs = files.map do |f|
+ contents = File.read(f)
+ (class << contents; self; end).instance_eval do
+ define_method(:name) { f }
+ end
+ contents
+ end
+ args = [parser]; args << n if n
+ harness = Harness.create_harness(*args)
+ if harness.runnable?
+ puts "Running #{parser}"
+ harness.run_bench(*docs)
+ else
+ puts "Skipping #{parser}; no suitable driver available on this VM"
+ end
+ end
+end
@@ -0,0 +1,20 @@
+require 'hpricot'
+
+class Harness
+ module Hpricot
+ class Count
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ def parse(xml_input)
+ doc = ::Hpricot.XML(xml_input)
+ doc.search("//*").size
+ end
+ end
+ end
+
+ def self.parser
+ Harness::Hpricot::Count.new
+ end
+end
@@ -0,0 +1,23 @@
+require 'hpricot'
+
+class Harness
+ module Hpricot
+ class Parse
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ def parse(xml_input)
+ ::Hpricot.XML(xml_input)
+ end
+
+ def search(document, xpath)
+ document.search(xpath)
+ end
+ end
+ end
+
+ def self.parser
+ Harness::Hpricot::Parse.new
+ end
+end
@@ -0,0 +1,19 @@
+class Harness
+ module JavaDOM
+ class Parse
+ def prepare_input(xml_string)
+ @parser = Java::JavaxXmlParsers::DocumentBuilderFactory.newInstance.newDocumentBuilder
+ Java::JavaIo::ByteArrayInputStream.new(xml_string.to_java_bytes)
+ end
+
+ def parse(xml_input)
+ xml_input.reset
+ @parser.parse(xml_input)
+ end
+ end
+ end
+
+ def self.parser
+ Harness::JavaDOM::Parse.new if defined?(JRUBY_VERSION)
+ end
+end
@@ -0,0 +1,2 @@
+# Requires jrexml, then loads the REXML count parser definition from the
+# sibling rexml directory.
+require 'jrexml'
+load File.dirname(__FILE__) + '/../rexml/count.rb'
@@ -0,0 +1,2 @@
+# Requires jrexml, then loads the REXML parse parser definition from the
+# sibling rexml directory.
+require 'jrexml'
+load File.dirname(__FILE__) + '/../rexml/parse.rb'
@@ -0,0 +1,20 @@
+require 'nokogiri'
+
+class Harness
+ module Nokogiri
+ class AtomEntries
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ def parse(xml_input)
+ doc = ::Nokogiri.XML(xml_input)
+ doc.xpath("//atom:entry/text()", "atom" => "http://www.w3.org/2005/Atom")
+ end
+ end
+ end
+
+ def self.parser
+ Harness::Nokogiri::AtomEntries.new
+ end
+end
@@ -0,0 +1,20 @@
+require 'nokogiri'
+
+class Harness
+ module Nokogiri
+ class Count
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ def parse(xml_input)
+ doc = ::Nokogiri.XML(xml_input)
+ doc.xpath("//*").size
+ end
+ end
+ end
+
+ def self.parser
+ Harness::Nokogiri::Count.new
+ end
+end
@@ -0,0 +1,19 @@
+require 'nokogiri'
+
+class Harness
+ module Nokogiri
+ class Parse
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ def parse(xml_input)
+ ::Nokogiri.XML(xml_input)
+ end
+ end
+ end
+
+ def self.parser
+ Harness::Nokogiri::Parse.new
+ end
+end
@@ -0,0 +1,19 @@
+require 'rexml/document'
+
+class Harness
+ module REXML
+ class Count
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ def parse(xml_input)
+ ::REXML::Document.new(xml_input).root.get_elements("//*").size
+ end
+ end
+ end
+
+ def self.parser
+ Harness::REXML::Count.new
+ end
+end
@@ -0,0 +1,23 @@
+require 'rexml/document'
+
+class Harness
+ module REXML
+ class Parse
+ def prepare_input(xml_string)
+ xml_string
+ end
+
+ def parse(xml_input)
+ ::REXML::Document.new(xml_input)
+ end
+
+ def search(document, xpath)
+ document.root.get_elements(xpath)
+ end
+ end
+ end
+
+ def self.parser
+ Harness::REXML::Parse.new
+ end
+end

0 comments on commit 1e925b4

Please sign in to comment.