Skip to content

Commit

Permalink
Tagging 0.5.3
Browse files Browse the repository at this point in the history
git-svn-id: http://svn.caldersphere.net/svn/main/rubyforge/jrexml/tags/0.5.3@194 b03c2d0b-2f10-0410-a2f9-fc8001506dfa
  • Loading branch information
nicksieger committed Jan 16, 2008
1 parent e9b8b36 commit 028d37a
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 19 deletions.
4 changes: 4 additions & 0 deletions History.txt
@@ -1,3 +1,7 @@
= 0.5.3

* Take advantage of the fact that the XPP parser expands entities for us, so that we don't have to use the ridiculously slow REXML::Text::unnormalize method.

= 0.5.2

* Raise REXML::ParseException on parse errors, instead of a custom error.
Expand Down
5 changes: 4 additions & 1 deletion Rakefile
Expand Up @@ -3,9 +3,12 @@ require 'spec/rake/spectask'
# Paths and glob patterns that make up the manifest (collected via FileList).
MANIFEST = FileList[
  "History.txt", "Manifest.txt", "README.txt", "LICENSE.txt", "Rakefile",
  "lib/**/*.rb", "lib/xpp*", "spec/**/*.rb", "spec/*.xml"
]

# Rewrite Manifest.txt, one entry per line, each time this file is loaded.
File.open("Manifest.txt", "w") do |manifest|
  MANIFEST.each { |path| manifest.puts path }
end

require './lib/jrexml/version'
begin
require 'hoe'
hoe = Hoe.new("jrexml", "0.5.2") do |p|
hoe = Hoe.new("jrexml", JREXML::Version) do |p|
p.rubyforge_name = "caldersphere"
p.url = "http://caldersphere.rubyforge.org/jrexml"
p.author = "Nick Sieger"
Expand Down
14 changes: 2 additions & 12 deletions lib/jrexml.rb
@@ -1,13 +1,3 @@
require 'rexml/parsers/baseparser'
require 'jrexml/java_pull_parser'

class REXML::Parsers::BaseParser #:nodoc:
  class << self
    # Replacement constructor: every base parser instance is allocated, mixed
    # with JREXML::JavaPullParser, and only then initialized with the caller's
    # arguments, so the Java pull parser implementation is in place before
    # +initialize+ runs.
    def new(*args)
      parser = allocate
      parser.extend(JREXML::JavaPullParser)
      parser.send(:initialize, *args)
      parser
    end
  end
end
require 'jrexml/ext/base_parser'
require 'jrexml/ext/no_unnormalize'
26 changes: 26 additions & 0 deletions lib/jrexml/ext/base_parser.rb
@@ -0,0 +1,26 @@
require 'rexml/parsers/baseparser'

class REXML::Parsers::BaseParser #:nodoc:
  class << self
    # Set to true to disable JREXML (default nil/unset means use JREXML)
    attr_accessor :default_parser

    # Build a parser with JREXML switched off for the duration of the call.
    #
    # args:: forwarded unchanged to +new+
    #
    # Returns the new parser. The previous value of +default_parser+ is
    # restored in the +ensure+ clause, so it is put back even when
    # construction raises.
    def new_default_parser(*args)
      previous = default_parser
      self.default_parser = true
      new(*args)
    ensure
      self.default_parser = previous
    end

    # Extend every REXML base parser with a version that uses a Java pull parser
    # library, unless +default_parser+ is set.
    #
    # args:: forwarded unchanged to the instance's +initialize+
    #
    # Returns the initialized parser.
    def new(*args)
      obj = allocate
      obj.extend(JREXML::JavaPullParser) unless default_parser
      # Invoke the private initializer through +send+ rather than opening a
      # singleton class to make it public: that keeps +initialize+ private on
      # the returned parser and avoids touching each instance's singleton class.
      obj.send(:initialize, *args)
      obj
    end
  end
end
25 changes: 25 additions & 0 deletions lib/jrexml/ext/no_unnormalize.rb
@@ -0,0 +1,25 @@
require 'rexml/text'
require 'jrexml/ext/base_parser'

module REXML
  class Text
    # Keep a handle on REXML's own constructor. The guard makes this patch
    # idempotent: evaluating the file a second time would otherwise alias
    # +original_initialize+ to the replacement below, which then calls itself
    # without end.
    unless private_method_defined?(:original_initialize) || method_defined?(:original_initialize)
      alias_method :original_initialize, :initialize
    end

    # Redefine text initialize to receive the expanded value, since this is done
    # by JREXML.
    #
    # Original arity/args is:
    #   def initialize arg, respect_whitespace=false, parent=nil, raw=nil, entity_filter=nil, illegal=ILLEGAL
    #
    # value:: the text content handed to the original constructor
    # args::  the remaining positional arguments; args[2] is +raw+
    def initialize(value, *args)
      # Text.new is always called with raw = true from treeparser.rb
      #
      # The shortcut only applies to String content: when +value+ is not a
      # String, storing it as the unnormalized value would make #value return
      # that object instead of a String, so such calls take the stock path.
      if !REXML::Parsers::BaseParser.default_parser && args[2] && value.kind_of?(String)
        args[2] = nil
        original_initialize(value, *args)
        # Set the 'unnormalized' ivar up front, since it's already expanded
        @unnormalized = value
      else
        original_initialize(value, *args)
      end
    end
  end
end
15 changes: 13 additions & 2 deletions lib/jrexml/java_pull_parser.rb
@@ -1,3 +1,5 @@
require 'rexml/parseexception'

module JREXML
begin
XmlPullParser = Java::org.xmlpull.v1.XmlPullParser
Expand Down Expand Up @@ -98,7 +100,12 @@ def convert_event(event)
when TEXT
text << @source.text
when ENTITY_REF
text << "&#{@source.name};"
val = @source.text
if val
text << val
else
text << "&#{@source.name};"
end
end
event = event_stack.shift
break unless event
Expand All @@ -108,7 +115,7 @@ def convert_event(event)
end
end
end
convert_event_without_text_or_entityref(event)
convert_event_without_text_or_entityref(event)
end

def convert_event_without_text_or_entityref(event)
Expand Down Expand Up @@ -168,5 +175,9 @@ def get_bytes(src)
# Build a printable label for a pull-parser event code by looking it up in
# XmlPullParser::TYPES. Returns nil when no event is given.
def debug_event(event)
  return nil unless event

  type_name = XmlPullParser::TYPES[event]
  "XmlPullParser::#{type_name}"
end

# Predicate that is always true here; the specs use it (via be_using_jrexml)
# to tell whether a parser carries this module's methods.
def using_jrexml?
  true
end
end
end
3 changes: 3 additions & 0 deletions lib/jrexml/version.rb
@@ -0,0 +1,3 @@
module JREXML
  # Version string for this release. Frozen so that code sharing the constant
  # cannot modify it in place.
  Version = "0.5.3".freeze
end
17 changes: 17 additions & 0 deletions spec/document_spec.rb
@@ -0,0 +1,17 @@
require File.dirname(__FILE__) + '/spec_helper'
require 'rexml/document'

describe JREXML do
  # The example below flips the global BaseParser.default_parser switch.
  # Capture it first so it can be put back afterwards.
  before :each do
    @previous_default_parser = REXML::Parsers::BaseParser.default_parser
  end

  # Restore the switch after every example, including failed ones; otherwise a
  # failed first expectation would leave JREXML disabled for later specs.
  after :each do
    REXML::Parsers::BaseParser.default_parser = @previous_default_parser
  end

  # Parse a small document whose text contains two named entity references
  # and one character reference.
  def document
    REXML::Document.new %q(<document>text &lt; other &gt;&#x20;text</document>)
  end

  it "should not need REXML's unnormalize method" do
    # Baseline: JREXML disabled, so REXML's own parser handles the document.
    REXML::Parsers::BaseParser.default_parser = true
    document.root.text.should == %q(text < other > text)

    # Same document with JREXML enabled and the Text patch loaded; the text
    # must come out identical.
    require 'jrexml/ext/no_unnormalize'
    REXML::Parsers::BaseParser.default_parser = false
    document.root.text.should == %q(text < other > text)
  end
end
18 changes: 16 additions & 2 deletions spec/java_pull_parser_spec.rb
Expand Up @@ -3,11 +3,10 @@
describe JREXML::JavaPullParser do
def parse(source)
@parser = REXML::Parsers::BaseParser.new(source)
@parser.extend(JREXML::JavaPullParser)
@parser.stream = source
(class << @parser; self; end).send :define_method, "base_events" do
events = []
baseparser = REXML::Parsers::BaseParser.new(source)
baseparser = REXML::Parsers::BaseParser.new_default_parser(source)
loop do
event = baseparser.pull
events << event
Expand All @@ -20,6 +19,8 @@ def parse(source)

def verify_events
@parser.base_events.each do |evt|
# still need to expand entities to compare to REXML's base parser
evt[1] = REXML::Text::unnormalize(evt[1]) if evt[0] == :text
@parser.pull.should == evt
end
@parser.should be_empty
Expand All @@ -30,6 +31,10 @@ def parse_and_verify(source)
verify_events
end

it "should use JREXML by default once it's loaded" do
  # No opt-in step: a parser built through the plain constructor should
  # already answer true to using_jrexml?.
  default_built = REXML::Parsers::BaseParser.new("<doc/>")
  default_built.should be_using_jrexml
end

it "should parse a document consisting of a single empty element" do
  # Smallest input in this spec: one self-closing root element.
  parse_and_verify("<document/>")
end
Expand Down Expand Up @@ -91,6 +96,15 @@ def parse_and_verify(source)
parse_and_verify %q(<document>text &lt; other &gt;&#x20;text</document>)
end

it "should not expand extended (e.g., HTML) entities" do
  parse "<doc>&eacute;</doc>"
  emitted = @parser.all_events

  # The &eacute; reference is expected to come through verbatim as text.
  expected = [
    [:start_element, "doc", {}],
    [:text, "&eacute;"],
    [:end_element, "doc"]
  ]
  expected.each_with_index do |event, position|
    emitted[position].should == event
  end
end

it "should handle a longer, more complex document (50+K atom feed)" do
File.open(File.dirname(__FILE__) + "/atom_feed.xml") do |f|
parse_and_verify f.read
Expand Down
4 changes: 2 additions & 2 deletions spec/spec_helper.rb
@@ -1,6 +1,6 @@
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
require 'rexml/parsers/baseparser'
require 'jrexml'
require 'jrexml/java_pull_parser'
require 'jrexml/ext/base_parser'

Spec::Runner.configure do |config|
config.before :all do
Expand Down

0 comments on commit 028d37a

Please sign in to comment.