Skip to content

Commit

Permalink
Read quoted attributes in chunks (#126)
Browse files Browse the repository at this point in the history
  • Loading branch information
nobu committed May 16, 2024
1 parent e77365e commit 4325835
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 15 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ group :development do
gem "bundler"
gem "rake"
gem "test-unit"
gem "test-unit-ruby-core"
end
20 changes: 10 additions & 10 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -628,17 +628,17 @@ def parse_attributes(prefixes, curr_ns)
message = "Missing attribute equal: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
unless match = @source.match(/(['"])(.*?)\1\s*/um, true)
if match = @source.match(/(['"])/, true)
message =
"Missing attribute value end quote: <#{name}>: <#{match[1]}>"
raise REXML::ParseException.new(message, @source)
else
message = "Missing attribute value start quote: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
unless match = @source.match(/(['"])/, true)
message = "Missing attribute value start quote: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
quote = match[1]
value = @source.read_until(quote)
unless value.chomp!(quote)
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
raise REXML::ParseException.new(message, @source)
end
value = match[2]
@source.match(/\s*/um, true)
if prefix == "xmlns"
if local_part == "xml"
if value != "http://www.w3.org/XML/1998/namespace"
Expand Down
29 changes: 24 additions & 5 deletions lib/rexml/source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ def encoding=(enc)
encoding_updated
end

def read
def read(term = nil)
end

# Scans the buffered input up to and including the first occurrence of
# +term+ and returns the scanned text.
#
# term:: terminator to look for; it is handed to Regexp.union to build the
#        search pattern.
#
# When +term+ is not present in the remaining buffer, the remaining buffer
# contents are returned instead.
def read_until(term)
  terminator = Regexp.union(term)
  consumed = @scanner.scan_until(terminator)
  consumed || @scanner.rest
end

def ensure_buffer
Expand Down Expand Up @@ -158,16 +162,31 @@ def initialize(arg, block_size=500, encoding=nil)
end
end

def read
def read(term = nil)
begin
@scanner << readline
@scanner << readline(term)
true
rescue Exception, NameError
@source = nil
false
end
end

# Reads buffered input up to and including the first occurrence of +term+,
# appending more data obtained from +readline+ to the scanner until the
# terminator shows up.
#
# term:: terminator to look for; it is handed to Regexp.union to build the
#        search pattern and is also passed to +readline+.
#
# Returns the scanned text ending with +term+. If EOFError is raised before
# +term+ is found, returns what is left in the scanner instead, so a caller
# can detect the unterminated case by checking how the result ends.
def read_until(term)
  pattern = Regexp.union(term)
  begin
    # Keep feeding the scanner until the terminator can be matched.
    until str = @scanner.scan_until(pattern)
      @scanner << readline(term)
    end
  rescue EOFError
    # No terminator before the input ran out: hand back the unscanned tail.
    @scanner.rest
  else
    # The match used up the whole buffer; refill it while input remains.
    read if @scanner.eos? && !@source.eof?
    str
  end
end

# Calls +read+ when the scanner has reached the end of its buffer and
# @source is still set; otherwise does nothing.
def ensure_buffer
  return unless @scanner.eos? && @source

  read
end
Expand Down Expand Up @@ -218,8 +237,8 @@ def current_line
end

private
def readline
str = @source.readline(@line_break)
def readline(term = nil)
str = @source.readline(term || @line_break)
if @pending_buffer
if str.nil?
str = @pending_buffer
Expand Down
11 changes: 11 additions & 0 deletions test/test_document.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false

require 'core_assertions'

module REXMLTests
class TestDocument < Test::Unit::TestCase
include Test::Unit::CoreAssertions

def test_version_attributes_to_s
doc = REXML::Document.new(<<~eoxml)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
Expand Down Expand Up @@ -198,6 +202,13 @@ def test_xml_declaration_standalone
assert_equal('no', doc.stand_alone?, bug2539)
end

def test_gt_linear_performance
seq = [10000, 50000, 100000, 150000, 200000]
assert_linear_performance(seq) do |n|
REXML::Document.new('<test testing="' + ">" * n + '"></test>')
end
end

class WriteTest < Test::Unit::TestCase
def setup
@document = REXML::Document.new(<<-EOX)
Expand Down

0 comments on commit 4325835

Please sign in to comment.