diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index c01b087b..93a5b169 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -114,7 +114,7 @@ class BaseParser module Private INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um - TAG_PATTERN = /((?>#{QNAME_STR}))/um + TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um NAME_PATTERN = /\s*#{NAME}/um @@ -128,7 +128,6 @@ module Private def initialize( source ) self.stream = source @listeners = [] - @attributes_scanner = StringScanner.new('') end def add_listener( listener ) @@ -614,87 +613,60 @@ def process_instruction(start_position) def parse_attributes(prefixes, curr_ns) attributes = {} closed = false - match_data = @source.match(/^(.*?)(\/)?>/um, true) - if match_data.nil? - message = "Start tag isn't ended" - raise REXML::ParseException.new(message, @source) - end - - raw_attributes = match_data[1] - closed = !match_data[2].nil? - return attributes, closed if raw_attributes.nil? - return attributes, closed if raw_attributes.empty? - - @attributes_scanner.string = raw_attributes - scanner = @attributes_scanner - until scanner.eos? - if scanner.scan(/\s+/) - break if scanner.eos? - end - - start_position = scanner.pos - while true - break if scanner.scan(ATTRIBUTE_PATTERN) - unless scanner.scan(QNAME) - message = "Invalid attribute name: <#{scanner.rest}>" - raise REXML::ParseException.new(message, @source) - end - name = scanner[0] - unless scanner.scan(/\s*=\s*/um) + while true + if @source.match(">", true) + return attributes, closed + elsif @source.match("/>", true) + closed = true + return attributes, closed + elsif match = @source.match(QNAME, true) + name = match[1] + prefix = match[2] + local_part = match[3] + + unless @source.match(/\s*=\s*/um, true) message = "Missing attribute equal: <#{name}>" raise REXML::ParseException.new(message, @source) end - quote = scanner.scan(/['"]/) - unless quote - message = "Missing attribute value start quote: <#{name}>" - raise REXML::ParseException.new(message, @source) - end - unless scanner.scan(/.*#{Regexp.escape(quote)}/um) - @source.ensure_buffer - match_data = @source.match(/^(.*?)(\/)?>/um, true) - if match_data - scanner << "/" if closed - scanner << ">" - scanner << match_data[1] - scanner.pos = start_position - closed = !match_data[2].nil? - next + unless match = @source.match(/(['"])(.*?)\1\s*/um, true) + if match = @source.match(/(['"])/, true) + message = + "Missing attribute value end quote: <#{name}>: <#{match[1]}>" + raise REXML::ParseException.new(message, @source) + else + message = "Missing attribute value start quote: <#{name}>" + raise REXML::ParseException.new(message, @source) end - message = - "Missing attribute value end quote: <#{name}>: <#{quote}>" - raise REXML::ParseException.new(message, @source) end - end - name = scanner[1] - prefix = scanner[2] - local_part = scanner[3] - # quote = scanner[4] - value = scanner[5] - if prefix == "xmlns" - if local_part == "xml" - if value != "http://www.w3.org/XML/1998/namespace" - msg = "The 'xml' prefix must not be bound to any other namespace "+ + value = match[2] + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" + msg = "The 'xml' prefix must not be bound to any other namespace "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" + raise REXML::ParseException.new( msg, @source, self ) + end + elsif local_part == "xmlns" + msg = "The 'xmlns' prefix must not be declared "+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self ) + raise REXML::ParseException.new( msg, @source, self) end - elsif local_part == "xmlns" - msg = "The 'xmlns' prefix must not be declared "+ - "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" - raise REXML::ParseException.new( msg, @source, self) + curr_ns << local_part + elsif prefix + prefixes << prefix unless prefix == "xml" end - curr_ns << local_part - elsif prefix - prefixes << prefix unless prefix == "xml" - end - if attributes.has_key?(name) - msg = "Duplicate attribute #{name.inspect}" - raise REXML::ParseException.new(msg, @source, self) - end + if attributes[name] + msg = "Duplicate attribute #{name.inspect}" + raise REXML::ParseException.new(msg, @source, self) + end - attributes[name] = value + attributes[name] = value + else + message = "Invalid attribute name: <#{@source.buffer}>" + raise REXML::ParseException.new(message, @source) + end end - return attributes, closed end end end diff --git a/test/parse/test_element.rb b/test/parse/test_element.rb index 9f172a28..33dec8cd 100644 --- a/test/parse/test_element.rb +++ b/test/parse/test_element.rb @@ -39,11 +39,11 @@ def test_empty_namespace_attribute_name parse("") end assert_equal(<<-DETAIL.chomp, exception.to_s) -Invalid attribute name: <:a=""> +Invalid attribute name: <:a="">> Line: 1 -Position: 9 +Position: 13 Last 80 unconsumed characters: - +:a=""> DETAIL end diff --git a/test/test_core.rb b/test/test_core.rb index b4bfd337..44e2e7ea 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -116,11 +116,12 @@ def test_attribute def test_attribute_namespace_conflict # https://www.w3.org/TR/xml-names/#uniqAttrs - message = <<-MESSAGE + message = <<-MESSAGE.chomp Duplicate attribute "a" Line: 4 Position: 140 Last 80 unconsumed characters: +/> MESSAGE assert_raise(REXML::ParseException.new(message)) do Document.new(<<-XML) @@ -1323,11 +1324,12 @@ def test_ticket_21 exception = assert_raise(ParseException) do Document.new(src) end - assert_equal(<<-DETAIL, exception.to_s) + assert_equal(<<-DETAIL.chomp, exception.to_s) Missing attribute value start quote: Line: 1 Position: 16 Last 80 unconsumed characters: +value/> DETAIL end @@ -1336,11 +1338,12 @@ def test_parse_exception_on_missing_attribute_end_quote exception = assert_raise(ParseException) do Document.new(src) end - assert_equal(<<-DETAIL, exception.to_s) + assert_equal(<<-DETAIL.chomp, exception.to_s) Missing attribute value end quote: : <"> Line: 1 Position: 17 Last 80 unconsumed characters: +value/> DETAIL end