Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
343 changes: 343 additions & 0 deletions SPECS/rubygem-rexml/CVE-2024-39908.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
From 66d3d405337c1dea5b4522bf87e06a8cfe815298 Mon Sep 17 00:00:00 2001
From: Kevin Lockwood <v-klockwood@microsoft.com>
Date: Tue, 18 Feb 2025 12:13:44 -0800
Subject: [PATCH] [Medium] rubygem-rexml: Patch CVE-2024-39908

Link: https://github.com/ruby/rexml/raw/refs/tags/v3.3.2/lib/rexml/parsers/baseparser.rb
---
lib/rexml/parsers/baseparser.rb | 126 ++++++++++++++++++++++++--------
1 file changed, 97 insertions(+), 29 deletions(-)

diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 25bc371..a2818ae 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -7,6 +7,17 @@ require "strscan"

module REXML
module Parsers
+ if StringScanner::Version < "3.0.8"
+ module StringScannerCaptures
+ refine StringScanner do
+ def captures
+ values_at(*(1...size))
+ end
+ end
+ end
+ using StringScannerCaptures
+ end
+
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
@@ -113,6 +124,14 @@ module REXML
}

module Private
+ # Each terminator string must be two or more characters long.
+ INSTRUCTION_TERM = "?>"
+ COMMENT_TERM = "-->"
+ CDATA_TERM = "]]>"
+ DOCTYPE_TERM = "]>"
+ # Read to the end of DOCTYPE because there is no proper ENTITY termination
+ ENTITY_TERM = DOCTYPE_TERM
+
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
@@ -121,14 +140,21 @@ module REXML
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
+ CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
+ DEFAULT_ENTITIES_PATTERNS = {}
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
+ default_entities.each do |term|
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
+ end
end
private_constant :Private
- include Private

def initialize( source )
self.stream = source
@listeners = []
@entity_expansion_count = 0
+ @prefixes = Set.new
end

def add_listener( listener )
@@ -141,6 +167,7 @@ module REXML
def stream=( source )
@source = SourceFactory.create_from( source )
@closed = nil
+ @have_root = false
@document_status = nil
@tags = []
@stack = []
@@ -195,6 +222,8 @@ module REXML

# Returns the next event. This is a +PullEvent+ object.
def pull
+ @source.drop_parsed_content
+
pull_event.tap do |event|
@listeners.each do |listener|
listener.receive event
@@ -207,7 +236,12 @@ module REXML
x, @closed = @closed, nil
return [ :end_element, x ]
end
- return [ :end_document ] if empty?
+ if empty?
+ if @document_status == :in_doctype
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
+ end
+ return [ :end_document ]
+ end
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@@ -219,7 +253,14 @@ module REXML
return process_instruction(start_position)
elsif @source.match("<!", true)
if @source.match("--", true)
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
+ md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
+ if md.nil?
+ raise REXML::ParseException.new("Unclosed comment", @source)
+ end
+ if /--|-\z/.match?(md[1])
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+ return [ :comment, md[1] ]
elsif @source.match("DOCTYPE", true)
base_error_message = "Malformed DOCTYPE"
unless @source.match(/\s+/um, true)
@@ -231,7 +272,7 @@ module REXML
@source.position = start_position
raise REXML::ParseException.new(message, @source)
end
- @nsstack.unshift(curr_ns=Set.new)
+ @nsstack.unshift(Set.new)
name = parse_name(base_error_message)
if @source.match(/\s*\[/um, true)
id = [nil, nil, nil]
@@ -279,7 +320,7 @@ module REXML
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
return [ :elementdecl, "<!ELEMENT" + md[1] ]
elsif @source.match("ENTITY", true)
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
+ match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
ref = false
if match[1] == '%'
ref = true
@@ -305,13 +346,13 @@ module REXML
match << '%' if ref
return match
elsif @source.match("ATTLIST", true)
- md = @source.match(ATTLISTDECL_END, true)
+ md = @source.match(Private::ATTLISTDECL_END, true)
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
element = md[1]
contents = md[0]

pairs = {}
- values = md[0].scan( ATTDEF_RE )
+ values = md[0].strip.scan( ATTDEF_RE )
values.each do |attdef|
unless attdef[3] == "#IMPLIED"
attdef.compact!
@@ -344,19 +385,22 @@ module REXML
raise REXML::ParseException.new(message, @source)
end
return [:notationdecl, name, *id]
- elsif md = @source.match(/--(.*?)-->/um, true)
+ elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
case md[1]
when /--/, /-\z/
raise REXML::ParseException.new("Malformed comment", @source)
end
return [ :comment, md[1] ] if md
end
- elsif match = @source.match(/(%.*?;)\s*/um, true)
+ elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
return [ :externalentity, match[1] ]
elsif @source.match(/\]\s*>/um, true)
@document_status = :after_doctype
return [ :end_doctype ]
end
+ if @document_status == :in_doctype
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
+ end
end
if @document_status == :after_doctype
@source.match(/\s*/um, true)
@@ -364,10 +408,14 @@ module REXML
begin
start_position = @source.position
if @source.match("<", true)
+ # The :text branch's read_until may leave only "<" in the buffer.
+ # In that case the buffer is empty here, so we need to fill it
+ # explicitly.
+ @source.ensure_buffer
if @source.match("/", true)
@nsstack.shift
last_tag = @tags.pop
- md = @source.match(CLOSE_PATTERN, true)
+ md = @source.match(Private::CLOSE_PATTERN, true)
if md and !last_tag
message = "Unexpected top-level end tag (got '#{md[1]}')"
raise REXML::ParseException.new(message, @source)
@@ -384,16 +432,15 @@ module REXML
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][0] == ?-
- md = @source.match(/--(.*?)-->/um, true)
+ md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)

- case md[1]
- when /--/, /-\z/
+ if md.nil? || /--|-\z/.match?(md[1])
raise REXML::ParseException.new("Malformed comment", @source)
end

- return [ :comment, md[1] ] if md
+ return [ :comment, md[1] ]
else
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
return [ :cdata, md[1] ] if md
end
raise REXML::ParseException.new( "Declarations can only occur "+
@@ -402,19 +449,19 @@ module REXML
return process_instruction(start_position)
else
# Get the next tag
- md = @source.match(TAG_PATTERN, true)
+ md = @source.match(Private::TAG_PATTERN, true)
unless md
@source.position = start_position
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
tag = md[1]
@document_status = :in_element
- prefixes = Set.new
- prefixes << md[2] if md[2]
+ @prefixes.clear
+ @prefixes << md[2] if md[2]
@nsstack.unshift(curr_ns=Set.new)
- attributes, closed = parse_attributes(prefixes, curr_ns)
+ attributes, closed = parse_attributes(@prefixes, curr_ns)
# Verify that all of the prefixes have been defined
- for prefix in prefixes
+ for prefix in @prefixes
unless @nsstack.find{|k| k.member?(prefix)}
raise UndefinedNamespaceException.new(prefix,@source,self)
end
@@ -424,13 +471,25 @@ module REXML
@closed = tag
@nsstack.shift
else
+ if @tags.empty? and @have_root
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
+ end
@tags.push( tag )
end
+ @have_root = true
return [ :start_element, tag, attributes ]
end
else
- md = @source.match(/([^<]*)/um, true)
- text = md[1]
+ text = @source.read_until("<")
+ if text.chomp!("<")
+ @source.position -= "<".bytesize
+ end
+ if @tags.empty? and @have_root
+ unless /\A\s*\z/.match?(text)
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
+ end
+ return pull_event
+ end
return [ :text, text ]
end
rescue REXML::UndefinedNamespaceException
@@ -475,10 +534,14 @@ module REXML

# Unescapes all possible entities
def unnormalize( string, entities=nil, filter=nil )
- rv = string.gsub( /\r\n?/, "\n" )
+ if string.include?("\r")
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
+ else
+ rv = string.dup
+ end
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
- rv.gsub!( /&#((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
m=$1
if m.start_with?("x")
code_point = Integer(m[1..-1], 16)
@@ -494,7 +557,7 @@ module REXML
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
- re = /&#{entity_reference};/
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
sum += rv.bytesize
if sum > Security.entity_expansion_text_limit
@@ -506,7 +569,7 @@ module REXML
end
end
end
- rv.gsub!( /&amp;/, '&' )
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
end
rv
end
@@ -527,7 +590,7 @@ module REXML
end

def parse_name(base_error_message)
- md = @source.match(NAME_PATTERN, true)
+ md = @source.match(Private::NAME_PATTERN, true)
unless md
if @source.match(/\s*\S/um)
message = "#{base_error_message}: invalid name"
@@ -606,13 +669,16 @@ module REXML
end

def process_instruction(start_position)
- match_data = @source.match(INSTRUCTION_END, true)
+ match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
unless match_data
message = "Invalid processing instruction node"
@source.position = start_position
raise REXML::ParseException.new(message, @source)
end
- if @document_status.nil? and match_data[1] == "xml"
+ if match_data[1] == "xml"
+ if @document_status
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
+ end
content = match_data[2]
version = VERSION.match(content)
version = version[1] unless version.nil?
@@ -654,8 +720,10 @@ module REXML
raise REXML::ParseException.new(message, @source)
end
quote = match[1]
+ start_position = @source.position
value = @source.read_until(quote)
unless value.chomp!(quote)
+ @source.position = start_position
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
raise REXML::ParseException.new(message, @source)
end
--
2.34.1

6 changes: 5 additions & 1 deletion SPECS/rubygem-rexml/rubygem-rexml.spec
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Summary: REXML is an XML toolkit for Ruby
Name: rubygem-%{gem_name}
Version: 3.2.7
Release: 3%{?dist}
Release: 4%{?dist}
License: BSD
Vendor: Microsoft Corporation
Distribution: Mariner
Expand All @@ -12,6 +12,7 @@ URL: https://github.com/ruby/rexml
Source0: https://github.com/ruby/rexml/archive/refs/tags/v%{version}.tar.gz#/%{gem_name}-%{version}.tar.gz
Patch0: CVE-2024-41946.patch
Patch1: CVE-2024-49761.patch
Patch2: CVE-2024-39908.patch
BuildRequires: git
BuildRequires: ruby
Requires: ruby(release)
Expand All @@ -36,6 +37,9 @@ gem install -V --local --force --install-dir %{buildroot}/%{gemdir} %{gem_name}-
%{gemdir}

%changelog
* Tue Feb 18 2025 Kevin Lockwood <v-klockwood@microsoft.com> - 3.2.7-4
- Add patch for CVE-2024-39908

* Mon Nov 04 2024 Saul Paredes <saulparedes@microsoft.com> - 3.2.7-3
- Add patch for CVE-2024-49761

Expand Down