Skip to content

Commit

Permalink
fix: ensure LinkSanitizer returns utf-8 encoded strings
Browse files Browse the repository at this point in the history
Previously, 2362298 ensured utf-8 encoded values were returned from
SafeListSanitizer, and 49dfc15 continued this practice for
FullSanitizer.

At no point was this behavior added to LinkSanitizer, which is fixed
by this commit.
  • Loading branch information
flavorjones committed May 17, 2023
1 parent 2b0dcb5 commit 714c77d
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
10 changes: 2 additions & 8 deletions lib/rails/html/sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,6 @@ def serialize(fragment)
properly_encode(fragment, encoding: "UTF-8")
end
end

module SimpleString
def serialize(fragment)
fragment.to_s
end
end
end
end
end
Expand Down Expand Up @@ -242,7 +236,7 @@ class LinkSanitizer < Rails::HTML::Sanitizer
include HTML::Concern::ComposedSanitize
include HTML::Concern::Parser::HTML4
include HTML::Concern::Scrubber::Link
include HTML::Concern::Serializer::SimpleString
include HTML::Concern::Serializer::UTF8Encode
end

# == Rails::HTML4::SafeListSanitizer
Expand Down Expand Up @@ -352,7 +346,7 @@ class LinkSanitizer < Rails::HTML::Sanitizer
include HTML::Concern::ComposedSanitize
include HTML::Concern::Parser::HTML5
include HTML::Concern::Scrubber::Link
include HTML::Concern::Serializer::SimpleString
include HTML::Concern::Serializer::UTF8Encode
end

# == Rails::HTML5::SafeListSanitizer
Expand Down
18 changes: 16 additions & 2 deletions test/sanitizer_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,13 @@ def test_full_sanitize_respect_html_escaping_of_the_given_string
assert_equal "omg &lt;script&gt;BOM&lt;/script&gt;", full_sanitize("omg &lt;script&gt;BOM&lt;/script&gt;")
end

def test_sanitize_ascii_8bit_string
full_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
assert_equal "hello", sanitized
assert_equal Encoding::UTF_8, sanitized.encoding
end
end

protected
def full_sanitize(input, options = {})
module_under_test::FullSanitizer.new.sanitize(input, options)
Expand Down Expand Up @@ -223,6 +230,13 @@ def test_strip_links_with_linkception
assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
end

def test_sanitize_ascii_8bit_string
link_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
assert_equal "<div>hello</div>", sanitized
assert_equal Encoding::UTF_8, sanitized.encoding
end
end

protected
def link_sanitize(input, options = {})
module_under_test::LinkSanitizer.new.sanitize(input, options)
Expand Down Expand Up @@ -671,8 +685,8 @@ def test_x03a_legitimate
end

def test_sanitize_ascii_8bit_string
safe_list_sanitize("<a>hello</a>".encode("ASCII-8BIT")).tap do |sanitized|
assert_equal "<a>hello</a>", sanitized
safe_list_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
assert_equal "<div><a>hello</a></div>", sanitized
assert_equal Encoding::UTF_8, sanitized.encoding
end
end
Expand Down

0 comments on commit 714c77d

Please sign in to comment.