Skip to content
This repository was archived by the owner on Jul 24, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion lib/openid/consumer/html_parse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,15 @@ def OpenID.unescape_hash(h)


def OpenID.parse_link_attrs(html)
stripped = html.gsub(REMOVED_RE,'')
begin
stripped = html.gsub(REMOVED_RE,'')
rescue ArgumentError
begin
stripped = html.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
rescue Encoding::UndefinedConversionError #needed for a problem in JRuby where it can't handle the conversion
stripped = html.encode('UTF-8', 'ASCII', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
end
end
parser = HTMLTokenizer.new(stripped)

links = []
Expand Down
12 changes: 12 additions & 0 deletions test/test_linkparse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def test_linkparse
assert(false, "datafile parsing error: bad header #{h}")
end
}
html = html.force_encoding('UTF-8') if html.respond_to? :force_encoding
links = OpenID::parse_link_attrs(html)

found = links.dup
Expand All @@ -97,5 +98,16 @@ def test_linkparse
end
}
assert_equal(numtests, testnum, "Number of tests")

# test handling of invalid UTF-8 byte sequences
if "".respond_to? :force_encoding
html = "<html><body>hello joel\255</body></html>".force_encoding('UTF-8')
else
html = "<html><body>hello joel\255</body></html>"
end
assert_nothing_raised do
OpenID::parse_link_attrs(html)
end

end
end