From 065bb913d07173d5a6a98a54b04fb976f69e38a4 Mon Sep 17 00:00:00 2001
From: NAITOH Jun
Date: Fri, 3 May 2024 11:50:27 +0900
Subject: [PATCH] Fixed a problem in which ParseException error messages could not be retrieved if the error content contained Unicode characters.

## Why?

If the XML tag contains Unicode characters when the error occurs, an
`Encoding::CompatibilityError: incompatible character encodings: UTF-8 and ASCII-8BIT`
exception is raised and the ParseException error message cannot be retrieved.

See: https://github.com/ruby/rexml/issues/29
---
 lib/rexml/parseexception.rb |  1 +
 test/parse/test_element.rb  | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb
index 7b16cd1a..e57d05fd 100644
--- a/lib/rexml/parseexception.rb
+++ b/lib/rexml/parseexception.rb
@@ -29,6 +29,7 @@ def to_s
         err << "\nLine: #{line}\n"
         err << "Position: #{position}\n"
         err << "Last 80 unconsumed characters:\n"
+        err.force_encoding("ASCII-8BIT")
         err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
       end
 
diff --git a/test/parse/test_element.rb b/test/parse/test_element.rb
index 987214f3..37517123 100644
--- a/test/parse/test_element.rb
+++ b/test/parse/test_element.rb
@@ -47,6 +47,19 @@ def test_empty_namespace_attribute_name
         DETAIL
       end
 
+      def test_empty_namespace_attribute_name_with_utf8_character
+        exception = assert_raise(REXML::ParseException) do
+          parse("<x :\xE2\x80\x8B>")
+        end
+        assert_equal(<<-DETAIL.chomp.force_encoding("ASCII-8BIT"), exception.to_s)
+Invalid attribute name: <:\xE2\x80\x8B>
+Line: 1
+Position: 8
+Last 80 unconsumed characters:
+:\xE2\x80\x8B>
+        DETAIL
+      end
+
       def test_garbage_less_than_before_root_element_at_line_start
         exception = assert_raise(REXML::ParseException) do
           parse("<\n")