From 3843fa608d8f0f636cb05361e58d48d0ba4866d8 Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Fri, 18 Feb 2011 12:39:19 -0600 Subject: [PATCH] Nasty! How did a well-formedness bug creep into the code? I *swear* this used to work. --- lib/instiki_stringsupport.rb | 18 +++++------------- test/unit/sanitizer_test.rb | 6 +++--- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/instiki_stringsupport.rb b/lib/instiki_stringsupport.rb index 5ea52a558..0c25bcd32 100644 --- a/lib/instiki_stringsupport.rb +++ b/lib/instiki_stringsupport.rb @@ -2329,24 +2329,16 @@ def unescapeHTML when /\Aquot\z/ni then '"' when /\Aapos\z/ni then "'" when /\A#0*(\d+)\z/n then - if Integer($1) < 256 - Integer($1).chr + if Integer($1) < 1114111 + [Integer($1)].pack("U") else - if Integer($1) < 1114111 - [Integer($1)].pack("U") - else - "&##{$1};" - end + "&##{$1};" end when /\A#x([0-9a-f]+)\z/ni then - if $1.hex < 256 + if $1.hex < 1114111 [$1.hex].pack("U") else - if $1.hex < 1114111 - [$1.hex].pack("U") - else - "&#x#{$1};" - end + "&#x#{$1};" end else "&#{match};" diff --git a/test/unit/sanitizer_test.rb b/test/unit/sanitizer_test.rb index 7579674aa..a5cea1bf0 100644 --- a/test/unit/sanitizer_test.rb +++ b/test/unit/sanitizer_test.rb @@ -23,9 +23,9 @@ def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput) end def test_sanitize_named_entities - input = '

Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <

' - output = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <

" - output2 = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <

" + input = '

Greek &phis; φ, double-struck 𝔸, numeric     𝔸 ⁗, uppercase ™ <

' + output = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240 \302\240 \360\235\224\270 \342\201\227, uppercase \342\204\242 <

" + output2 = "

Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \302\240   𝔸 ⁗, uppercase \342\204\242 <

" check_sanitization(input, output, output, output) assert_equal(output2, input.to_utf8.as_bytes) end