Permalink
Browse files

Make as_iri leave escapes not forming valid UTF-8 sequences

  • Loading branch information...
1 parent 280962b commit b1a770688af71aab31f77b935b0a5019726caa75 @gisle gisle committed Nov 7, 2009
Showing with 21 additions and 2 deletions.
  1. +17 −1 URI.pm
  2. +4 −1 t/iri.t
View
18 URI.pm
@@ -270,8 +270,24 @@ sub as_iri
}
}
if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) {
+ # All this crap because the more obvious:
+ #
+ # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift })
+ #
+ # doesn't work. Apparently passing a sub as CHECK only works
+ # for 'ascii' and similar direct encodings.
+
require Encode;
- return Encode::decode("UTF-8", $str);
+ my $enc = Encode::find_encoding("UTF-8");
+ my $u = "";
+ while (length $str) {
+ $u .= $enc->decode($str, Encode::FB_QUIET());
+ if (length $str) {
+ # escape next char
+ $u .= URI::Escape::escape_char(substr($str, 0, 1, ""));
+ }
+ }
+ $str = $u;
}
return $str;
}
View
@@ -2,7 +2,7 @@
use utf8;
use strict;
-use Test::More tests => 10;
+use Test::More tests => 11;
use URI;
@@ -18,6 +18,9 @@ $u = URI->new("http://example.com/Bücher");
is $u, "http://example.com/B%C3%BCcher";
is $u->as_iri, "http://example.com/Bücher";
+$u = URI->new("http://example.com/B%FCcher"); # latin1 encoded stuff
+is $u->as_iri, "http://example.com/B%FCcher"; # ...should not be decoded
+
$u = URI->new("http://➡.ws/");
is $u, "http://xn--hgi.ws/";
is $u->host, "xn--hgi.ws";

0 comments on commit b1a7706

Please sign in to comment.