Permalink
Browse files

Escaping multi-byte characters [RT#15294]

Currently multi-byte characters (eg utf8) are currently thrown away.
The patch encodes multi-byte characters, per byte.
  • Loading branch information...
1 parent 78a5cc3 commit 1b62ac3c25c263b428b6aab371d12961862fa795 gerard committed with gisle Apr 2, 2008
Showing with 39 additions and 18 deletions.
  1. +1 −0 MANIFEST
  2. +4 −3 URI.pm
  3. +4 −0 URI/Escape.pm
  4. +4 −4 URI/Split.pm
  5. +3 −3 URI/_generic.pm
  6. +4 −4 URI/_query.pm
  7. +2 −2 URI/file/Base.pm
  8. +1 −1 URI/file/Mac.pm
  9. +1 −1 URI/sip.pm
  10. +15 −0 t/utf8.t
View
@@ -91,4 +91,5 @@ t/storable.t
t/storable-test.pl
t/urn-isbn.t
t/urn-oid.t
+t/utf8.t
uri-test
View
@@ -73,7 +73,8 @@ sub _init
{
my $class = shift;
my($str, $scheme) = @_;
- $str =~ s/([^$uric\#])/$URI::Escape::escapes{$1}/go;
+ # find all funny characters and encode the bytes.
+ $str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego;
$str = "$scheme:$str" unless $str =~ /^$scheme_re:/o ||
$class->_no_scheme_ok;
my $self = bless \$str, $class;
@@ -204,7 +205,7 @@ sub opaque
my $new_opaque = shift;
$new_opaque = "" unless defined $new_opaque;
- $new_opaque =~ s/([^$uric])/$URI::Escape::escapes{$1}/go;
+ $new_opaque =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego;
$$self = defined($old_scheme) ? $old_scheme : "";
$$self .= $new_opaque;
@@ -229,7 +230,7 @@ sub fragment
my $new_frag = shift;
if (defined $new_frag) {
- $new_frag =~ s/([^$uric])/$URI::Escape::escapes{$1}/go;
+ $new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego;
$$self .= "#$new_frag";
}
$old;
View
@@ -211,4 +211,8 @@ sub uri_unescape
$str;
}
+sub escape_char {
+ return join '', @URI::Escape::escapes{$_[0] =~ /(\C)/g};
+}
+
1;
View
@@ -18,20 +18,20 @@ sub uri_join {
my $uri = defined($scheme) ? "$scheme:" : "";
$path = "" unless defined $path;
if (defined $auth) {
- $auth =~ s,([/?\#]),$URI::Escape::escapes{$1},g;
+ $auth =~ s,([/?\#]), URI::Escape::escape_char($1),eg;
$uri .= "//$auth";
$path = "/$path" if length($path) && $path !~ m,^/,;
}
elsif ($path =~ m,^//,) {
$uri .= "//"; # XXX force empty auth
}
unless (length $uri) {
- $path =~ s,(:),$URI::Escape::escapes{$1}, while $path =~ m,^[^:/?\#]+:,;
+ $path =~ s,(:), URI::Escape::escape_char($1),e while $path =~ m,^[^:/?\#]+:,;
}
- $path =~ s,([?\#]),$URI::Escape::escapes{$1},g;
+ $path =~ s,([?\#]), URI::Escape::escape_char($1),eg;
$uri .= $path;
if (defined $query) {
- $query =~ s,(\#),$URI::Escape::escapes{$1},g;
+ $query =~ s,(\#), URI::Escape::escape_char($1),eg;
$uri .= "?$query";
}
$uri .= "#$frag" if defined $frag;
View
@@ -22,7 +22,7 @@ sub authority
$$self = $1;
my $rest = $3;
if (defined $auth) {
- $auth =~ s/([^$ACHAR])/$URI::Escape::escapes{$1}/go;
+ $auth =~ s/([^$ACHAR])/ URI::Escape::escape_char($1)/ego;
$$self .= "//$auth";
}
_check_path($rest, $$self);
@@ -41,7 +41,7 @@ sub path
my $rest = $3;
my $new_path = shift;
$new_path = "" unless defined $new_path;
- $new_path =~ s/([^$PCHAR])/$URI::Escape::escapes{$1}/go;
+ $new_path =~ s/([^$PCHAR])/ URI::Escape::escape_char($1)/ego;
_check_path($new_path, $$self);
$$self .= $new_path . $rest;
}
@@ -58,7 +58,7 @@ sub path_query
my $rest = $3;
my $new_path = shift;
$new_path = "" unless defined $new_path;
- $new_path =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go;
+ $new_path =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego;
_check_path($new_path, $$self);
$$self .= $new_path . $rest;
}
View
@@ -13,7 +13,7 @@ sub query
my $q = shift;
$$self = $1;
if (defined $q) {
- $q =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go;
+ $q =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego;
$$self .= "?$q";
}
$$self .= $3;
@@ -40,12 +40,12 @@ sub query_form {
my @query;
while (my($key,$vals) = splice(@new, 0, 2)) {
$key = '' unless defined $key;
- $key =~ s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/g;
+ $key =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg;
$key =~ s/ /+/g;
$vals = [ref($vals) eq "ARRAY" ? @$vals : $vals];
for my $val (@$vals) {
$val = '' unless defined $val;
- $val =~ s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/g;
+ $val =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg;
$val =~ s/ /+/g;
push(@query, "$key=$val");
}
@@ -67,7 +67,7 @@ sub query_keywords
# Try to set query string
my @copy = @_;
@copy = @{$copy[0]} if @copy == 1 && ref($copy[0]) eq "ARRAY";
- for (@copy) { s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/g; }
+ for (@copy) { s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; }
$self->query(@copy ? join('+', @copy) : undef);
}
return if !defined($old) || !defined(wantarray);
View
@@ -16,7 +16,7 @@ sub new
if (defined $auth) {
$auth =~ s,%,%25,g unless $escaped_auth;
- $auth =~ s,([/?\#]),$URI::Escape::escapes{$1},g;
+ $auth =~ s,([/?\#]), URI::Escape::escape_char($1),eg;
$auth = "//$auth";
if (defined $path) {
$path = "/$path" unless substr($path, 0, 1) eq "/";
@@ -28,7 +28,7 @@ sub new
$auth = "";
}
- $path =~ s,([%;?]),$URI::Escape::escapes{$1},g unless $escaped_path;
+ $path =~ s,([%;?]), URI::Escape::escape_char($1),eg unless $escaped_path;
$path =~ s/\#/%23/g;
my $uri = $auth . $path;
View
@@ -25,7 +25,7 @@ sub _file_extract_path
}
my $isdir = ($path =~ s/:$//);
- $path =~ s,([%/;]),$URI::Escape::escapes{$1},g;
+ $path =~ s,([%/;]), URI::Escape::escape_char($1),eg;
my @path = split(/:/, $path, -1);
for (@path) {
View
@@ -30,7 +30,7 @@ sub authority
$$self = defined($1) ? $1 : "";
my $rest = $3;
if (defined $auth) {
- $auth =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go;
+ $auth =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego;
$$self .= "$auth";
}
$$self .= $rest;
View
@@ -0,0 +1,15 @@
+#!perl
+
+use strict;
+use warnings;
+
+use utf8;
+
+use Test::More 'no_plan';
+use URI;
+
+is(URI->new('http://foobar/mooi€e')->as_string, 'http://foobar/mooi%E2%82%ACe');
+
+my $uri = URI->new('http:');
+$uri->query_form("mooi€e" => "mooi€e");
+is( $uri->query, "mooi%E2%82%ACe=mooi%E2%82%ACe" );

0 comments on commit 1b62ac3

Please sign in to comment.