Permalink
Browse files

Treat arguments as octets with Encode::_utf8_off

  • Loading branch information...
1 parent 19048cd commit 1a9327fbb945f9cda2883f7c507d79bd38169803 @nanto committed May 30, 2011
Showing with 51 additions and 8 deletions.
  1. +10 −4 URI.pm
  2. +6 −0 URI/QueryParam.pm
  3. +10 −2 URI/_generic.pm
  4. +12 −2 URI/_query.pm
  5. +13 −0 URI/_server.pm
View
14 URI.pm
@@ -4,7 +4,8 @@ use strict;
use vars qw($VERSION);
$VERSION = "1.58";
-use vars qw($ABS_REMOTE_LEADING_DOTS $ABS_ALLOW_RELATIVE_SCHEME $DEFAULT_QUERY_FORM_DELIMITER);
+use vars qw($ABS_REMOTE_LEADING_DOTS $ABS_ALLOW_RELATIVE_SCHEME $DEFAULT_QUERY_FORM_DELIMITER $COERCE_OCTETS);
+$COERCE_OCTETS = 1;
my %implements; # mapping from scheme to implementor class
@@ -19,6 +20,7 @@ $uric = quotemeta($reserved) . $unreserved . "%";
$scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*';
use Carp ();
+use Encode ();
use URI::Escape ();
use overload ('""' => sub { ${$_[0]} },
@@ -77,10 +79,13 @@ sub _init
{
my $class = shift;
my($str, $scheme) = @_;
+ Encode::_utf8_off($str) if $URI::COERCE_OCTETS;
# find all funny characters and encode the bytes.
$str = $class->_uric_escape($str);
- $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o ||
- $class->_no_scheme_ok;
+ unless ($str =~ /^$scheme_re:/o || $class->_no_scheme_ok) {
+ Encode::_utf8_off($scheme) if $URI::COERCE_OCTETS;
+ $str = "$scheme:$str";
+ }
my $self = bless \$str, $class;
$self;
}
@@ -217,6 +222,7 @@ sub opaque
my $new_opaque = shift;
$new_opaque = "" unless defined $new_opaque;
+ Encode::_utf8_off($new_opaque) if $URI::COERCE_OCTETS;
$new_opaque =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego;
$$self = defined($old_scheme) ? $old_scheme : "";
@@ -242,6 +248,7 @@ sub fragment
my $new_frag = shift;
if (defined $new_frag) {
+ Encode::_utf8_off($new_frag) if $URI::COERCE_OCTETS;
$new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego;
$$self .= "#$new_frag";
}
@@ -268,7 +275,6 @@ sub as_iri
# doesn't work before Encode 2.39. Wait for a standard release
# to bundle that version.
- require Encode;
my $enc = Encode::find_encoding("UTF-8");
my $u = "";
while (length $str) {
View
@@ -1,6 +1,8 @@
package URI::QueryParam;
use strict;
+use URI ();
+use Encode ();
sub URI::_query::query_param {
my $self = shift;
@@ -19,6 +21,8 @@ sub URI::_query::query_param {
my $key = shift;
my @i;
+ Encode::_utf8_off($key) if $URI::COERCE_OCTETS;
+
for (my $i = 0; $i < @old; $i += 2) {
push(@i, $i) if $old[$i] eq $key;
}
@@ -61,6 +65,8 @@ sub URI::_query::query_param_delete {
my @old = $self->query_form;
my @vals;
+ Encode::_utf8_off($key) if $URI::COERCE_OCTETS;
+
for (my $i = @old - 2; $i >= 0; $i -= 2) {
next if $old[$i] ne $key;
push(@vals, (splice(@old, $i, 2))[1]);
View
@@ -4,6 +4,7 @@ require URI::_query;
@ISA=qw(URI URI::_query);
use strict;
+use Encode ();
use URI::Escape qw(uri_unescape);
use Carp ();
@@ -22,6 +23,7 @@ sub authority
$$self = $1;
my $rest = $3;
if (defined $auth) {
+ Encode::_utf8_off($auth) if $URI::COERCE_OCTETS;
$auth =~ s/([^$ACHAR])/ URI::Escape::escape_char($1)/ego;
$$self .= "//$auth";
}
@@ -41,6 +43,7 @@ sub path
my $rest = $3;
my $new_path = shift;
$new_path = "" unless defined $new_path;
+ Encode::_utf8_off($new_path) if $URI::COERCE_OCTETS;
$new_path =~ s/([^$PCHAR])/ URI::Escape::escape_char($1)/ego;
_check_path($new_path, $$self);
$$self .= $new_path . $rest;
@@ -58,6 +61,7 @@ sub path_query
my $rest = $3;
my $new_path = shift;
$new_path = "" unless defined $new_path;
+ Encode::_utf8_off($new_path) if $URI::COERCE_OCTETS;
$new_path =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego;
_check_path($new_path, $$self);
$$self .= $new_path . $rest;
@@ -96,11 +100,15 @@ sub path_segments
if (ref($_)) {
my @seg = @$_;
$seg[0] =~ s/%/%25/g;
- for (@seg) { s/;/%3B/g; }
+ for (@seg) {
+ Encode::_utf8_off($_) if $URI::COERCE_OCTETS;
+ s/;/%3B/g;
+ }
$_ = join(";", @seg);
}
else {
- s/%/%25/g; s/;/%3B/g;
+ Encode::_utf8_off($_) if $URI::COERCE_OCTETS;
+ s/%/%25/g; s/;/%3B/g;
}
s,/,%2F,g;
}
View
@@ -2,6 +2,7 @@ package URI::_query;
use strict;
use URI ();
+use Encode ();
use URI::Escape qw(uri_unescape);
sub query
@@ -13,6 +14,7 @@ sub query
my $q = shift;
$$self = $1;
if (defined $q) {
+ Encode::_utf8_off($q) if $URI::COERCE_OCTETS;
$q =~ s/([^$URI::uric])/ URI::Escape::escape_char($1)/ego;
$$self .= "?$q";
}
@@ -42,18 +44,23 @@ sub query_form {
my @query;
while (my($key,$vals) = splice(@_, 0, 2)) {
$key = '' unless defined $key;
+ Encode::_utf8_off($key) if $URI::COERCE_OCTETS;
$key =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg;
$key =~ s/ /+/g;
$vals = [ref($vals) eq "ARRAY" ? @$vals : $vals];
for my $val (@$vals) {
$val = '' unless defined $val;
+ Encode::_utf8_off($val) if $URI::COERCE_OCTETS;
$val =~ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg;
$val =~ s/ /+/g;
push(@query, "$key=$val");
}
}
if (@query) {
- unless ($delim) {
+ if ($delim) {
+ Encode::_utf8_off($delim) if $URI::COERCE_OCTETS;
+ }
+ else {
$delim = $1 if $old && $old =~ /([&;])/;
$delim ||= $URI::DEFAULT_QUERY_FORM_DELIMITER || "&";
}
@@ -78,7 +85,10 @@ sub query_keywords
# Try to set query string
my @copy = @_;
@copy = @{$copy[0]} if @copy == 1 && ref($copy[0]) eq "ARRAY";
- for (@copy) { s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg; }
+ for (@copy) {
+ Encode::_utf8_off($_) if $URI::COERCE_OCTETS;
+ s/([;\/?:@&=+,\$\[\]%])/ URI::Escape::escape_char($1)/eg;
+ }
$self->query(@copy ? join('+', @copy) : undef);
}
return if !defined($old) || !defined(wantarray);
View
@@ -3,6 +3,8 @@ require URI::_generic;
@ISA=qw(URI::_generic);
use strict;
+use URI ();
+use Encode ();
use URI::Escape qw(uri_unescape);
sub _uric_escape {
@@ -20,6 +22,16 @@ sub _uric_escape {
sub _host_escape {
return unless $_[0] =~ /[^URI::uric]/;
+ if ($URI::COERCE_OCTETS) {
+ eval {
+ require URI::_idna;
+ # don't encode invalid UTF-8 octets in Punycode
+ my $host = Encode::decode_utf8($_[0], Encode::FB_CROAK | Encode::LEAVE_SRC);
+ $_[0] = URI::_idna::encode($host);
+ Encode::_utf8_off($_[0]);
+ };
+ return !$@;
+ }
eval {
require URI::_idna;
$_[0] = URI::_idna::encode($_[0]);
@@ -75,6 +87,7 @@ sub host
my $port = ($tmp =~ /(:\d+)$/) ? $1 : "";
my $new = shift;
$new = "" unless defined $new;
+ Encode::_utf8_off($new) if $URI::COERCE_OCTETS;
if (length $new) {
$new =~ s/[@]/%40/g; # protect @
if ($new =~ /^[^:]*:\d*\z/ || $new =~ /]:\d*\z/) {

0 comments on commit 1a9327f

Please sign in to comment.