Browse files

added IDNA support

  • Loading branch information...
1 parent 498940a commit 2927843d9592ae11adc4e6b638a6f1329469b17b Sebastian Riedel committed Jan 11, 2010
Showing with 278 additions and 14 deletions.
  1. +5 −4 Changes
  2. +195 −0 lib/Mojo/ByteStream.pm
  3. +2 −0 lib/Mojo/Command/Get.pm
  4. +1 −1 lib/Mojo/Message/Request.pm
  5. +3 −3 lib/Mojo/Transaction/Single.pm
  6. +41 −4 lib/Mojo/URL.pm
  7. +9 −1 t/mojo/bytestream.t
  8. +22 −1 t/mojo/url.t
View
9 Changes
@@ -1,12 +1,9 @@
This file documents the revision history for Perl extension Mojo.
0.999915 2010-01-10 00:00:00
- - Added routes captures to params in Mojolicious.
- - Added charset plugin to Mojolicious. (charsbar)
- - Added native PSGI support.
- - Added simple reverse proxy support with tests.
- Added IPv6 support.
- Added SSL/TLS support.
+ - Added IDNA support.
- Added UNIX domain socket support to daemons.
- Added transparent kqueue and epoll support to daemons and client.
- Added support for listening to multiple locations to the daemons.
@@ -15,6 +12,10 @@ This file documents the revision history for Perl extension Mojo.
mojo daemon --listen http:*:3000,http:*:3001,http:*:3002
mojo daemon --listen http:[::1]:3000
mojo daemon --listen https:*:3001:/foo/server.crt:/foo/server.key
+ - Added routes captures to params in Mojolicious.
+ - Added native PSGI support.
+ - Added charset plugin to Mojolicious. (charsbar)
+ - Added simple reverse proxy support with tests.
- Added simpler way to define default controller and action for a
route.
$r->route('/foo')->to('mycontroller#myaction');
View
195 lib/Mojo/ByteStream.pm
@@ -16,6 +16,18 @@ require Encode;
require MIME::Base64;
require MIME::QuotedPrint;
+# Punycode bootstring parameters
+use constant PUNYCODE_BASE => 36;
+use constant PUNYCODE_TMIN => 1;
+use constant PUNYCODE_TMAX => 26;
+use constant PUNYCODE_SKEW => 38;
+use constant PUNYCODE_DAMP => 700;
+use constant PUNYCODE_INITIAL_BIAS => 72;
+use constant PUNYCODE_INITIAL_N => 128;
+
+# Punycode delimiter
+my $DELIMITER = chr 0x2D;
+
# XHTML 1.0 entities for html_unescape
my %ENTITIES = (
Aacute => 193,
@@ -440,6 +452,160 @@ sub md5_sum {
return $self;
}
+sub punycode_decode {
+ my $self = shift;
+
+ # Character semantics
+ no bytes;
+
+ # Input
+ my $input = $self->{bytestream};
+
+ # Defaults
+ my $n = PUNYCODE_INITIAL_N;
+ my $i = 0;
+ my $bias = PUNYCODE_INITIAL_BIAS;
+ my @output;
+
+ # Delimiter?
+ if ($input =~ s/(.*)$DELIMITER//os) { push @output, split //, $1 }
+
+ # Decode
+ while (length $input) {
+ my $oldi = $i;
+ my $w = 1;
+
+ # Base to infinity in steps of base
+ for (my $k = PUNYCODE_BASE; 1; $k += PUNYCODE_BASE) {
+
+ # Digit
+ my $digit = ord substr $input, 0, 1, '';
+ $digit =
+ $digit < 0x40 ? $digit + (26 - 0x30) : ($digit & 0x1f) - 1;
+
+ $i += $digit * $w;
+ my $t = $k - $bias;
+ $t =
+ $t < PUNYCODE_TMIN ? PUNYCODE_TMIN
+ : $t > PUNYCODE_TMAX ? PUNYCODE_TMAX
+ : $t;
+
+ # Break
+ last if $digit < $t;
+
+ $w *= (PUNYCODE_BASE - $t);
+ }
+
+ # Bias
+ $bias = _adapt($i - $oldi, @output + 1, $oldi == 0);
+
+ $n += $i / (@output + 1);
+ $i = $i % (@output + 1);
+
+ # Insert
+ splice @output, $i, 0, chr($n);
+
+ # Increment
+ $i++;
+ }
+
+ # Output
+ $self->{bytestream} = join '', @output;
+
+ return $self;
+}
+
+sub punycode_encode {
+ my $self = shift;
+
+ # Character semantics
+ no bytes;
+
+ # Input
+ my $input = $self->{bytestream};
+ my $output = $input;
+ my $length = length $input;
+
+ # Remove non basic characters
+ $output =~ s/[^\x00-\x7f]+//ogs;
+
+ # Non basic characters in input?
+ my $h = my $b = length $output;
+ $output .= $DELIMITER if $b > 0;
+
+ # Split input
+ my @input = map ord, split //, $input;
+ my @chars = sort grep { $_ >= PUNYCODE_INITIAL_N } @input;
+
+ # Defaults
+ my $n = PUNYCODE_INITIAL_N;
+ my $delta = 0;
+ my $bias = PUNYCODE_INITIAL_BIAS;
+
+ # Encode
+ for my $m (@chars) {
+
+ # Basic character
+ next if $m < $n;
+
+ # Delta
+ $delta += ($m - $n) * ($h + 1);
+
+ # Walk all code points in order
+ $n = $m;
+ for (my $i = 0; $i < $length; $i++) {
+ my $c = $input[$i];
+
+ # Basic character?
+ $delta++ if $c < $n;
+
+ # Non basic character
+ if ($c == $n) {
+ my $q = $delta;
+
+ # Base to infinity in steps of base
+ for (my $k = PUNYCODE_BASE; 1; $k += PUNYCODE_BASE) {
+ my $t = $k - $bias;
+ $t =
+ $t < PUNYCODE_TMIN ? PUNYCODE_TMIN
+ : $t > PUNYCODE_TMAX ? PUNYCODE_TMAX
+ : $t;
+
+ # Break
+ last if $q < $t;
+
+ # Code point for digit "t"
+ my $o = $t + (($q - $t) % (PUNYCODE_BASE - $t));
+ $output .= chr $o + ($o < 26 ? 0x61 : 0x30 - 26);
+
+ $q = ($q - $t) / (PUNYCODE_BASE - $t);
+ }
+
+ # Code point for digit "q"
+ $output .= chr $q + ($q < 26 ? 0x61 : 0x30 - 26);
+
+ # Bias
+ $bias = _adapt($delta, $h + 1, $h == $b);
+
+ # Reset delta
+ $delta = 0;
+
+ # Increment
+ $h++;
+ }
+ }
+
+ # Increment
+ $delta++;
+ $n++;
+ }
+
+ # Output
+ $self->{bytestream} = $output;
+
+ return $self;
+}
+
# Old people don't need companionship.
# They need to be isolated and studied so it can be determined what nutrients
# they have that might be extracted for our personal use.
@@ -532,6 +698,25 @@ sub xml_escape {
return $self;
}
+# Punycode helper
+sub _adapt {
+ my ($delta, $numpoints, $firsttime) = @_;
+
+ # Delta
+ $delta = $firsttime ? $delta / PUNYCODE_DAMP : $delta / 2;
+ $delta += $delta / $numpoints;
+
+ my $k = 0;
+ while ($delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2) {
+ $delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
+ $k += PUNYCODE_BASE;
+ }
+
+ return $k
+ + ( ((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * $delta)
+ / ($delta + PUNYCODE_SKEW));
+}
+
# Helper for url_sanitize
sub _sanitize {
my $hex = shift;
@@ -589,6 +774,8 @@ Mojo::ByteStream - ByteStream
$stream->url_sanitize;
$stream->url_unescape;
$stream->xml_escape;
+ $stream->punycode_encode;
+ $stream->punycode_decode;
my $size = $stream->size;
@@ -659,6 +846,14 @@ the following new ones.
$stream = $stream->md5_sum;
+=head2 C<punycode_decode>
+
+ $stream = $stream->punycode_decode;
+
+=head2 C<punycode_encode>
+
+ $stream = $stream->punycode_encode;
+
=head2 C<qp_decode>
$stream = $stream->qp_decode;
View
2 lib/Mojo/Command/Get.pm
@@ -7,6 +7,7 @@ use warnings;
use base 'Mojo::Command';
+use Mojo::ByteStream 'b';
use Mojo::Client;
use Mojo::Transaction::Single;
@@ -34,6 +35,7 @@ sub run {
# URL
my $url = shift;
die $self->usage unless $url;
+ $url = b($url)->decode('UTF-8')->to_string;
# Client
my $client = Mojo::Client->new;
View
2 lib/Mojo/Message/Request.pm
@@ -50,7 +50,7 @@ sub fix_headers {
# Host header is required in HTTP 1.1 requests
if ($self->at_least_version('1.1')) {
- my $host = $self->url->host;
+ my $host = $self->url->ihost;
my $port = $self->url->port;
$host .= ":$port" if $port;
$self->headers->host($host) unless $self->headers->host;
View
6 lib/Mojo/Transaction/Single.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2008-2010, Sebastian Riedel.
+# Copyright (C) 2008-2009, Sebastian Riedel.
package Mojo::Transaction::Single;
@@ -88,13 +88,13 @@ sub client_info {
my $self = shift;
my $scheme = $self->req->url->scheme;
- my $host = $self->req->url->host;
+ my $host = $self->req->url->ihost;
my $port = $self->req->url->port;
# Proxy
if (my $proxy = $self->req->proxy) {
$scheme = $proxy->scheme;
- $host = $proxy->host;
+ $host = $proxy->ihost;
$port = $proxy->port;
}
View
45 lib/Mojo/URL.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2008-2010, Sebastian Riedel.
+# Copyright (C) 2008-2009, Sebastian Riedel.
package Mojo::URL;
@@ -38,21 +38,23 @@ sub authority {
my $host = $authority;
# Userinfo
- if ($authority =~ /^([^\@]*)\@(.*)$/) {
+ if ($authority =~ /^([^\@]+)\@(.+)$/) {
$userinfo = $1;
$host = $2;
}
# Port
my $port = undef;
- if ($host =~ /^(.*)\:(\d*)$/) {
+ if ($host =~ /^(.+)\:(\d+)$/) {
$host = $1;
$port = $2;
}
$self->userinfo(
$userinfo ? b($userinfo)->url_unescape->to_string : undef);
- $self->host($host ? b($host)->url_unescape->to_string : undef);
+ $host
+ ? $self->ihost(b($host)->url_unescape->to_string)
+ : $self->host(undef);
$self->port($port);
return $self;
@@ -91,6 +93,36 @@ sub clone {
return $clone;
}
+sub ihost {
+ my ($self, $host) = @_;
+
+ # Set
+ if (defined $host) {
+
+ # Decode parts
+ my @decoded;
+ for my $part (split /\./, $_[1]) {
+ if ($part =~ /^xn--(.+)$/) {
+ $part = b($1)->punycode_decode->to_string;
+ }
+ push @decoded, $part;
+ }
+ $self->host(join '.', @decoded);
+
+ return $self;
+ }
+
+ # Encode parts
+ my @encoded;
+ for my $part (split /\./, $self->host || '') {
+ $part = 'xn--' . b($part)->punycode_encode->to_string
+ if $part =~ /[^\x00-\x7f]/;
+ push @encoded, $part;
+ }
+
+ return join '.', @encoded;
+}
+
sub is_abs {
my $self = shift;
return 1 if $self->scheme && $self->authority;
@@ -315,6 +347,11 @@ following new ones.
my $url2 = $url->clone;
+=head2 C<ihost>
+
+ my $ihost = $url->ihost;
+ $url = $url->ihost('xn--bcher-kva.ch');
+
=head2 C<is_abs>
my $is_abs = $url->is_abs;
View
10 t/mojo/bytestream.t
@@ -10,7 +10,7 @@ use utf8;
# Homer, we're going to ask you a few simple yes or no questions.
# Do you understand?
# Yes. *lie dectector blows up*
-use Test::More tests => 33;
+use Test::More tests => 35;
use_ok('Mojo::ByteStream', 'b');
@@ -136,3 +136,11 @@ is("$stream", 'привет&lt;foo&gt;');
# Decode invalid utf8
$stream = b("\x{1000}")->decode('UTF-8');
is($stream->to_string, undef);
+
+# punycode_encode
+$stream = b('bücher')->punycode_encode;
+is("$stream", 'bcher-kva');
+
+# punycode_decode
+$stream = b('bcher-kva')->punycode_decode;
+is("$stream", 'bücher');
View
23 t/mojo/url.t
@@ -5,7 +5,9 @@
use strict;
use warnings;
-use Test::More tests => 81;
+use utf8;
+
+use Test::More tests => 94;
# I don't want you driving around in a car you built yourself.
# You can sit there complaining, or you can knit me some seat belts.
@@ -148,3 +150,22 @@ is($url->host, '[::1]');
is($url->port, 3000);
is($url->path, '/');
is("$url", 'http://[::1]:3000/');
+
+# IDNA
+$url = Mojo::URL->new('http://bücher.ch:3000/foo');
+is($url->is_abs, 1);
+is($url->scheme, 'http');
+is($url->host, 'bücher.ch');
+is($url->ihost, 'xn--bcher-kva.ch');
+is($url->port, 3000);
+is($url->path, '/foo');
+is("$url", 'http://bücher.ch:3000/foo');
+
+# IDNA (snowman)
+$url = Mojo::URL->new('http://☃.net/');
+is($url->is_abs, 1);
+is($url->scheme, 'http');
+is($url->host, '☃.net');
+is($url->ihost, 'xn--n3h.net');
+is($url->path, '/');
+is("$url", 'http://☃.net/');

0 comments on commit 2927843

Please sign in to comment.