Skip to content

Commit

Permalink
Bugfix for #102 -- "5.11 breaks HTML::FormatExternal"
Browse files Browse the repository at this point in the history
Test case added.
Skipping attempt to unescape empty authority part.

See: #102

The previous fix checked the result of the regex-match.
However, the regex-match could have avoided the situation
in the first place.

The new regex now asks for a non-zero authority part.

Skip IPv6 handling of schemes that do not have an authority part.

Currently: data, file, ldapi, urn, sqlite, sqlite3

Fix: Fallback to pre 5.11 for specific schemes (i.e. 'mailto:').

Short test cases added for 'mailto:' URIs having
address literals (IPv4 and IPv6).

Modernized t/file.t to use Test::More instead of plain TAP.

In preparation of more future tests.

Tests added to show that domain in file:// is properly escaped.
  • Loading branch information
Perlbotics authored and oalders committed Jul 10, 2022
1 parent 093d956 commit 725fbfb
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 58 deletions.
6 changes: 6 additions & 0 deletions Changes
@@ -1,6 +1,12 @@
Revision history for URI

{{$NEXT}}
- Fix an issue where i.e. 'file:///tmp/###' was not properly escaped.
A non-existing authority part was accidentally processed.
Details: https://github.com/libwww-perl/URI/issues/102
(GH#102) (Perlbotics)
- Reverts to previous behavior (5.10) for 'mailto:' scheme for
escaping square brackets.

5.11 2022-07-04 20:53:38Z
- Fix some typos in URI::file (GH#94) (Olaf Alders)
Expand Down
24 changes: 23 additions & 1 deletion lib/URI.pm
Expand Up @@ -23,6 +23,20 @@ our $uric4user = quotemeta( q{!$'()*,;:._~%-+=%&} ) . "A-Za-z0-9" . ( HAS_RESER

our $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*';

# These schemes don't have an IPv6+ address part.
our $schemes_without_host_part_re = 'data|ldapi|urn|sqlite|sqlite3';

# These schemes can have an IPv6+ authority part:
# file, ftp, gopher, http, https, ldap, ldaps, mms, news, nntp, nntps, pop, rlogin, rtsp, rtspu, rsync, sip, sips, snews,
# telnet, tn3270, ssh, sftp
# (all DB URIs, i.e. cassandra, couch, couchdb, etc.), except 'sqlite:', 'sqlite3:'. Others?
#MAINT: URI has no test coverage for DB schemes
#MAINT: decoupling - perhaps let each class decide itself by defining a member function 'scheme_has_authority_part()'?

#MAINT: 'mailto:' needs special treatment for IPv* addresses / RFC 5321 (4.1.3). Until then: restore all '[', ']'
# These schemes need fallback to previous (<= 5.10) encoding until a specific handler is available.
our $fallback_schemes_re = 'mailto';

use Carp ();
use URI::Escape ();

Expand Down Expand Up @@ -100,8 +114,16 @@ sub _init
sub _fix_uric_escape_for_host_part {
return if HAS_RESERVED_SQUARE_BRACKETS;
return if $_[0] !~ /%/;
return if $_[0] =~ m{^(?:$URI::schemes_without_host_part_re):}os;

# until a scheme specific handler is available, fall back to previous behavior of v5.10 (i.e. 'mailto:')
if ($_[0] =~ m{^(?:$URI::fallback_schemes_re):}os) {
$_[0] =~ s/\%5B/[/gi;
$_[0] =~ s/\%5D/]/gi;
return;
}

if ($_[0] =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]*)(.*)$,os) {
if ($_[0] =~ m{^((?:$URI::scheme_re:)?)//([^/?\#]+)(.*)$}os) {
my $orig = $2;
my ($user, $host) = $orig =~ /^(.*@)?([^@]*)$/;
$user ||= '';
Expand Down
155 changes: 98 additions & 57 deletions t/file.t
Expand Up @@ -3,63 +3,104 @@
use strict;
use warnings;

use Test::More;
use URI::file;

my @tests = (
[ "file", "unix", "win32", "mac" ],
#---------------- ------------ --------------- --------------
[ "file://localhost/foo/bar",
"!/foo/bar", "!\\foo\\bar", "!foo:bar", ],
[ "file:///foo/bar",
"/foo/bar", "\\foo\\bar", "!foo:bar", ],
[ "file:/foo/bar", "!/foo/bar", "!\\foo\\bar", "foo:bar", ],
[ "foo/bar", "foo/bar", "foo\\bar", ":foo:bar",],
[ "file://foo3445x/bar","!//foo3445x/bar", "!\\\\foo3445x\\bar", "!foo3445x:bar"],
[ "file://a:/", "!//a:/", "!A:\\", undef],
[ "file:///A:/", "/A:/", "A:\\", undef],
[ "file:///", "/", "\\", undef],
[ ".", ".", ".", ":"],
[ "..", "..", "..", "::"],
[ "%2E", "!.", "!.", ":."],
[ "../%2E%2E", "!../..", "!..\\..", "::.."],
);

my @os = @{shift @tests};
shift @os; # file

my $num = @tests;
print "1..$num\n";

my $testno = 1;

for my $t (@tests) {
my @t = @$t;
my $file = shift @t;
my $err;

my $u = URI->new($file, "file");
my $i = 0;
for my $os (@os) {
my $f = $u->file($os);
my $expect = $t[$i];
$f = "<undef>" unless defined $f;
$expect = "<undef>" unless defined $expect;
my $loose;
$loose++ if $expect =~ s/^!//;
if ($expect ne $f) {
print "URI->new('$file', 'file')->file('$os') ne $expect, but $f\n";
$err++;
}
if (defined($t[$i]) && !$loose) {
my $u2 = URI::file->new($t[$i], $os);
unless ($u2->as_string eq $file) {
print "URI::file->new('$t[$i]', '$os') ne $file, but $u2\n";
$err++;
}
}
$i++;
}
print "not " if $err;
print "ok $testno\n";
$testno++;

subtest 'OS related tests (unix, win32, mac)' => sub {

my @tests = (
["file", "unix", "win32", "mac"],

#---------------- ------------ --------------- --------------
["file://localhost/foo/bar", "!/foo/bar", "!\\foo\\bar", "!foo:bar",],
["file:///foo/bar", "/foo/bar", "\\foo\\bar", "!foo:bar",],
["file:/foo/bar", "!/foo/bar", "!\\foo\\bar", "foo:bar",],
["foo/bar", "foo/bar", "foo\\bar", ":foo:bar",],
[
"file://foo3445x/bar", "!//foo3445x/bar",
"!\\\\foo3445x\\bar", "!foo3445x:bar"
],
["file://a:/", "!//a:/", "!A:\\", undef],
["file:///A:/", "/A:/", "A:\\", undef],
["file:///", "/", "\\", undef],
[".", ".", ".", ":"],
["..", "..", "..", "::"],
["%2E", "!.", "!.", ":."],
["../%2E%2E", "!../..", "!..\\..", "::.."],
);

my @os = @{shift @tests};
shift @os; # file

for my $t (@tests) {
my @t = @$t;
my $file = shift @t;
my $u = URI->new($file, "file");
my $i = 0;

for my $os (@os) {
my $f = $u->file($os);
my $expect = $t[$i];
$f = "<undef>" unless defined $f;
$expect = "<undef>" unless defined $expect;
my $loose;
$loose++ if $expect =~ s/^!//;

is($f, $expect) or diag "URI->new('$file', 'file')->file('$os')";

if (defined($t[$i]) && !$loose) {
my $u2 = URI::file->new($t[$i], $os);
is($u2->as_string, $file)
or diag "URI::file->new('$t[$i]', '$os')";
}

$i++;
}
}

};


SKIP: {
skip "No pre 5.11 regression tests yet.", 1
if URI::HAS_RESERVED_SQUARE_BRACKETS;

subtest "Including Domains" => sub {

is(
URI->new('file://example.com/tmp/file.part[1]'),
'file://example.com/tmp/file.part%5B1%5D'
);
is(
URI->new('file://127.0.0.1/tmp/file.part[2]'),
'file://127.0.0.1/tmp/file.part%5B2%5D'
);
is(
URI->new('file://localhost/tmp/file.part[3]'),
'file://localhost/tmp/file.part%5B3%5D'
);
is(
URI->new('file://[1:2:3::beef]/tmp/file.part[4]'),
'file://[1:2:3::beef]/tmp/file.part%5B4%5D'
);
is(
URI->new('file:///[1:2:3::1ce]/tmp/file.part[5]'),
'file:///%5B1:2:3::1ce%5D/tmp/file.part%5B5%5D'
);

};

}


subtest "Regression Tests" => sub {

# Regression test for https://github.com/libwww-perl/URI/issues/102
my $with_hashes = URI::file->new_abs("/tmp/###");
is($with_hashes, 'file:///tmp/%23%23%23', "issue GH#102");

};


done_testing;
12 changes: 12 additions & 0 deletions t/mailto.t
Expand Up @@ -55,4 +55,16 @@ TODO: {
is $u, 'mailto:%22foo%20bar+baz%22@example.com', '... and stringification works';
}

# RFC 5321 (4.1.3) - Address Literals

# IPv4
$u = URI->new('mailto:user@[127.0.0.1]');
is $u->to, 'user@[127.0.0.1]', 'IPv4 host name';
is $u, 'mailto:user@[127.0.0.1]', '... and stringification works';

# IPv6
$u = URI->new('mailto:user@[IPv6:fe80::e828:209d:20e:c0ae]');
is $u->to, 'user@[IPv6:fe80::e828:209d:20e:c0ae]', 'IPv4 host name';
is $u, 'mailto:user@[IPv6:fe80::e828:209d:20e:c0ae]', '... and stringification works';

done_testing;

0 comments on commit 725fbfb

Please sign in to comment.