/
ch-2.p6
executable file
·106 lines (88 loc) · 3.1 KB
/
ch-2.p6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env perl6
use v6;
# We only do the opertions gauranteed to preserve semantics.
grammar URL {
token TOP {
^
<scheme> ':'
[
'//'
[ <userinfo> '@' ]?
<host>
[ ':' <port> ]?
]?
<path>
[ '?' <query> ]?
[ '#' <fragment> ]?
$
}
token scheme { <[A .. Z a .. z ]> <[ A .. Z a .. z 0 .. 9 . + - ]>* }
token userinfo { <username> ':' <password> }
token username { <[ \S ] - [ : ]>+ }
token password { <[ \S ] - [ @ ]>* }
token host { <[ \S ] - [ : ? / \# ]>+ }
token port { <[ \S ] - [ ? / \# ]>+ }
token path { [ '/' <[ \S ] - [ ? \# ]>* ]? }
token query { <[ \S ] - [ \# ]>* }
token fragment { \S* }
}
sub MAIN(Str:D $url) {
my $normalized = normalize($url);
say $normalized;
}
sub normalize($url -->Str:D) {
my $parse = URL.parse($url);
die "Invalid URL" unless $parse.defined;
my $str = $parse<scheme>.lc ~ ":";
$str ~= "//" if $parse<host>:exists;
$str ~= $parse<userinfo> ~ '@' if $parse<userinfo>:exists ;
$str ~= normalize-percent($parse<host>.lc) if $parse<host>:exists;
if $parse<scheme>.lc eq 'http' and $parse<port>:exists {
$str ~= ":" ~ $parse<port> if $parse<port> ≠ 80;
} elsif $parse<scheme>.lc eq 'https' and $parse<port>:exists {
$str ~= ":" ~ $parse<port> if $parse<port> ≠ 443;
} elsif $parse<port>:exists {
$str ~= ":" ~ $parse<port>;
}
$str ~= normalize-percent($parse<path>) if $parse<path>:exists;
$str ~= normalize-percent($parse<query>) if $parse<query>:exists;
$str ~= normalize-percent($parse<fragment>) if $parse<fragment>:exists;
return $str;
}
sub normalize-percent($part) {
my $remainder = $part;
my $output = '';
while $remainder.chars {
my $beginning = S/ ( <-[ % ]>* ) .*$/$0/ with $remainder;
my $end = S/ <-[ % ]>*// with $remainder;
$output ~= $beginning // '';
if ($end // '').chars {
if $end !~~ m:i/^ '%' ( <[ a..f 0..9 ]> ** 2..2 )/ {
die("Invalid percent encoding");
}
my $encoding = :16($end.substr(1, 2));
if 0x41 ≤ $encoding ≤ 0x5a { # Upper case
$output ~= $encoding.chr;
} elsif 0x61 ≤ $encoding ≤ 0x7a { # Lower case
$output ~= $encoding.chr;
} elsif 0x30 ≤ $encoding ≤ 0x39 { # Digits
$output ~= $encoding.chr;
} elsif $encoding == 0x2d { # Hyphen
$output ~= $encoding.chr;
} elsif $encoding == 0x2e { # Period
$output ~= $encoding.chr;
} elsif $encoding == 0x5f { # Underscore
$output ~= $encoding.chr;
} elsif $encoding == 0x7e { # Tilde
$output ~= $encoding.chr;
} else {
$output ~= "%" ~ $encoding.fmt("%02x");
}
$remainder = $end.substr(3);
} else {
# No defined end
$remainder = '';
}
}
return $output;
}