-
Notifications
You must be signed in to change notification settings - Fork 135
/
charset.t
143 lines (111 loc) · 4.87 KB
/
charset.t
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
use v6;
use Test;
=begin pod
This file was derived from the Perl 5 CPAN module Perl6::Rules,
version 0.3 (12 Apr 2004), file t/charset.t.
It has (hopefully) been, and should continue to be, updated to
remain valid Perl 6.
=end pod
plan 55;

# Broken:
# L<S05/Extensible metasyntax (C<< <...> >>)/"A leading [ ">
# An enumerated class matches one of the listed characters; the capture
# holds the character that was found.
ok "zyxaxyz" ~~ m/(<[aeiou]>)/, 'Simple set';
is $0, 'a', 'Simple set capture';

# L<S05/Extensible metasyntax (C<< <...> >>)/"A leading - indicates">
# A leading '-' negates the class.
nok "a" ~~ m/<-[aeiou]>/, 'Simple neg set failure';
ok "f" ~~ m/(<-[aeiou]>)/, 'Simple neg set match';
is $0, 'f', 'Simple neg set capture';

# RT #126746
{
    ok "a" ~~ m/<![a]>/, "zerowidth negated character class can match at end of string";
}
# L<S05/Extensible metasyntax (C<< <...> >>)/Character classes can be combined>
# Subtracting one enumerated class from another.
nok "a" ~~ m/(<[a..z]-[aeiou]>)/, 'Difference set failure';
ok "y" ~~ m/(<[a..z]-[aeiou]>)/, 'Difference set match';
is $0, 'y', 'Difference set capture';

# RT #115802
# Backslash sequences used as operands of a difference.
ok "abc" ~~ m/<[\w]-[\n]>/, 'Difference set match 1';
nok "abc" ~~ m/<[\w]-[\N]>/, 'Difference set match 2';
is "abc123" ~~ m/<[\w]-[a\d]>+/, 'bc', 'Difference set match 3';
is "abc123" ~~ m/<[\w]-[1\D]>+/, '23', 'Difference set match 4';
#?niecza todo 'gives c123?'
is "abc123def" ~~ m/<[\w]-[\D\n]>+/, '123', 'Difference set match 5';
is "abc123def" ~~ m/<[\w]-[\D\h]>+/, '123', 'Difference set match 6';
is "abc" ~~ /<-["\\\t\n]>+/, 'abc', 'Difference set match 7';

# A named class as the left-hand operand.
nok "a" ~~ m/(<+alpha-[aeiou]>)/, 'Named difference set failure';
ok "y" ~~ m/(<+alpha-[aeiou]>)/, 'Named difference set match';
is $0, 'y', 'Named difference set capture';

# Several subtractions chained in one class.
nok "y" ~~ m/(<[a..z]-[aeiou]-[y]>)/, 'Multi-difference set failure';
ok "f" ~~ m/(<[a..z]-[aeiou]-[y]>)/, 'Multi-difference set match';
is $0, 'f', 'Multi-difference set capture';
# Square brackets and curly braces can be listed in a class when backslashed.
ok ']' ~~ m/(<[\]]>)/, 'quoted close LSB match';
is $0, ']', 'quoted close LSB capture';

ok '[' ~~ m/(<[\[]>)/, 'quoted open LSB match';
is $0, '[', 'quoted open LSB capture';

ok '{' ~~ m/(<[\{]>)/, 'quoted open LCB match';
is $0, '{', 'quoted open LCB capture';

ok '}' ~~ m/(<[\}]>)/, 'quoted close LCB match';
is $0, '}', 'quoted close LCB capture';
# RT #67124
# An embedded comment between class elements: checked once through string
# evaluation and once as a pattern compiled with this file.
eval-lives-ok '"foo" ~~ /<[f] #`[comment] + [o]>/',
    'comment embedded in charset can be parsed';
ok "foo" ~~ /<[f] #`[comment] + [o]>/, 'comment embedded in charset works';

# RT #67122
ok("\x[FFEF]" ~~ /<[\x0..\xFFEF]>/, 'large \\x char spec');
# Malformed character classes must be rejected.  Each pattern is handed to
# throws-like as a string, so it is evaluated by throws-like rather than
# compiled as part of this file.
#?niecza todo
throws-like "'RT #71702' ~~ /<[d..b]>? RT/", Exception,
    'reverse range in charset is lethal (RT #71702)';

# Two class elements with no '+' or '-' between them.  Brackets and angles
# are balanced in every pattern, so the missing operator is the only thing
# wrong with it; a stray ']' would make the pattern unparseable for an
# unrelated reason and hide a regression in the operator check.
throws-like "'x' ~~ /<[abc] [def]>? RT/", Exception,
    'missing + or - is fatal 1';
throws-like "'x' ~~ /<:Kata :Hira>? RT/", Exception,
    'missing + or - is fatal 2';
throws-like "'x' ~~ /<+alpha digit>? RT/", Exception,
    'missing + or - is fatal 3';

# RT #64220
ok 'b' ~~ /<[. .. b]>/, 'weird char class matches at least its end point';
# RT #69682
# A Perl 5 style "a-z" range must be rejected with a diagnostic that points
# the user at "..".  throws-like evaluates the pattern, fails the test if
# nothing is thrown, and matches the exception's message against the regex
# given as the 'message' matcher.
{
    throws-like "/<[a-z]>/", Exception,
        "STD error message for - as character range",
        message => / 'Unsupported use of - as character range; in Perl 6 please use ..'/;
}
ok('ab' ~~ /^(.*) b/,
   'Quantifiers in capture groups work (RT #100650)');

# RT #74012
# backslashed characters in char classes
ok('[]\\' ~~ /^ <[ \[ .. \] ]>+ $ /, 'backslashed chars in char classes');
nok('^' ~~ / <[ \[ .. \] ]> /, '... does not match outside its range');

# RT #89470
{
    nok('' ~~ / <[a..z]-[x]> /, 'Can match empty string against char class');
    nok('x' ~~ / <[a..z]-[x]> /, 'char excluded from class');
    ok('z' ~~ / <[a..z]-[x]> /, '... but others are fine');
}

# RT #120511
{
    is("\r\na" ~~ /<?[\n]>"\r\na"/, "\r\na",
       'look-ahead with windows newline does not advance cursor position');
}
{
    # TOP refers to a token with a hyphenated name from inside a <+ ...> class.
    grammar G {
        token TOP        { <+ kebab-case> }
        token kebab-case { 'a' }
    }

    is(G.subparse('aaa').Str, 'a', "kebab-case allowed in character classes");
    # The hyphenated form below is expected to die when the match is attempted.
    dies-ok({ 'a' ~~ / <+xdigit-digit> / }, "accidental kebabs disallowed");
}
#?rakudo.jvm 2 todo 'ignorecase and character ranges RT #125753'
dies-ok({ EVAL '/<[Ḍ̇..\x2FFF]>/' }, 'Cannot use NFG synthetic as range endpoint');
# RT #125753
is("Aa1" ~~ /:i <[a..z0..9]>+/, "Aa1", ':i with cclass with multiple ranges works');
#?rakudo.jvm 3 skip '"ordbaseat NYI", ignorecase and character ranges RT #125753'
is('%E3%81%82' ~~ /:ignorecase ['%' (<[a..f0..9]>|x)**2]+/, '%E3%81%82',
   ':ignorecase in combination with charclass ranges works with LTM');
is('Ä' ~~ /:ignoremark (<[A..F]>|x)/, 'Ä',
   ':ignoremark in combination with charclass ranges works with LTM');
is('Ä' ~~ /:ignoremark :ignorecase (<[a..f]>|x)/, 'Ä',
   ':ignoremark :ignorecase in combination with charclass ranges works with LTM');

{
    is(("\0\0\0" ~~ /<[\0]>+/).Str, "\0\0\0", '\0 works inside character classes and matches null');
}

# RT #128270
#?rakudo.jvm skip 'ordbaseat NYI'
ok("a" ~~ m:g:ignoremark/<[á]>/, ':g, :ignoremark, and cclass interaction ok');

# vim: ft=perl6