/
ignoremark.t
81 lines (70 loc) · 3.94 KB
/
ignoremark.t
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
use v6;
use Test;
plan 43;
=begin description
Testing the C<:m> or C<:ignoremark> regex modifier - more tests are always welcome
TODO: need some tests for chars with multiple markings.
=end description
#L<S05/Modifiers/"The :m (or :ignoremark) modifier">
# Table of ignoremark cases. Each row is
#   (haystack, needle, optional note appended to the test description, expected result)
# The loop over @a matches the haystack against /:m needle/ and compares the
# outcome with the expected Bool.
my @a =
    ('ä', 'a', '',                                            True),
    ('a', 'ä', '',                                            True),
    ('à', 'a', '',                                            True),
    ('á', 'a', '',                                            True),
    ('â', 'a', '',                                            True),
    ('å', 'a', '',                                            True),
    ('ƌ', 'd', 'TOPBAR is not a mark',                        False),
    ('å', 'ä', 'Both pattern and string may contain accents', True),
    ('a', 'ä', 'Pattern may contain accents',                 True)
;
# Build the description string for one ignoremark test.
#   $Haystack - string being matched
#   $needle   - text interpolated into the regex
#   $expected - expected truth value of the match
#   $str      - optional note; appended after " — " when it is true (non-empty)
# Returns the assembled description.
sub get-string ($Haystack, $needle, $expected, $str) {
    my $claim   = “so('$Haystack' ~~ /:m $needle /), $expected”;
    my $verdict = $expected ?? ‘matches’ !! “doesn't match”;
    my $note    = $str ?? " — $str" !! "";
    “$claim — Ignoremark: $Haystack $verdict $needle$note”;
}
# Run every row of @a through the :m modifier.
# Each row produces one test for the pair as written. Rows that are expected
# to match produce two more, checking that uppercasing either side makes the
# match fail.
for @a -> $i {
    my $Haystack = $i[0];
    my $needle   = $i[1];
    my $note     = $i[2];
    my $expected = $i[3];
    is( so($Haystack ~~ /:m $needle /), $expected, get-string($Haystack, $needle, $expected, $note));
    # Rows that are not supposed to match get no uppercase follow-up tests
    next unless $expected;
    # Make sure uppercasing causes it not to match
    $Haystack = $Haystack.uc;
    is( so($Haystack ~~ /:m $needle /), !$expected, get-string($Haystack, $needle, !$expected, $note));
    # Same check the other way round: original haystack, uppercased needle
    $Haystack = $i[0];
    $needle = $needle.uc;
    is( so($Haystack ~~ /:m $needle /), !$expected, get-string($Haystack, $needle, !$expected, $note));
}
# The long spelling :ignoremark is checked in both positions: as an adverb on
# m// and as an inline modifier inside the regex. The descriptions differ so a
# failure identifies which form broke.
ok('ä' ~~ m:ignoremark/a/, 'Ignoremark: spelling out :ignoremark as an m// adverb also works');
ok('ä' ~~ /:ignoremark a/, 'Ignoremark: spelling out :ignoremark inside the regex also works');
# Quantifiers, character classes, negated classes and ranges under :m;
# each test compares the matched text with the expected substring.
is('fooäàaáâåbar' ~~ m:m/a+ b/, 'äàaáâåb', 'Ignoremark: a+ b');
is('fooäàaáâåbar' ~~ m:m/<[ab]>+/, 'äàaáâåba', 'Ignoremark with character class');
is('fooäàaáâåbar' ~~ m:m/<-[a]>+/, 'foo', 'Ignoremark with negated character class');
is('fooäàaáâåbar' ~~ m:m/<[a..b]>+/, 'äàaáâåba', 'Ignoremark with range in character class');
# RT #116256
# :ignoremark applied to a quoted literal in the pattern.
{
    my $matched = "ü" ~~ /:ignoremark 'u'/;
    ok $matched, 'Ignoremark with subrule';
}
# RT #130465
# The subject is a double quote followed by COMBINING TILDE and a closing
# double quote. Under :ignoremark the start of the string has to match a
# plain double quote, written either as a quoted literal or backslash-escaped.
{
    my $marked-quote = qq["\c[COMBINING TILDE]"];
    ok $marked-quote ~~ / ^ :ignoremark '"'/, 'Ignoremark on quoted double-quote';
    ok $marked-quote ~~ / ^ :ignoremark \"/, 'Ignoremark on backslashed double-quote';
}
# Ensure that synthetics also properly can match the base character
# (the first test combines :m with :i; the second anchors inside a capture group)
is "\c[LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW]" ~~ /:m:i j /, 'ǰ̣',
    "Synthetics with decomposable base characters properly work with ignoremark";
ok qq{"\c[ZERO WIDTH JOINER]a"} ~~ / (:ignoremark ^ '"' ) /,
    "Synthetics properly can be matched with ignoremark";
# Three ARABIC NUMBER SIGN codepoints precede the letter; the match is
# expected to return the whole string.
is “{"\c[ARABIC NUMBER SIGN]" x 3}a” ~~ /:m a/, “{"\c[ARABIC NUMBER SIGN]" x 3}a”,
    "Ignoremark supports Prepend";
# Count for how many prepend-mark counts (0 through 9) the :m match on the
# base letter returns the complete string; all ten are expected to succeed.
my Int:D $val = 0;
for ^10 {
    my Str:D $str = "\c[arabic number sign]" x $_ ~ 'a';
    # Parenthesised so the Match result is what gets compared with $str;
    # ~~ and eq share a precedence level, so the grouping is made explicit.
    $val++ if ($str ~~ /:m a/) eq $str;
}
# is() instead of ok() so a failure reports how many of the ten cases passed
is $val, 10, "Ignoremark supports 0..9 prepend marks";
# Subjects made up only of the named mark codepoints (the test descriptions
# call these Prepend and Prepend+Extend degenerates). The patterns are kept as
# literals; only the subject strings are named once and reused.
{
    my $lone-prepend   = "\c[ARABIC NUMBER SIGN]";
    my $two-prepends   = "\c[SYRIAC ABBREVIATION MARK, ARABIC NUMBER SIGN]";
    my $prepend-extend = "\c[SYRIAC ABBREVIATION MARK, COMBINING CARON]";

    is $lone-prepend ~~ /:m "\c[ARABIC NUMBER SIGN]" /, $lone-prepend,
        "Ignoremark can match degenerate Prepend";
    is $two-prepends ~~ /:m "\c[SYRIAC ABBREVIATION MARK]" /, $two-prepends,
        "Ignoremark matches the first codepoint for all Prepend degenerates";
    nok $two-prepends ~~ /:m "\c[ARABIC NUMBER SIGN]" /,
        "Ignoremark does not match the second codepoint for all Prepend degenerates";
    nok $prepend-extend ~~ /:m "\c[COMBINING CARON]" /,
        "Ignoremark doesn't match second codepoint for Prepend+Extend degenerate";
    is $prepend-extend ~~ /:m "\c[SYRIAC ABBREVIATION MARK]" /, $prepend-extend,
        "Ignoremark matches the first codepoint for all Prepend+Extend degenerates";
}
# vim: syn=perl6 sw=4 ts=4 expandtab