Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

303 lines (250 sloc) 11.071 kb
use v6;
use Test;
plan 79;
=begin pod
This file attempts to cover all the possible variants in regexes that use the
<...> syntax. They are listed in the same order as they are defined in S05.
Other files may have more comprehensive tests for a specific form (such as the
character classes), and those are referenced at the correct spot.
=end pod
# L<S05/Extensible metasyntax (C<< <...> >>)/>
# tests for the simpler parts of <...> syntax in regexes
# the first character is whitespace
{
is('aaaaa' ~~ /< a aa aaaa >/, 'aaaa', 'leading whitespace quotes words (space)');
is('aaaaa' ~~ /< a aa aaaa >/, 'aaaa', 'leading whitespace quotes words (tab)');
eval_dies_ok('"aaaa" ~~ /<a aa>/', '<...> without whitespace calls a function (not quote words)');
}
# A leading alphabetic character means it's a capturing grammatical assertion
{
is('moose' ~~ /<alpha>/, 'm', 'capturing grammatical assertion (1)');
is('1a2b3c' ~~ /<alpha>/, 'a', 'capturing grammatical assertion (2)');
}
{
my regex with-dash { '-' }
ok '-' ~~ /<with-dash>/, 'can call regexes which dashes (positive)';
ok '|' !~~ /<with-dash>/, 'can call regexes which dashes (negative)';
my regex with'hyphen { a }
ok 'a' ~~ /<with'hyphen>/, 'can call regex with hypen (positive)';
ok 'b' !~~ /<with'hyphen>/, 'can call regex with hypen (negative)';
}
# so if the first character is a left parenthesis, it really is a call
#?rakudo skip '<test()> not implemented'
#?niecza skip 'Unable to resolve method test in class Cursor'
{
my $pass = 0;
my sub test (Int $a = 1) {$pass += $a}
'3.14' ~~ /3 <test()>/;
ok($pass, 'function call (no arguments)');
}
#?rakudo skip '<test()> not implemented'
#?niecza skip 'Unable to resolve method test in class Cursor'
{
my $pass = 0;
my sub test (Int $a) {$pass += $a}
'3.14' ~~ /3 <test(2)>/;
ok($pass, 'function call (with arguments)');
}
# If the first character after the identifier is an =,
# then the identifier is taken as an alias for what follows
{
ok 'foo' ~~ /<foo=alpha>/, 'basic <foo=bar> aliasing';
is $<foo>, 'f', 'alias works';
is $<alpha>, 'f', 'alias does not throw away original name';
}
{
ok 'foo' ~~ /<foo=.alpha>/, 'basic <foo=.bar> aliasing';
is $<foo>, 'f', 'alias works';
ok !defined($<alpha>), 'alias does throw away original name';
}
{
ok '123gb' ~~ / <foo=.alpha> /, '<foo=.bar>';
is $<foo>, 'g', '=. renaming worked';
nok $<alpha>.defined, '=. removed the old capture name';
}
# If the first character after the identifier is whitespace, the subsequent
# text (following any whitespace) is passed as a regex
#?rakudo skip 'angle quotes in regexes'
#?niecza skip 'Unable to resolve method test in class Cursor'
{
my $is_regex = 0;
my sub test ($a) {$is_regex++ if $a ~~ Regex}
'whatever' ~~ /w < test hat >/;
ok($is_regex, 'text passed as a regex (1)');
$is_regex = 0;
'whatever' ~~ /w <test complicated . regex '<goes here>'>/;
ok($is_regex, 'more complicated text passed as a regex (2)');
}
# If the first character is a colon followed by whitespace the
# rest of the text is taken as a list of arguments to the method
#?rakudo skip 'colon arguments not implemented'
#?niecza skip 'Unable to resolve method test in class Cursor'
{
my $called_ok = 0;
my sub test ($a, $b) {$called_ok++ if $a && $b}
'some text' ~~ /some <test: 3, 5>/;
ok($called_ok, 'method call syntax in <...>');
}
# No other characters are allowed after the initial identifier.
{
eval_dies_ok('"foo" ~~ /<test*>/', 'no other characters are allowed (*)');
eval_dies_ok('"foo" ~~ /<test|>/', 'no other characters are allowed (|)');
eval_dies_ok('"foo" ~~ /<test&>/', 'no other characters are allowed (&)');
eval_dies_ok('"foo" ~~ /<test:>/', 'no other characters are allowed (:)');
}
# L<S05/Extensible metasyntax (C<< <...> >>)/explicitly calls a method as a subrule>
{
is('blorg' ~~ /<.alpha>/, 'b', 'leading . prevents capturing');
}
# If the dot is not followed by an identifier, it is parsed as
# a "dotty" postfix of some type, such as an indirect method call
#?niecza todo '<.$foo> syntax placeholder'
{
# placeholder test for <.$foo>
lives_ok({
my $method = 'WHAT';
'foo bar baz' ~~ /foo <.$method>/;
}, '<.$foo> syntax placeholder');
}
# A leading $ indicates an indirect subrule. The variable must contain
# either a Regex object, or a string to be compiled as the regex.
{
my $rule = rx/bar/;
my $str = 'qwe';
ok('bar' ~~ /<$rule>/, '<$whatever> subrule (Regex, 1)');
ok('qwer' ~~ /<$str>/, '<$whatever> subrule (String, 1)');
is('abar' ~~ /a<$rule>/, 'abar', '<$whatever> subrule (Regex, 2)');
is('qwer' ~~ /<$str>r/, 'qwer', '<$whatever> subrule (String, 2)');
}
# A leading :: indicates a symbolic indirect subrule
#?rakudo skip 'indirect subrule call not implemented'
{
my $name = 'alpha';
ok('abcdef' ~~ /<::($name)>/, '<::($name)> symbolic indirect subrule');
}
# A leading @ matches like a bare array except that each element is
# treated as a subrule (string or Regex object) rather than as a literal
#?rakudo skip 'TODO: array interpolation into regexes'
{
my @first = <a b c .**4>;
ok('dddd' ~~ /<@first>/, 'strings are treated as a subrule in <@foo>');
my @second = rx/\.**2/, rx/'.**2'/;
ok('abc.**2def' ~~ /<@second>/, 'Regexes are left alone in <@foo> subrule');
}
# A leading % matches like a bare hash except that
# a string value is always treated as a subrule
#?rakudo skip '<%hash> not implemented'
#?niecza skip 'Sigil % is not allowed for regex assertions'
{
my %first = {'<alpha>' => '', 'b' => '', 'c' => ''};
ok('aeiou' ~~ /<%first>/, 'strings are treated as a subrule in <%foo>');
my %second = {rx/\.**2/ => '', rx/'.**2'/ => ''};
ok('abc.**2def' ~~ /<%second>/, 'Regexes are left alone in <%foo> subrule');
}
# A leading { indicates code that produces a regex to be
# interpolated into the pattern at that point as a subrule:
{
ok('abcdef' ~~ /<{'<al' ~ 'pha>'}>/, 'code interpolation');
}
# A leading & interpolates the return value of a subroutine call as a regex.
#?rakudo skip '<&foo()> not implemented'
#?niecza skip 'Anonymous submatch returned a Str instead of a Cursor, violating the submatch protocol'
{
my sub foo {return '<alpha>'}
ok('abcdef' ~~ /<&foo()>/, 'subroutine call interpolation');
}
# If it is a string, the compiled form is cached with the string so that
# it is not recompiled next time you use it unless the string changes.
#?rakudo skip '<$subrule> not implemented'
{
my $counter = 0;
my $subrule = '{$counter++; \'<alpha>\'}';
'abc' ~~ /<$subrule>/;
is($counter, 1, 'code inside string was executed');
'def' ~~ /<$subrule>/;
#?niecza todo "string value was cached"
is($counter, 1, 'string value was cached');
}
# A leading ?{ or !{ indicates a code assertion
{
ok('192' ~~ /(\d**3) <?{$0 < 256}>/, '<?{...}> works');
ok(!('992' ~~ /(\d**3) <?{$0 < 256}>/), '<?{...}> works');
ok(!('192' ~~ /(\d**3) <!{$0 < 256}>/), '<!{...}> works');
ok('992' ~~ /(\d**3) <!{$0 < 256}>/, '<!{...}> works');
}
# A leading [ indicates an enumerated character class
# A leading - indicates a complemented character class
# A leading + may also be supplied
# see charset.t
# The special assertion <.>
# see combchar.t
# L<S05/Extensible metasyntax (C<< <...> >>)/A leading ! indicates a negated meaning (always a zero-width assertion)>
{
ok('1./:"{}=-' ~~ /^[<!alpha> .]+$/, '<!alpha> matches non-letter characters');
ok(!('abcdef' ~~ /<!alpha>./), '<!alpha> does not match letter characters');
is(+('.2 1' ~~ /<!before 2> \d/), 1, '<!before>');
is +$/.caps, 0, '<!before 2> does not capture';
}
# A leading ? indicates a positive zero-width assertion
{
is(~('123abc456def' ~~ /(.+? <?alpha>)/), '123', 'positive zero-width assertion');
}
# The <...>, <???>, and <!!!> special tokens have the same "not-defined-yet"
# meanings within regexes that the bare elipses have in ordinary code
#?rakudo skip '..., !!! and ??? in regexes'
#?niecza skip 'Action method assertion:sym<???> not yet implemented'
{
eval_dies_ok('"foo" ~~ /<...>/', '<...> dies in regex match');
# XXX: Should be warns_ok, but we don't have that yet
lives_ok({'foo' ~~ /<???>/}, '<???> lives in regex match');
eval_dies_ok('"foo" ~~ /<!!!>/', '<!!!> dies in regex match');
}
# A leading * indicates that the following pattern allows a partial match.
# It always succeeds after matching as many characters as possible.
#?rakudo skip '<*literal>'
#?niecza skip 'Action method assertion:sym<*> not yet implemented'
{
is('' ~~ /^ <*xyz> $ /, '', 'partial match (0)');
is('x' ~~ /^ <*xyz> $ /, 'x', 'partial match (1a)');
is('xz' ~~ /^ <*xyz> $ /, 'x', 'partial match (1b)');
is('yzx' ~~ /^ <*xyz> $ /, 'x', 'partial match (1c)');
is('xy' ~~ /^ <*xyz> $ /, 'xy', 'partial match (2a)');
is('xyx' ~~ /^ <*xyz> $ /, 'xy', 'partial match (2a)');
is('xyz' ~~ /^ <*xyz> $ /, 'xyz', 'partial match (3)');
is('abc' ~~ / ^ <*ab+c> $ /, 'abc', 'partial match with quantifier (1)');
is('abbbc' ~~ / ^ <*ab+c> $ /, 'abbbc', 'partial match with quantifier (2)');
is('ababc' ~~ / ^ <*'ab'+c> $ /, 'ababc', 'partial match with quantifier (3)');
is('aba' ~~ / ^ <*'ab'+c> $ /, 'ababc', 'partial match with quantifier (4)');
}
# A leading ~~ indicates a recursive call back into some or all of the
# current rule. An optional argument indicates which subpattern to re-use
#?niecza skip 'Action method assertion:sym<~~>'
{
ok('1.2.' ~~ /\d+\. <~~> | <?>/, 'recursive regex using whole pattern');
#?rakudo skip '<~~ ... >'
ok('foodbard' ~~ /(foo|bar) d <~~0>/, 'recursive regex with partial pattern');
}
# The following tokens include angles but are not required to balance
# A <( token indicates the start of a result capture,
# while the corresponding )> token indicates its endpoint
{
is('foo123bar' ~~ /foo <(\d+)> bar/, 123, '<(...)> pair');
is('foo456bar' ~~ /foo <(\d+ bar/, '456bar', '<( match');
is('foo789bar' ~~ /foo \d+)> bar/, 'foo789', ')> match');
ok(!('foo123' ~~ /foo <(\d+)> bar/), 'non-matching <(...)>');
is('foo123bar' ~~ /foo <( bar || ....../, 'foo123', '<( in backtracking');
#?niecza todo
is('foo123bar' ~~ /foo <( 123 <( bar/, 'bar', 'multiple <(');
is('foo123bar' ~~ /foo <( 123 [ <( xyz ]?/, '123', 'multiple <( backtracking');
}
# A « or << token indicates a left word boundary.
# A » or >> token indicates a right word boundary.
{
is('abc' ~~ /<<abc/, 'abc', 'left word boundary (string beginning)');
is('!abc' ~~ /<<abc/, 'abc', 'left word boundary (\W character)');
is('abc' ~~ /abc>>/, 'abc', 'right word boundary (string end)');
is('abc!' ~~ /abc>>/, 'abc', 'right word boundary (\W character)');
is('!abc!' ~~ /<<abc>>/, 'abc', 'both word boundaries (\W character)');
}
done();
# vim: ft=perl6
Jump to Line
Something went wrong with that request. Please try again.