Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[nrx] Reimplement quantifiers
  • Loading branch information
sorear committed Sep 5, 2010
1 parent 066f98c commit 9cf770c
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 85 deletions.
2 changes: 2 additions & 0 deletions src/CgOp.pm
Expand Up @@ -512,6 +512,8 @@ use warnings;
CgOp::Primitive->new(op => [ 'callframe' ]);
}

sub rxframe { getfield('rx', callframe) }

sub letvar {
$_[1] ?
CgOp::Primitive->new(op => [ 'poke_let', $_[0] ], zyg => [ $_[1] ]):
Expand Down
19 changes: 13 additions & 6 deletions src/Niecza/Actions.pm
Expand Up @@ -73,6 +73,9 @@ sub unv { }
sub begid { }
sub comment { }
sub comment__S_Sharp { }
sub comment__S_SharpGraveParenDotDotDotThesis { }
sub opener { }
sub starter { }
sub spacey { }
sub unspacey { }
sub unsp { }
Expand Down Expand Up @@ -409,9 +412,9 @@ sub quantified_atom { my ($cl, $M) = @_; # :: RxOp
$q->{mod} //= '';
}

if ($q->{simple}) {
$atom = RxOp::Quantifier->new(type => $q->{simple}, zyg => [$atom],
minimal => ($q->{mod} && $q->{mod} eq '?'));
if (defined $q->{min}) {
$atom = RxOp::Quantifier->new(min => $q->{min}, max => $q->{max},
zyg => [$atom], minimal => ($q->{mod} && $q->{mod} eq '?'));
}

if (defined $q->{mod} && $q->{mod} eq '') {
Expand All @@ -424,13 +427,13 @@ sub quantified_atom { my ($cl, $M) = @_; # :: RxOp
# :: Context hash interpreted by quantified_atom
sub quantifier {}
sub quantifier__S_Star { my ($cl, $M) = @_;
$M->{_ast} = { simple => '*', mod => $M->{quantmod}{_ast} };
$M->{_ast} = { min => 0, mod => $M->{quantmod}{_ast} };
}
sub quantifier__S_Plus { my ($cl, $M) = @_;
$M->{_ast} = { simple => '+', mod => $M->{quantmod}{_ast} };
$M->{_ast} = { min => 1, mod => $M->{quantmod}{_ast} };
}
sub quantifier__S_Question { my ($cl, $M) = @_;
$M->{_ast} = { simple => '?', mod => $M->{quantmod}{_ast} };
$M->{_ast} = { min => 0, max => 1, mod => $M->{quantmod}{_ast} };
}
sub quantifier__S_Colon { my ($cl, $M) = @_;
$M->{_ast} = { mod => '' };
Expand Down Expand Up @@ -847,6 +850,10 @@ sub nibbler { my ($cl, $M) = @_;
# garden variety nibbler
my @bits;
for my $n (@{ $M->{nibbles} }) {
if (!blessed($n)) {
say(STDERR YAML::XS::Dump($n));
next;
}
my $bit = $n->isa('Str') ? $n->{TEXT} : $n->{_ast};

if (ref($bit) && ref($bit) eq 'CClass') {
Expand Down
95 changes: 39 additions & 56 deletions src/RxOp.pm
Expand Up @@ -51,69 +51,52 @@ use CgOp;
use Moose;
extends 'RxOp';

has type => (isa => 'Str', is => 'ro', required => 1);
has minimal => (isa => 'Bool', is => 'ro', required => 1);
# ? + * only
# zyg * 1
has min => (isa => 'Int', is => 'ro', required => 1);
has max => (isa => 'Maybe[Int]', is => 'ro', default => undef);

my %qf = ( '+', 'plus', '*', 'star', '?', 'opt' );
sub op { goto &{ $_[0]->can('op_' . $qf{$_[0]->type}) }; }
sub code {
my ($self, $body) = @_;
my @code;

sub op_opt {
my ($self, $cn, $cont) = @_;
my $kcl = $self->_close_k($cn, $cont);
my $zzcn = Niecza::Actions->gensym;
my ($zcn, $zcont) = $self->zyg->[0]->op($zzcn,
Op::CallSub->new(invocant => Op::Lexical->new(name => $kcl->var),
positionals => [Op::Lexical->new(name => $zzcn)]));
$zcn, Op::StatementList->new(children => [
$kcl, $zcont, Op::CallSub->new(
invocant => Op::Lexical->new(name => $kcl->var),
positionals => [Op::Lexical->new(name => $zcn)])]);
}

# (sub loop($C) { zyg($C, &loop); cont($C) })($C)
sub op_star {
my ($self, $cn, $cont) = @_;
my $lpn = Niecza::Actions->gensym;
my $zzcn = Niecza::Actions->gensym;
my ($zcn, $zcont) = $self->zyg->[0]->op($zzcn, Op::CallSub->new(
invocant => Op::Lexical->new(name => $lpn),
positionals => [Op::Lexical->new(name => $zzcn)]));
$cn, Op::CallSub->new(
invocant => Op::SubDef->new(var => $lpn,
once => 1, body =>
Body->new(type => 'sub', signature => Sig->simple($zcn), do =>
Op::StatementList->new(children => [ $zcont,
Op::CallSub->new(
invocant => $self->_close_k($cn, $cont),
positionals => [Op::Lexical->new(name => $zcn)])]))),
positionals => [Op::Lexical->new(name => $cn)]);
}

# (sub loop($C) { zyg($C, -> $nC { loop($nC); cont($nC) }) })($C)
sub op_plus {
my ($self, $cn, $cont) = @_;
my $lpn = Niecza::Actions->gensym;
my ($zcn, $zcont) = $self->zyg->[0]->op($cn, Op::StatementList->new(
children => [
Op::CallSub->new(
invocant => Op::Lexical->new(name => $lpn),
positionals => [Op::Lexical->new(name => $cn)]),
$cont
]));
$cn, Op::CallSub->new(
invocant => Op::SubDef->new(var => $lpn, once => 1,
body => Body->new(type => 'sub', signature =>
Sig->simple($zcn), do => $zcont)),
positionals => [Op::Lexical->new(name => $cn)]);
my $exit = $self->label;
my $repeat = $self->label;

push @code, CgOp::rawcall(CgOp::rxframe, 'OpenQuant');
push @code, CgOp::label($repeat);
push @code, CgOp::ternary(CgOp::compare('>=',
CgOp::rawcall(CgOp::rxframe, 'GetQuant'),
CgOp::int($self->min)),
CgOp::rxpushb('QUANT', $exit), CgOp::prog());
if (defined $self->max) {
push @code, CgOp::ternary(CgOp::compare('>=',
CgOp::rawcall(CgOp::rxframe, 'GetQuant'),
CgOp::int($self->max)),
CgOp::rawccall(CgOp::rxframe, 'Backtrack'), CgOp::prog());
}
push @code, $self->zyg->[0]->code($body);
push @code, CgOp::rawcall(CgOp::rxframe, 'IncQuant');
push @code, CgOp::goto($repeat);
push @code, CgOp::label($exit);
push @code, CgOp::rawcall(CgOp::rxframe, 'CloseQuant');

@code;
}

sub lad {
my ($self) = @_;
$self->minimal ? CgOp::rawnew('LADImp') :
CgOp::rawnew('LAD' . ucfirst($qf{$self->type}),
$self->zyg->[0]->lad);
if ($self->minimal) { return CgOp::rawnew('LADImp'); }
my ($mi,$ma) = ($self->min, $self->max // -1);
my $str;
if ($mi == 0 && $ma == -1) { $str = 'Star' }
if ($mi == 1 && $ma == -1) { $str = 'Plus' }
if ($mi == 0 && $ma == 1) { $str = 'Opt' }

if ($str) {
CgOp::rawnew("LAD$str", $self->zyg->[0]->lad);
} else {
CgOp::rawnew("LADImp");
}
}

__PACKAGE__->meta->make_immutable;
Expand Down
190 changes: 167 additions & 23 deletions test2.pl
Expand Up @@ -3,22 +3,6 @@

#ok '{}' ~~ / \{ <.ws> \} /, 'ws matches between \W';

#sub rxt($C) {
# Q:CgOp {
# (prog
# (setfield rx (callframe) (rawnew RxFrame (cast Cursor (@ {$C}))))
# (rxpushb SEQALT b1)
# (rxbprim ExactOne (char x))
# (goto b2)
# (label b1)
# (rxbprim ExactOne (char y))
# (label b2)
# (rawccall (getfield rx (callframe)) End)
# (rawccall (getfield rx (callframe)) Backtrack)
# (null Variable))
# }
#}

PRE-INIT {
Q:CgOp {
(prog
Expand All @@ -32,13 +16,173 @@
}
}
is +("x" ~~ /x||y/), 1, "x ~~ /x||y/";
is +("y" ~~ /x||y/), 1, "y ~~ /x||y/";
is +("z" ~~ /x||y/), 0, "z !~~ /x||y/";
is +(/x||x/(Cursor.new("x"))), 2, "x ~~ /x||x/ (2x) ";
{
ok ("a" ~~ /a/), "letter matches itself";
ok !("a" ~~ /b/), "letter does not match other";
ok ("xxa" ~~ /a/), "leading garbage ignored";
ok ("axx" ~~ /a/), "trailing garbage ignored";
ok ("ab" ~~ /ab/), "sequence matches sequence";
ok !("ab" ~~ /ba/), "sequence requires order";
ok ("abc" ~~ /ab?c/), "conditional can match";
ok ("ac" ~~ /ab?c/), "conditional can match nothing";
ok !("adc" ~~ /ab?c/), "conditional cannot match something else";
ok ("ac" ~~ /ab*c/), "kleene closure can match none";
ok ("abbc" ~~ /ab*c/), "kleene closure can match many";
ok !("adc" ~~ /ab*c/), "kleene closure cannot match other";
ok ("abc" ~~ /ab+c/), "plus can match one";
ok ("abbbc" ~~ /ab+c/), "plus can match many";
ok !("adc" ~~ /ab+c/), "plus cannot match other";
ok !("ac" ~~ /ab+c/), "plus cannot match none";
#grammar Bob {
# rule TOP {ab*c}
#}
#ok Bob.parse("abbc"), "grammars work (1)";
#ok !Bob.parse("adc"), "grammars work (2)";
#ok !Bob.parse("xac"), "grammars anchor (1)";
#ok !Bob.parse("acx"), "grammars anchor (2)";
}
# {
# my grammar G1 {
# regex TOP { <.foo> }
# regex foo { x }
# }
#
# ok G1.parse("x"), "subrules work (positive)";
# ok !G1.parse("y"), "subrules work (negative)";
#
# my grammar G2 {
# regex TOP { y <.foo> <.foo> y }
# regex foo { x }
# }
#
# ok G2.parse("yxxy"), "subrule position tracking works";
# ok !G2.parse("yxy"), "subrule position tracking works (2)";
#
# my grammar G3 {
# regex TOP { <moo> }
# regex moo { x }
# }
#
# ok G3.parse("x"), "capturing subrules work (positive)";
# ok !G3.parse("y"), "capturing subrules work (negative)";
# }
#
# {
# ok ("aab" ~~ /a* ab/), "a*ab backtracks";
# ok !("aab" ~~ /a*: ab/), "a*: ab doesn't";
# ok ("aab" ~~ /a*! ab/), "a*! ab backtracks";
# ok !("aab" ~~ /:r a* ab/), "ratcheting a* ab does not";
# ok !("aab" ~~ /:r a*: ab/), "ratcheting a*: ab does not";
# ok ("aab" ~~ /:r a*! ab/), "ratcheting a*! ab does";
# ok !("aab" ~~ token { a* ab }), "a* ab in a token does not";
#
# ok ("ab ab" ~~ / ab <.ws> ab /), "ws matches a space";
# ok (q:to/end/ ~~ / ab <.ws> ab /), "ws matches a newline";
# ab
# ab
# end
# ok ("ab ab" ~~ / ab <.ws> ab /), "ws matches several spaces";
# ok !("abab" ~~ / ab <.ws> ab /), "ws does not match nothing";
# ok ("ab ab" ~~ rule { ab ab }), "rule gives space";
# }
#
# {
# # doing a more reasonable test will probably require embedded blocks
# ok "foobarx" ~~ / [ foo | foobar ]: x /, "LTM picks longest even if second";
# ok "foobarx" ~~ / [ foobar | foo ]: x /, "LTM picks longest even if first";
# }
#
# {
# my $x = '';
# ok !("a" ~~ / a { $x = 1; } b /), '{} does not terminate regex';
# is $x, 1, '{} is run even if regex fails';
# $x = '';
# ok !("" ~~ / a { $x = 1; } b /), '{} does not affect regex that ends before it';
# is $x, '', '{} is only run if reached';
# $x = 0;
# ok ("aab" ~~ / a* { $x++ } ab /), '{} does not block backtracking';
# is $x, 2, '{} is run multiple times when backtracking';
#
# $x = '';
# ok ("foo" ~~ / foo { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
# "foo ~~ foo|foo";
# is $x, 1, "with no other constraints, first item is used";
# $x = '';
# ok ("foo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
# "foo ~~ fo*|foo";
# is $x, 2, "longer literal prefix wins over seniority";
# $x = '';
# ok ("fooo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
# "foo ~~ fo*|foo";
# is $x, 1, "longer length wins over prefix";
# $x = '';
# ok !("fooo" ~~ / [ fo*: { $x = $x ~ 1 } | foo { $x = $x ~ 2 } ] x /),
# "foo !~~ [fo*:|foo]x";
# is $x, '12', "will backtrack into shorter token";
#
# my grammar G5 {
# token a { foo }
# token b { foobar }
# token c { <a> | <b> }
# token d { <c> x }
#
# token e { x <e> x | y }
#
# token TOP { A <d> | E <e> }
# }
#
# ok G5.parse('Afoobarx'), 'LTM works even through subrules';
# ok G5.parse('Exxyxx'), 'recursivity does not crash LTM';
#
# my grammar G6 {
# token a { fo* { $x = 1 } }
# token b { foo { $x = 2 } }
# token TOP { <a> | <b> }
# }
# G6.parse("foo");
# is $x, 2, "prefix length testing works in subrules";
# }
#
# {
# my grammar G7 {
# proto token tok {*}
# token tok:sym<+> { <sym> }
# token tok:foo { <sym> }
#
# rule TOP { <tok> }
# }
#
# ok G7.parse('+'), "can parse :sym<> symbols";
# ok G7.parse('foo'), "can parse : symbols";
# }
#
# {
# ok 'xxy' ~~ /x { $a = $/.pos } /, "can match with \$/ stuff";
# is $a, 1, '$/.pos is the right sort of thing';
# 'xxy' ~~ /x { $a = ($¢ ~~ Cursor) }/;
# is $a, True, '$¢ isa Cursor';
# }

#is +rxt(Cursor.new("x")), 1, "/x||y/ ~~ x";
#is +rxt(Cursor.new("y")), 1, "/x||y/ ~~ y";
#is +rxt(Cursor.new("z")), 0, "/x||y/ !~~ z";
#rxtest /x.y/, "x.y", ("xay", "x y"), ("xy", "xaay");
#rxtest /<!>/, '<!>', Nil, ("", "x");
#rxtest /\s/, '\s', (" ", ("\n" => '\n'), ("\r" => '\r'), "\x3000"),
# ("x", "1", "+");
#rxtest /\S/, '\S', ("x", "1", "+"),
# (" ", ("\n" => '\n'), ("\r" => '\r'), ("\x3000" => 'id space'));
#rxtest /\w/, '\w', ("x", "1", "_", "\x4E00"), ("+", " ");
#rxtest /<[ y ]>/, '<[ y ]>', ("y"), (" ", "x", "z");
#rxtest /<[ i .. k ]>/, '<[ i .. k ]>', ("i", "j", "k"), ("h", "l");
#rxtest /<[ \W a..z ]>/, '<[\W a..z]>', ("a", "z", "+"), ("\x4E00");
#
#rxtest /a || b/, 'a || b', ("a", "b"), ("c", "");
#rxtest /x [a || aa]: c/, 'x[a||b]:c', ("xac",), ("xaac",);

#ok "axy" ~~ / a <before x> \w y / , "before is zero-width";
#ok "axy" ~~ / a <?before x> \w y / , "?before is zero-width";
#ok "azy" ~~ / a <!before x> \w y / , "!before is zero-width";
#ok !("azy" ~~ / a <?before x> \w y /) , "?before x needs x";
#ok !("axy" ~~ / a <!before x> \w y /) , "!before x needs !x";
done-testing;

0 comments on commit 9cf770c

Please sign in to comment.