Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[nrx] Reinstate LTM
  • Loading branch information
sorear committed Sep 6, 2010
1 parent b2c626c commit 39a55eb
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 121 deletions.
10 changes: 9 additions & 1 deletion lib/Cursor.cs
Expand Up @@ -160,6 +160,14 @@ public struct State {
return !(bt.obj.pos == end || !x.Accepts(orig[bt.obj.pos++]));
}

public void LTMPushAlts(Lexer lx, int[] addrs) {
PushBacktrack("LTM", -1);
int[] cases = lx.Run(orig_s, bt.obj.pos);
for (int i = cases.Length - 1; i >= 0; i--) {
PushBacktrack("LTMALT", addrs[cases[i]]);
}
}

public void OpenQuant() {
bt.obj.reps = new PSN<int>(0, bt.obj.reps);
}
Expand Down Expand Up @@ -775,7 +783,7 @@ public class Lexer {
Environment.GetEnvironmentVariable("NIECZA_LTM_TRACE") != null;

public Lexer(IP6 cursorObj, string tag, LAD[] alts) {
pad.cursor_class = ((DynObject)cursorObj).klass;
pad.cursor_class = cursorObj.GetMO();
this.alts = alts;
this.tag = tag;
int root = pad.AddNode();
Expand Down
4 changes: 4 additions & 0 deletions src/CgOp.pm
Expand Up @@ -433,6 +433,10 @@ use warnings;
CgOp::Primitive->new(op => [ 'clr_double', $_[0] ]);
}

sub labelid {
CgOp::Primitive->new(op => [ 'labelid', $_[0] ], zyg => [ ]);
}

sub int {
CgOp::Primitive->new(op => [ 'clr_int', $_[0] ]);
}
Expand Down
13 changes: 12 additions & 1 deletion src/CodeGen.pm
Expand Up @@ -67,6 +67,7 @@ use 5.010;
CloseQuant => [m => 'Int32'],
CommitGroup => [m => 'Void'],
GetCursorList => [m => 'Variable'],
LTMPushAlts => [m => 'Void'],
PushCursorList => [m => 'Void'],
MakeCursor => [m => 'Cursor'],
SetPos => [m => 'Void'],
Expand Down Expand Up @@ -221,7 +222,12 @@ use 5.010;
sub _savestackstate {
my ($self, $lbl) = @_;
my %save;
die "Invalid operation of CPS converter" if @{ $self->stacktype };
if (@{ $self->stacktype }) {
print for @{ $self->buffer };
print(YAML::XS::Dump($self->stacktype));
print(YAML::XS::Dump($self->stackterm));
Carp::confess "Invalid operation of CPS converter";
}
$save{lettypes} = [ @{ $self->lettypes } ];
$save{letstack} = [ @{ $self->letstack } ];
$self->savedstks->{$lbl} = \%save;
Expand Down Expand Up @@ -513,6 +519,11 @@ use 5.010;
$self->_push('System.Double', "((Double)$val)");
}

sub labelid {
my ($self, $lbl) = @_;
$self->_push('Int32', "\@\@L$lbl");
}

sub clr_arith {
my ($self, $op) = @_;
my ($a1, $a2, $ty1, $ty2) = $self->_popn(2);
Expand Down
24 changes: 23 additions & 1 deletion src/RxOp.pm
Expand Up @@ -78,7 +78,7 @@ use CgOp;
push @code, CgOp::rawcall(CgOp::rxframe, 'IncQuant');
push @code, CgOp::goto($repeat);
push @code, CgOp::label($exit);
push @code, CgOp::rawcall(CgOp::rxframe, 'CloseQuant');
push @code, CgOp::sink(CgOp::rawcall(CgOp::rxframe, 'CloseQuant'));

@code;
}
Expand Down Expand Up @@ -337,6 +337,28 @@ use CgOp;
use Moose;
extends 'RxOp';

sub code {
my ($self, $body) = @_;
my @ls = map { $self->label } @{ $self->zyg };
my $end = $self->label;

my @code;
push @code, CgOp::rawcall(CgOp::rxframe, "LTMPushAlts",
CgOp::rawnew('Lexer',
CgOp::rawcall(CgOp::rxframe,'MakeCursor'), CgOp::clr_string(''),
CgOp::rawnewarr('LAD', map { $_->lad } @{ $self->zyg })),
CgOp::rawnewarr('Int32', map { CgOp::labelid($_) } @ls));
push @code, CgOp::rawccall(CgOp::rxframe, 'Backtrack');
for (my $i = 0; $i < @ls; $i++) {
push @code, CgOp::label($ls[$i]);
push @code, $self->zyg->[$i]->code($body);
push @code, CgOp::goto($end) unless $i == @ls - 1;
}
push @code, CgOp::label($end);
push @code, CgOp::rxpushb('ENDLTM');
@code;
}

sub lad {
my ($self) = @_;
CgOp::rawnew('LADAny', CgOp::rawnewarr('LAD',
Expand Down
168 changes: 50 additions & 118 deletions test2.pl
Expand Up @@ -3,60 +3,6 @@

ok '{}' ~~ / \{ <.ws> \} /, 'ws matches between \W';

{
ok ("a" ~~ /a/), "letter matches itself";
ok !("a" ~~ /b/), "letter does not match other";
ok ("xxa" ~~ /a/), "leading garbage ignored";
ok ("axx" ~~ /a/), "trailing garbage ignored";
ok ("ab" ~~ /ab/), "sequence matches sequence";
ok !("ab" ~~ /ba/), "sequence requires order";
ok ("abc" ~~ /ab?c/), "conditional can match";
ok ("ac" ~~ /ab?c/), "conditional can match nothing";
ok !("adc" ~~ /ab?c/), "conditional cannot match something else";
ok ("ac" ~~ /ab*c/), "kleene closure can match none";
ok ("abbc" ~~ /ab*c/), "kleene closure can match many";
ok !("adc" ~~ /ab*c/), "kleene closure cannot match other";
ok ("abc" ~~ /ab+c/), "plus can match one";
ok ("abbbc" ~~ /ab+c/), "plus can match many";
ok !("adc" ~~ /ab+c/), "plus cannot match other";
ok !("ac" ~~ /ab+c/), "plus cannot match none";

grammar Bob {
regex TOP {ab*c}
}

ok Bob.parse("abbc"), "grammars work (1)";
ok !Bob.parse("adc"), "grammars work (2)";
ok !Bob.parse("xac"), "grammars anchor (1)";
ok !Bob.parse("acx"), "grammars anchor (2)";
}

{
my grammar G1 {
regex TOP { <.foo> }
regex foo { x }
}

ok G1.parse("x"), "subrules work (positive)";
ok !G1.parse("y"), "subrules work (negative)";

my grammar G2 {
regex TOP { y <.foo> <.foo> y }
regex foo { x }
}

ok G2.parse("yxxy"), "subrule position tracking works";
ok !G2.parse("yxy"), "subrule position tracking works (2)";

my grammar G3 {
regex TOP { <moo> }
regex moo { x }
}

ok G3.parse("x"), "capturing subrules work (positive)";
ok !G3.parse("y"), "capturing subrules work (negative)";
}

{
ok ("aab" ~~ /a* ab/), "a*ab backtracks";
ok !("aab" ~~ /a*: ab/), "a*: ab doesn't";
Expand All @@ -77,60 +23,60 @@
}

{
# # doing a more reasonable test will probably require embedded blocks
# ok "foobarx" ~~ / [ foo | foobar ]: x /, "LTM picks longest even if second";
# ok "foobarx" ~~ / [ foobar | foo ]: x /, "LTM picks longest even if first";
# doing a more reasonable test will probably require embedded blocks
ok "foobarx" ~~ / [ foo | foobar ]: x /, "LTM picks longest even if second";
ok "foobarx" ~~ / [ foobar | foo ]: x /, "LTM picks longest even if first";
}

{
# my $x = '';
# ok !("a" ~~ / a { $x = 1; } b /), '{} does not terminate regex';
# is $x, 1, '{} is run even if regex fails';
# $x = '';
# ok !("" ~~ / a { $x = 1; } b /), '{} does not affect regex that ends before it';
# is $x, '', '{} is only run if reached';
# $x = 0;
# ok ("aab" ~~ / a* { $x++ } ab /), '{} does not block backtracking';
# is $x, 2, '{} is run multiple times when backtracking';
#
# $x = '';
# ok ("foo" ~~ / foo { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
# "foo ~~ foo|foo";
# is $x, 1, "with no other constraints, first item is used";
# $x = '';
# ok ("foo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
# "foo ~~ fo*|foo";
# is $x, 2, "longer literal prefix wins over seniority";
# $x = '';
# ok ("fooo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
# "foo ~~ fo*|foo";
# is $x, 1, "longer length wins over prefix";
# $x = '';
# ok !("fooo" ~~ / [ fo*: { $x = $x ~ 1 } | foo { $x = $x ~ 2 } ] x /),
# "foo !~~ [fo*:|foo]x";
# is $x, '12', "will backtrack into shorter token";
my $x = '';
ok !("a" ~~ / a { $x = 1; } b /), '{} does not terminate regex';
is $x, 1, '{} is run even if regex fails';
$x = '';
ok !("" ~~ / a { $x = 1; } b /), '{} does not affect regex that ends before it';
is $x, '', '{} is only run if reached';
$x = 0;
ok ("aab" ~~ / a* { $x++ } ab /), '{} does not block backtracking';
is $x, 2, '{} is run multiple times when backtracking';

$x = '';
ok ("foo" ~~ / foo { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
"foo ~~ foo|foo";
is $x, 1, "with no other constraints, first item is used";
$x = '';
ok ("foo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
"foo ~~ fo*|foo";
is $x, 2, "longer literal prefix wins over seniority";
$x = '';
ok ("fooo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
"foo ~~ fo*|foo";
is $x, 1, "longer length wins over prefix";
$x = '';
ok !("fooo" ~~ / [ fo*: { $x = $x ~ 1 } | foo { $x = $x ~ 2 } ] x /),
"foo !~~ [fo*:|foo]x";
is $x, '12', "will backtrack into shorter token";

my grammar G5 {
token a { foo }
token b { foobar }
token c { <a> | <b> }
token d { <c> x }

token e { x <e> x | y }

token TOP { A <d> | E <e> }
}

# my grammar G5 {
# token a { foo }
# token b { foobar }
# token c { <a> | <b> }
# token d { <c> x }
#
# token e { x <e> x | y }
#
# token TOP { A <d> | E <e> }
# }
#
# ok G5.parse('Afoobarx'), 'LTM works even through subrules';
# ok G5.parse('Exxyxx'), 'recursivity does not crash LTM';
#
# my grammar G6 {
# token a { fo* { $x = 1 } }
# token b { foo { $x = 2 } }
# token TOP { <a> | <b> }
# }
# G6.parse("foo");
# is $x, 2, "prefix length testing works in subrules";
ok G5.parse('Afoobarx'), 'LTM works even through subrules';
ok G5.parse('Exxyxx'), 'recursivity does not crash LTM';

my grammar G6 {
token a { fo* { $x = 1 } }
token b { foo { $x = 2 } }
token TOP { <a> | <b> }
}
G6.parse("foo");
is $x, 2, "prefix length testing works in subrules";
}

# {
Expand All @@ -154,20 +100,6 @@
is $a, True, '$¢ isa Cursor';
}

rxtest /x.y/, "x.y", ("xay", "x y"), ("xy", "xaay");
rxtest /<!>/, '<!>', Nil, ("", "x");
rxtest /\s/, '\s', (" ", ("\n" => '\n'), ("\r" => '\r'), "\x3000"),
("x", "1", "+");
rxtest /\S/, '\S', ("x", "1", "+"),
(" ", ("\n" => '\n'), ("\r" => '\r'), ("\x3000" => 'id space'));
rxtest /\w/, '\w', ("x", "1", "_", "\x4E00"), ("+", " ");
rxtest /<[ y ]>/, '<[ y ]>', ("y"), (" ", "x", "z");
rxtest /<[ i .. k ]>/, '<[ i .. k ]>', ("i", "j", "k"), ("h", "l");
rxtest /<[ \W a..z ]>/, '<[\W a..z]>', ("a", "z", "+"), ("\x4E00");

rxtest /a || b/, 'a || b', ("a", "b"), ("c", "");
rxtest /x [a || aa]: c/, 'x[a||aa]:c', ("xac",), ("xaac",);

#ok "axy" ~~ / a <before x> \w y / , "before is zero-width";
#ok "axy" ~~ / a <?before x> \w y / , "?before is zero-width";
#ok "azy" ~~ / a <!before x> \w y / , "!before is zero-width";
Expand Down

0 comments on commit 39a55eb

Please sign in to comment.