Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[nrx] Reinstate protoregexes and <sym>
  • Loading branch information
sorear committed Sep 6, 2010
1 parent 39a55eb commit 2eff169
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 98 deletions.
11 changes: 7 additions & 4 deletions lib/Cursor.cs
Expand Up @@ -139,6 +139,10 @@ public struct State {
}
}

public bool IsTopCut() {
return bt.next.obj.xact.committed;
}

public bool Exact(string st) {
if (bt.obj.pos + st.Length > end)
return false;
Expand Down Expand Up @@ -868,13 +872,12 @@ public class Lexer {
}

public static IP6[] RunProtoregex(IP6 cursor, string name) {
DynObject dc = (DynObject)cursor;
DynObject[] candidates = ResolveProtoregex(dc.klass, name);
DynObject[] candidates = ResolveProtoregex(cursor.GetMO(), name);
LAD[] branches = new LAD[candidates.Length];
for (int i = 0; i < candidates.Length; i++)
branches[i] = ((SubInfo) candidates[i].slots["info"]).ltm;
Lexer l = new Lexer(dc, name, branches);
Cursor c = (Cursor)Kernel.UnboxAny(cursor);
Lexer l = new Lexer(cursor, name, branches);
Cursor c = (Cursor)cursor;
int[] brnum = l.Run(c.backing, c.pos);
IP6[] ret = new IP6[brnum.Length];
for (int i = 0; i < brnum.Length; i++)
Expand Down
1 change: 1 addition & 0 deletions src/CodeGen.pm
Expand Up @@ -61,6 +61,7 @@ use 5.010;
'RxFrame' =>
{ Exact => [m => 'Boolean'],
Exact1 => [m => 'Boolean'],
IsTopCut => [m => 'Boolean'],
IncQuant => [m => 'Void'],
GetQuant => [m => 'Int32'],
OpenQuant => [m => 'Void'],
Expand Down
4 changes: 3 additions & 1 deletion src/Niecza/Actions.pm
Expand Up @@ -371,6 +371,7 @@ sub regex_def { my ($cl, $M) = @_;
$ast = RxOp::ProtoRedis->new(name => $name);
}

local $::symtext = $symtext;
$M->{_ast} = Op::SubDef->new(
var => $var,
method_too => ($scope eq 'has' ? $name : undef),
Expand All @@ -380,7 +381,8 @@ sub regex_def { my ($cl, $M) = @_;
class => 'Regex',
type => 'regex',
signature => $sig->for_regex,
do => Op::RegexBody->new(name => ($name // ''), rxop => $ast)));
do => Op::RegexBody->new(sym => $symtext,
name => ($name // ''), rxop => $ast)));
}

sub regex_declarator { my ($cl, $M) = @_;
Expand Down
3 changes: 3 additions & 0 deletions src/Op.pm
Expand Up @@ -1015,12 +1015,15 @@ use CgOp;

has rxop => (isa => 'RxOp', is => 'ro', required => 1);
has name => (isa => 'Str', is => 'ro', default => '');
has sym => (isa => 'Maybe[Str]', is => 'ro');

sub zyg { $_[0]->rxop->opzyg }

sub code {
my ($self, $body) = @_;

local $::symtext = $self->sym;

CgOp::prog(
CgOp::setfield('rx', CgOp::callframe,
CgOp::rawnew('RxFrame', CgOp::clr_string($self->name),
Expand Down
46 changes: 36 additions & 10 deletions src/RxOp.pm
Expand Up @@ -281,6 +281,10 @@ use CgOp;
my $bt = $self->label;
my $sk = $self->label;

if ($self->name eq 'sym') {
return RxOp::String->new(text => $::symtext)->code($body);
}

my @code;
push @code, CgOp::rawcall(CgOp::rxframe, "PushCursorList",
CgOp::rawnewarr('String', map { CgOp::clr_string($_) } @{ $self->captures }),
Expand Down Expand Up @@ -400,16 +404,38 @@ use CgOp;
has name => (isa => 'Str', is => 'ro', required => 1);
has cutltm => (isa => 'Bool', is => 'ro', default => 0);

sub op {
my ($self, $cn, $cont) = @_;
my $icn = Niecza::Actions->gensym;
$icn, Op::CallSub->new(
invocant => Op::Lexical->new(name => '&_rxproto'),
positionals => [
Op::Lexical->new(name => $icn),
$self->_close_k($cn, $cont),
Op::StringLiteral->new(text => $self->name)
]);
sub code {
my ($self, $body) = @_;
# will probably break with complicated harnesses
CgOp::letn(
"fns", CgOp::rawscall('Lexer.RunProtoregex',
CgOp::fetch(CgOp::scopedlex('')),
CgOp::clr_string($self->name)),
"i", CgOp::int(0),
CgOp::rxpushb('LTM'),
CgOp::whileloop(0, 0,
CgOp::compare('<', CgOp::letvar("i"),
CgOp::getfield("Length", CgOp::letvar("fns"))),
CgOp::ternary(
CgOp::rawcall(CgOp::rxframe, 'IsTopCut'),
CgOp::letvar("i", CgOp::int(2**31-1)),
CgOp::letn(
"ks", CgOp::methodcall(CgOp::methodcall(
CgOp::subcall(CgOp::getindex(CgOp::letvar("i"),
CgOp::letvar("fns")), CgOp::scopedlex('')),
'list'), 'clone'),
CgOp::letvar("i", CgOp::arith('+',
CgOp::letvar("i"), CgOp::int(1))),
CgOp::whileloop(0, 0,
CgOp::unbox('Boolean', CgOp::fetch(
CgOp::methodcall(CgOp::letvar('ks'), 'Bool'))),
CgOp::prog(
CgOp::rawcall(CgOp::rxframe, 'SetPos',
CgOp::getfield('pos', CgOp::cast('Cursor',
CgOp::fetch(CgOp::methodcall(CgOp::letvar('ks'),
'shift'))))),
CgOp::rawccall(CgOp::rxframe, 'End')))))),
CgOp::rawccall(CgOp::rxframe, 'Backtrack'));
}

sub lad {
Expand Down
90 changes: 7 additions & 83 deletions test2.pl
Expand Up @@ -4,94 +4,18 @@
ok '{}' ~~ / \{ <.ws> \} /, 'ws matches between \W';

{
ok ("aab" ~~ /a* ab/), "a*ab backtracks";
ok !("aab" ~~ /a*: ab/), "a*: ab doesn't";
ok ("aab" ~~ /a*! ab/), "a*! ab backtracks";
ok !("aab" ~~ /:r a* ab/), "ratcheting a* ab does not";
ok !("aab" ~~ /:r a*: ab/), "ratcheting a*: ab does not";
ok ("aab" ~~ /:r a*! ab/), "ratcheting a*! ab does";
ok !("aab" ~~ token { a* ab }), "a* ab in a token does not";
my grammar G7 {
proto token tok {*}
token tok:sym<+> { <sym> }
token tok:foo { <sym> }

ok ("ab ab" ~~ / ab <.ws> ab /), "ws matches a space";
ok (q:to/end/ ~~ / ab <.ws> ab /), "ws matches a newline";
ab
ab
end
ok ("ab ab" ~~ / ab <.ws> ab /), "ws matches several spaces";
ok !("abab" ~~ / ab <.ws> ab /), "ws does not match nothing";
ok ("ab ab" ~~ rule { ab ab }), "rule gives space";
}

{
# doing a more reasonable test will probably require embedded blocks
ok "foobarx" ~~ / [ foo | foobar ]: x /, "LTM picks longest even if second";
ok "foobarx" ~~ / [ foobar | foo ]: x /, "LTM picks longest even if first";
}

{
my $x = '';
ok !("a" ~~ / a { $x = 1; } b /), '{} does not terminate regex';
is $x, 1, '{} is run even if regex fails';
$x = '';
ok !("" ~~ / a { $x = 1; } b /), '{} does not affect regex that ends before it';
is $x, '', '{} is only run if reached';
$x = 0;
ok ("aab" ~~ / a* { $x++ } ab /), '{} does not block backtracking';
is $x, 2, '{} is run multiple times when backtracking';

$x = '';
ok ("foo" ~~ / foo { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
"foo ~~ foo|foo";
is $x, 1, "with no other constraints, first item is used";
$x = '';
ok ("foo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
"foo ~~ fo*|foo";
is $x, 2, "longer literal prefix wins over seniority";
$x = '';
ok ("fooo" ~~ / fo* { $x = $x ~ 1 } | foo { $x = $x ~ 2 } /),
"foo ~~ fo*|foo";
is $x, 1, "longer length wins over prefix";
$x = '';
ok !("fooo" ~~ / [ fo*: { $x = $x ~ 1 } | foo { $x = $x ~ 2 } ] x /),
"foo !~~ [fo*:|foo]x";
is $x, '12', "will backtrack into shorter token";

my grammar G5 {
token a { foo }
token b { foobar }
token c { <a> | <b> }
token d { <c> x }

token e { x <e> x | y }

token TOP { A <d> | E <e> }
rule TOP { <tok> }
}

ok G5.parse('Afoobarx'), 'LTM works even through subrules';
ok G5.parse('Exxyxx'), 'recursivity does not crash LTM';

my grammar G6 {
token a { fo* { $x = 1 } }
token b { foo { $x = 2 } }
token TOP { <a> | <b> }
}
G6.parse("foo");
is $x, 2, "prefix length testing works in subrules";
ok G7.parse('+'), "can parse :sym<> symbols";
ok G7.parse('foo'), "can parse : symbols";
}

# {
# my grammar G7 {
# proto token tok {*}
# token tok:sym<+> { <sym> }
# token tok:foo { <sym> }
#
# rule TOP { <tok> }
# }
#
# ok G7.parse('+'), "can parse :sym<> symbols";
# ok G7.parse('foo'), "can parse : symbols";
# }

{
my $a;
ok 'xxy' ~~ /x { $a = $/.pos } /, "can match with \$/ stuff";
Expand Down

0 comments on commit 2eff169

Please sign in to comment.