Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

[GGE::Exp] codegen reimplementation

We no longer use TreeSpider to execute the regular expression. Instead, the
required Perl 6 code is generated and then compiled.

After this change, all of the old tests pass, and a few more to boot.
  • Loading branch information...
commit f9e2d4a3d33533270a54a523277d0b9bebd995cc 1 parent ad73e3f
@masak authored
View
4 Makefile.in
@@ -2,8 +2,8 @@ PERL6=<PERL6>
RAKUDO_DIR=<RAKUDO_DIR>
PERL6LIB='<PERL6LIB>:$(RAKUDO_DIR)'
-SOURCES=lib/GGE/Match.pm lib/GGE/Exp.pm lib/GGE/TreeSpider.pm \
- lib/GGE/OPTable.pm lib/GGE/Perl6Regex.pm lib/GGE.pm
+SOURCES=lib/GGE/Match.pm lib/GGE/Exp.pm lib/GGE/OPTable.pm \
+ lib/GGE/Perl6Regex.pm lib/GGE.pm
PIRS=$(SOURCES:.pm=.pir)
View
789 lib/GGE/Exp.pm
@@ -1,6 +1,27 @@
use v6;
use GGE::Match;
+class CodeString {
+ has Str $!contents = '';
+ my $counter = 0;
+
+ method emit($string, *@args, *%kwargs) {
+ $!contents ~= $string\
+ .subst(/\%(\d)/, { @args[$0] // '...' }, :g)\
+ .subst(/\%(\w)/, { %kwargs{$0} // '...' }, :g);
+ }
+
+ method escape($string) {
+ q['] ~ $string.trans( [ q['], q[\\] ] => [ q[\\'], q[\\\\] ] ) ~ q['];
+ }
+
+ method unique($prefix = '') {
+ $prefix ~ $counter++
+ }
+
+ method Str { $!contents }
+}
+
# a GGE::Exp describing what it contains, most commonly its .ast property,
# but sometimes other things.
role GGE::ShowContents {
@@ -9,36 +30,16 @@ role GGE::ShowContents {
}
}
-# The set of possible responses sent back to GGE::TreeSpider.
-
-# RAKUDO: Could name this one GGE::Exp::Actions or something, if enums
+# RAKUDO: Could name this one GGE::Exp::CUT or something, if enums
# with '::' in them worked, which they don't. [perl #71460]
-enum Action <
- DESCEND
- MATCH
- FAIL
- FAIL_GROUP
- FAIL_RULE
- BACKTRACK
->;
-
-role GGE::Backtracking {} # a GGE::Exp involved in backtracking
-role GGE::Container {} # a GGE::Exp containing other GGE::Exp nodes
-role GGE::MultiChild does GGE::Container {} # ...containing several...
-
-# RAKUDO: Blablabla, GGE::Exp::Cut, blablabla, see above. [perl #71460]
-enum Cut <
- CUT_GROUP
- CUT_RULE
- CUT_MATCH
->;
+enum CUT (
+ CUT_GROUP => -1,
+ CUT_RULE => -2,
+ CUT_MATCH => -3,
+);
class GGE::Exp is GGE::Match {
- method start($, $, %) { MATCH }
- method succeeded($, %) { MATCH }
- method failed($, %) { FAIL }
- method failed-group($, %) { FAIL_GROUP }
- method failed-rule($, %) { FAIL_RULE }
+ my $group;
method structure($indent = 0) {
# RAKUDO: The below was originally written as a map, but there's
@@ -58,22 +59,166 @@ class GGE::Exp is GGE::Match {
$contents ~= $inside;
' ' x $indent ~ self.WHAT.perl.subst(/^.*':'/, '') ~ $contents;
}
-}
-class GGE::Exp::Literal is GGE::Exp does GGE::ShowContents {
- method start($string, $pos is rw, %pad) {
- my $value = ~self.ast;
- if $pos < $string.chars
- && (self.hash-access('ignorecase')
- && $string.substr($pos, $value.chars).lc eq $value.lc
- || $string.substr($pos, $value.chars) eq $value) {
- $pos += $value.chars;
- MATCH
+ method compile(:$debug) {
+ my $source = self.root-p6(:$debug);
+ if $debug {
+ say $source;
+ say '';
}
- else {
- FAIL
+ my $binary = eval $source
+ or die ~$!;
+ return $binary;
+ }
+
+ method reduce() {
+ self;
+ }
+
+ method root-p6(:$debug) {
+ my $code = CodeString.new();
+ $code.unique(); # XXX: Remove this one when we do other real calls
+ $code.emit( q[[sub ($target, :$debug) {
+ my $mob = GGE::Match.new(:$target);
+ my $mfrom;
+ my $cpos = 0;
+ my $pos;
+ my $rep;
+ my $lastpos = $target.chars;
+ my $cutmark;
+ my @gpad; # TODO: PGE generates this one only when needed
+ my @ustack; # TODO: PGE generates this one only when needed
+ my $captscope = $mob; # TODO: PGE generates this one only when needed
+ my $captob; # TODO: PGE generates this one only when needed
+ my @cstack = 'try_match';
+ my &goto = -> $label { @cstack[*-1] = $label };
+ my &local-branch = -> $label {
+ @cstack[*-1] ~= '_cont';
+ @cstack.push($label)
+ };
+ my &local-return = -> { @cstack.pop };
+ loop {
+ given @cstack[*-1] {
+ when 'try_match' {
+ if $cpos > $lastpos { goto('fail_rule'); break; }
+ $mfrom = $pos = $cpos;
+ $cutmark = 0;
+ local-branch('R');
+ }
+ when 'try_match_cont' {
+ if $cutmark <= %0 { goto('fail_cut'); break; }
+ ++$cpos;
+ goto('try_match');
+ }
+ when 'fail_rule' {
+ # $cutmark = %0 # XXX: Not needed yet
+ goto('fail_cut');
+ }
+ when 'fail_cut' {
+ $mob.from = 0;
+ $mob.to = -2;
+ return $mob;
+ }
+ when 'succeed' {
+ $mob.from = $mfrom;
+ $mob.to = $pos;
+ return $mob;
+ }
+ when 'fail' {
+ local-return();
+ } ]], CUT_RULE);
+ my $explabel = 'R';
+ $GGE::Exp::group = self;
+ my $exp = self.reduce;
+ if $debug {
+ say $exp.structure;
+ say '';
+ }
+ $exp.p6($code, $explabel, 'succeed');
+ $code.emit( q[[
+ default {
+ die "No such label: {@cstack[*-1]}";
+ }
}
}
+} ]]);
+ }
+
+ method getargs($label, $next, %hash?) {
+ %hash<L S> = $label, $next;
+ if %hash.exists('quant') {
+ my $quant = %hash<quant>;
+ %hash<m> = $quant.hash-access('min');
+ %hash<M> = %hash<m> == 0 ?? '### ' !! '';
+ %hash<n> = $quant.hash-access('max');
+ %hash<N> = %hash<n> == Inf ?? '### ' !! '';
+ my $bt = $quant.hash-access('backtrack').name.lc;
+ %hash<Q> = sprintf '%s..%s (%s)', %hash<m>, %hash<n>, $bt;
+ }
+ return %hash;
+ }
+
+ method gencapture($label) {
+ my $cname = self.hash-access('cname');
+ my $captgen = CodeString.new;
+ my $captsave = CodeString.new;
+ my $captback = CodeString.new;
+ if self.hash-access('iscapture') {
+ if self.hash-access('isarray') {
+ $captsave.emit('$captscope[%0].push($captob);', $cname);
+ $captback.emit('$captscope[%0].pop();', $cname);
+ $captgen.emit( q[[if defined $captscope[%0] {
+ goto('%1_cgen');
+ break;
+ }
+ $captscope[%0] = [];
+ local-branch('%1_cgen');
+ }
+ when '%1_cont' {
+ $captscope[%0] = undef;
+ goto('fail');
+ }
+ when '%1_cgen' { ]], $cname, $label);
+ }
+ else {
+ if $cname.substr(0, 1) eq q['] {
+ $captsave.emit('$captscope.hash-access(%0) = $captob;',
+ $cname);
+ $captback.emit('$captscope.delete(%0);', $cname);
+ }
+ else {
+ $captsave.emit('$captscope[%0] = $captob;', $cname);
+ $captback.emit('$captscope[%0] = undef;', $cname);
+ }
+ }
+ }
+ # RAKUDO: Cannot do multiple returns yet.
+ return ($captgen, $captsave, $captback);
+ }
+}
+
+class GGE::Exp::Literal is GGE::Exp does GGE::ShowContents {
+ method p6($code, $label, $next) {
+ my %args = self.getargs($label, $next);
+ my $literal = self.ast;
+ my $litlen = $literal.chars;
+ %args<I> = '';
+ if self.hash-access('ignorecase') {
+ %args<I> = '.lc';
+ $literal .= lc;
+ }
+ $literal = $code.escape($literal);
+ $code.emit( q[
+ when '%L' {
+ if $pos + %0 > $lastpos
+ || $target.substr($pos, %0)%I ne %1 {
+ goto('fail');
+ break;
+ }
+ $pos += %0;
+ goto('%S');
+ } ], $litlen, $literal, |%args);
+ }
}
enum GGE_BACKTRACK <
@@ -82,7 +227,7 @@ enum GGE_BACKTRACK <
NONE
>;
-class GGE::Exp::Quant is GGE::Exp does GGE::Backtracking does GGE::Container {
+class GGE::Exp::Quant is GGE::Exp {
method contents() {
my ($min, $max, $bt) = map { self.hash-access($_) },
<min max backtrack>;
@@ -90,142 +235,240 @@ class GGE::Exp::Quant is GGE::Exp does GGE::Backtracking does GGE::Container {
"{$bt.name.lc} $min..$max"
}
- method start($_: $, $, %pad is rw) {
- %pad<reps> = 0;
- my $bt = .hash-access('backtrack') // GREEDY;
- if .hash-access('min') > 0 {
- DESCEND
- }
- elsif .hash-access('max') > 0 && $bt != EAGER {
- if %pad<reps> >= .hash-access('min') {
- (%pad<mempos> //= []).push(%pad<pos>);
+ method p6($code, $label, $next) {
+ my %args = self.getargs($label, $next, { quant => self });
+ my $replabel = $label ~ '_repeat';
+ my $nextlabel = $code.unique('R');
+ %args<c C> = 0, '### ';
+ given self.hash-access('backtrack') {
+ when EAGER {
+ $code.emit( q[[
+ when '%L' { # quant %Q eager
+ push @gpad, 0;
+ local-branch('%0');
}
- DESCEND
- }
- else {
- MATCH
- }
- }
-
- method succeeded($_: $, %pad is rw) {
- ++%pad<reps>;
- if (.hash-access('backtrack') // GREEDY) != EAGER
- && %pad<reps> < .hash-access('max') {
- if %pad<reps> > .hash-access('min') {
- (%pad<mempos> //= []).push(%pad<pos>);
+ when '%L_cont' {
+ pop @gpad;
+ goto('fail');
+ }
+ when '%0' {
+ $rep = @gpad[*-1];
+ %Mif $rep < %m { goto('%L_1'); break; }
+ pop @gpad;
+ push @ustack, $pos;
+ push @ustack, $rep;
+ local-branch('%S');
+ }
+ when '%0_cont' {
+ $rep = pop @ustack;
+ $pos = pop @ustack;
+ push @gpad, $rep;
+ goto('%L_1');
+ }
+ when '%L_1' {
+ %Nif $rep >= %n { goto('fail'); break; }
+ ++$rep;
+ @gpad[*-1] = $rep;
+ goto('%1');
+ } ]], $replabel, $nextlabel, |%args);
+ }
+ when NONE {
+ %args<c C> = $code.unique(), '';
+ if self.hash-access('min') != 0
+ || self.hash-access('max') != Inf {
+ continue;
+ }
+ $code.emit( q[[
+ when '%L' { # quant 0..Inf none
+ local-branch('%0');
+ }
+ when '%L_cont' {
+ if $cutmark != %c { goto('fail'); break; }
+ $cutmark = 0;
+ goto('fail');
+ }
+ when '%0' {
+ push @ustack, $pos;
+ local-branch('%1');
+ }
+ when '%0_cont' {
+ $pos = pop @ustack;
+ if $cutmark != 0 { goto('fail'); break; }
+ local-branch('%S');
+ }
+ when '%0_cont_cont' {
+ if $cutmark != 0 { goto('fail'); break; }
+ $cutmark = %c;
+ goto('fail');
+ } ]], $replabel, $nextlabel, |%args);
+ }
+ default {
+ $code.emit( q[[
+ when '%L' { # quant %Q greedy/none
+ push @gpad, 0;
+ local-branch('%0');
+ }
+ when '%L_cont' {
+ pop @gpad;
+ %Cif $cutmark != %c { goto('fail'); break; }
+ %C$cutmark = 0;
+ goto('fail');
+ }
+ when '%0' {
+ $rep = @gpad[*-1];
+ %Nif $rep >= %n { goto('%L_1'); break; }
+ ++$rep;
+ @gpad[*-1] = $rep;
+ push @ustack, $pos;
+ push @ustack, $rep;
+ local-branch('%1');
+ }
+ when '%0_cont' {
+ $rep = pop @ustack;
+ $pos = pop @ustack;
+ if $cutmark != 0 { goto('fail'); break; }
+ --$rep;
+ goto('%L_1');
+ }
+ when '%L_1' {
+ %Mif $rep < %m { goto('fail'); break; }
+ pop @gpad;
+ push @ustack, $rep;
+ local-branch('%S');
+ }
+ when '%L_1_cont' {
+ $rep = pop @ustack;
+ push @gpad, $rep;
+ if $cutmark != 0 { goto('fail'); break; }
+ %C$cutmark = %c;
+ goto('fail');
+ } ]], $replabel, $nextlabel, |%args);
}
- DESCEND
- }
- else {
- MATCH
- }
- }
-
- method failed($_: $pos, %pad is rw) {
- if %pad<reps> >= .hash-access('min') {
- MATCH
- }
- else {
- FAIL
- }
- }
-
- method backtracked($_: $pos is rw, %pad) {
- my $bt = .hash-access('backtrack') // GREEDY;
- if $bt == EAGER
- && %pad<reps> < .hash-access('max') {
- DESCEND
- }
- elsif $bt == GREEDY && +%pad<mempos> {
- $pos = pop %pad<mempos>;
- MATCH
- }
- else {
- FAIL
}
+ self[0].p6($code, $nextlabel, $replabel);
}
}
class GGE::Exp::CCShortcut is GGE::Exp does GGE::ShowContents {
- method start($string, $pos is rw, %pad) {
- my $cc-char = self.ast.substr(1);
- if $pos >= $string.chars {
- FAIL
- }
- elsif self.ast eq '.'
- || self.ast eq '\\N' && !($string.substr($pos, 1) eq "\n"|"\r")
- || self.ast eq '\\s' && $string.substr($pos, 1) ~~ /\s/
- || self.ast eq '\\S' && $string.substr($pos, 1) ~~ /\S/
- || self.ast eq '\\w' && $string.substr($pos, 1) ~~ /\w/
- || self.ast eq '\\W' && $string.substr($pos, 1) ~~ /\W/
- || self.ast eq '\\d' && $string.substr($pos, 1) ~~ /\d/
- || self.ast eq '\\D' && $string.substr($pos, 1) ~~ /\D/ {
- ++$pos;
- MATCH
- }
- else {
- FAIL
- }
+ method p6($code, $label, $next) {
+ my $failcond = self.ast eq '.'
+ ?? 'False'
+ !! sprintf '$target.substr($pos, 1) !~~ /%s/', self.ast;
+ $code.emit( q[
+ when '%0' { # ccshortcut %1
+ if $pos >= $lastpos || %2 {
+ goto('fail');
+ break;
+ }
+ ++$pos;
+ goto('%3');
+ } ], $label, self.ast, $failcond, $next );
}
}
class GGE::Exp::Newline is GGE::Exp does GGE::ShowContents {
- method start($string, $pos is rw, %pad) {
- if $pos >= $string.chars {
- FAIL
- }
- elsif $string.substr($pos, 2) eq "\r\n" {
- $pos += 2;
- MATCH
- }
- elsif $string.substr($pos, 1) eq "\n"|"\r" {
- ++$pos;
- MATCH
- }
- else {
- FAIL
- }
+ method p6($code, $label, $next) {
+ $code.emit( q[
+ when '%0' { # newline
+ unless $target.substr($pos, 1) eq "\n"|"\r" {
+ goto('fail');
+ break;
+ }
+ my $twochars = $target.substr($pos, 2);
+ ++$pos;
+ if $twochars eq "\r\n" {
+ ++$pos;
+ }
+ goto('%1');
+ } ], $label, $next);
}
}
class GGE::Exp::Anchor is GGE::Exp does GGE::ShowContents {
- method start($string, $pos is rw, %pad) {
- my $matches = self.ast eq '^' && $pos == 0
- || self.ast eq '$' && $pos == $string.chars
- || self.ast eq '<<' && $string.substr($pos, 1) ~~ /\w/
- && ($pos == 0 || $string.substr($pos - 1, 1) !~~ /\w/)
- || self.ast eq '>>' && $pos > 0
- && $string.substr($pos - 1, 1) ~~ /\w/
- && ($pos == $string.chars || $string.substr($pos, 1) !~~ /\w/)
- || self.ast eq '^^' && ($pos == 0 || $pos < $string.chars
- && $string.substr($pos - 1, 1) eq "\n")
- || self.ast eq '$$' && ($string.substr($pos, 1) eq "\n"
- || $pos == $string.chars
- && ($pos < 1 || $string.substr($pos - 1, 1) ne "\n"));
- $matches ?? MATCH !! FAIL;
+ method p6($code, $label, $next) {
+ $code.emit( q[
+ when '%0' { # anchor %1 ], $label, self.ast );
+ given self.ast {
+ when '^' {
+ $code.emit( q[
+ if $pos == 0 { goto('%0'); break; }
+ goto('fail'); ], $next );
+ }
+ when '$' {
+ $code.emit( q[
+ if $pos == $lastpos { goto('%0'); break; }
+ goto('fail'); ], $next );
+ }
+ when '<<' {
+ $code.emit( q[
+ if $target.substr($pos, 1) ~~ /\w/
+ && ($pos == 0 || $target.substr($pos - 1, 1) !~~ /\w/) {
+ goto('%0');
+ break;
+ }
+ goto('fail'); ], $next );
+ }
+ when '>>' {
+ $code.emit( q[
+ if $pos > 0 && $target.substr($pos - 1, 1) ~~ /\w/
+ && ($pos == $lastpos || $target.substr($pos, 1) !~~ /\w/) {
+ goto('%0');
+ break;
+ }
+ goto('fail'); ], $next );
+ }
+ when '^^' {
+ $code.emit( q[
+ if $pos == 0 || $pos < $lastpos
+ && $target.substr($pos - 1, 1) eq "\n" {
+ goto('%0');
+ break;
+ }
+ goto('fail'); ], $next );
+ }
+ when '$$' {
+ $code.emit( q[
+ if $target.substr($pos, 1) eq "\n"
+ || $pos == $lastpos
+ && ($pos < 1 || $target.substr($pos - 1, 1) ne "\n") {
+ goto('%0');
+ break;
+ }
+ goto('fail'); ], $next );
+ }
+ }
+ $code.emit( q[
+ } ]);
}
}
-class GGE::Exp::Concat is GGE::Exp does GGE::MultiChild {
- method start($, $, %pad is rw) {
- %pad<child> = 0;
- DESCEND
+class GGE::Exp::Concat is GGE::Exp {
+ method reduce() {
+ my $n = self.elems;
+ my @old-children = self.llist;
+ self.clear;
+ for @old-children -> $old-child {
+ my $new-child = $old-child.reduce();
+ self.push($new-child);
+ }
+ return self.llist == 1 ?? self[0] !! self;
}
- method succeeded($, %pad is rw) {
- if ++%pad<child> == self.elems {
- MATCH
- }
- else {
- DESCEND
+ method p6($code, $label, $next) {
+ $code.emit( q[
+ # concat ]);
+ my $cl = $label;
+ my $nl;
+ my $end = self.llist.elems - 1;
+ for self.llist.kv -> $i, $child {
+ $nl = $i == $end ?? $next !! $code.unique('R');
+ $child.p6($code, $cl, $nl);
+ $cl = $nl;
}
}
}
-class GGE::Exp::Modifier is GGE::Exp
- does GGE::ShowContents
- does GGE::Container
-{
+class GGE::Exp::Modifier is GGE::Exp does GGE::ShowContents {
method contents() {
self.hash-access('key');
}
@@ -241,94 +484,210 @@ class GGE::Exp::EnumCharList is GGE::Exp does GGE::ShowContents {
qq[$zw$neg$list]
}
- method start($string, $pos is rw, %pad) {
- if $pos >= $string.chars && !self.hash-access('iszerowidth') {
- FAIL
- }
- elsif defined(self.ast.index($string.substr($pos, 1)))
- xor self.hash-access('isnegated') {
- unless self.hash-access('iszerowidth') {
+ method p6($code, $label, $next) {
+ my $test = self.hash-access('isnegated') ?? 'defined' !! '!defined';
+ my $charlist = $code.escape(self.ast);
+ $code.emit( q[
+ when '%0' {
+ if $pos >= $lastpos
+ || %1 %2.index($target.substr($pos, 1)) {
+ goto('fail');
+ break;
+ }
++$pos;
- }
- MATCH
- }
- else {
- FAIL
- }
+ goto('%3');
+ } ], $label, $test, $charlist, $next);
}
}
-class GGE::Exp::Alt is GGE::Exp does GGE::MultiChild does GGE::Backtracking {
- method start($, $pos, %pad) {
- %pad<child> = 0;
- %pad<orig-pos> = $pos;
- DESCEND
- }
-
- method failed($pos is rw, %pad is rw) {
- FAIL
+class GGE::Exp::Alt is GGE::Exp {
+ method reduce() {
+ self[0] .= reduce;
+ self[1] .= reduce;
+ return self;
}
- method backtracked($pos is rw, %pad is rw) {
- if %pad<child> {
- FAIL
- }
- else {
- $pos = %pad<orig-pos>;
- %pad<child> = 1;
- DESCEND
- }
+ method p6($code, $label, $next) {
+ my $exp0label = $code.unique('R');
+ my $exp1label = $code.unique('R');
+ $code.emit( q[
+ when '%0' { # alt %1, %2
+ push @ustack, $pos;
+ local-branch('%1');
+ }
+ when '%0_cont' {
+ $pos = pop @ustack;
+ if $cutmark != 0 { goto('fail'); break; }
+ goto('%2');
+ } ], $label, $exp0label, $exp1label);
+ self[0].p6($code, $exp0label, $next);
+ self[1].p6($code, $exp1label, $next);
}
}
-class GGE::Exp::Conj is GGE::Exp does GGE::MultiChild {
- method start($, $pos, %pad) {
- %pad<child> = 0;
- %pad<orig-pos> = $pos;
- DESCEND
+class GGE::Exp::Conj is GGE::Exp {
+ method reduce() {
+ self[0] .= reduce;
+ self[1] .= reduce;
+ return self;
}
- method succeeded($pos is rw, %pad) {
- if %pad<child> {
- if $pos == %pad<firstmatch-pos> {
- MATCH
+ method p6($code, $label, $next) {
+ my $exp0label = $code.unique('R');
+ my $exp1label = $code.unique('R');
+ my $chk0label = $label ~ '_chk0';
+ my $chk1label = $label ~ '_chk1';
+ $code.emit( q[[
+ when '%0' { # conj %1, %2
+ push @gpad, $pos, $pos;
+ local-branch('%1');
}
- else {
- FAIL
+ when '%0_cont' {
+ pop @gpad;
+ pop @gpad;
+ goto('fail');
}
- }
- else {
- %pad<firstmatch-pos> = $pos;
- $pos = %pad<orig-pos>;
- %pad<child> = 1;
- DESCEND
- }
+ when '%3' {
+ @gpad[*-1] = $pos;
+ $pos = @gpad[*-2];
+ goto('%2');
+ }
+ when '%4' {
+ if $pos != @gpad[*-1] {
+ goto('fail');
+ break;
+ }
+ my $p1 = pop @gpad;
+ my $p2 = pop @gpad;
+ push @ustack, $p2, $p1;
+ local-branch('%5');
+ }
+ when '%4_cont' {
+ my $p1 = pop @ustack;
+ my $p2 = pop @ustack;
+ push @gpad, $p2, $p1;
+ goto('fail');
+ } ]], $label, $exp0label, $exp1label, $chk0label, $chk1label,
+ $next);
+ self[0].p6($code, $exp0label, $chk0label);
+ self[1].p6($code, $exp1label, $chk1label);
}
}
-class GGE::Exp::Group is GGE::Exp does GGE::Container {
- method start($, $, %) { DESCEND }
- method failed-group($, %) { FAIL }
+class GGE::Exp::Group is GGE::Exp {
+ method reduce() {
+ my $group = $GGE::Exp::group;
+ $GGE::Exp::group = self;
+ self[0] .= reduce;
+ $GGE::Exp::group = $group;
+ return self.exists('cutmark') && self.hash-access('cutmark') > 0
+ || self.exists('iscapture') && self.hash-access('iscapture') != 0
+ ?? self
+ !! self[0];
+ }
+
+ method p6($code, $label, $next) {
+ self[0].p6($code, $label, $next);
+ }
}
-class GGE::Exp::CGroup is GGE::Exp::Group does GGE::Backtracking {
- method start($, $, %) { DESCEND }
- method failed-group($, %) { FAIL }
- method backtracked($, %) { FAIL }
+class GGE::Exp::CGroup is GGE::Exp::Group {
+ method p6($code, $label, $next) {
+ my $explabel = $code.unique('R');
+ my $expnext = $label ~ '_close';
+ my %args = self.getargs($label, $next);
+ my ($captgen, $captsave, $captback) = self.gencapture($label);
+ %args<c C> = self.hash-access('cutmark'), '### ';
+ %args<X> = self.hash-access('isscope') ?? '' !! '### ';
+ $code.emit( q[[
+ when '%L' { # capture
+ %0
+ goto('%L_1');
+ }
+ when '%L_1' {
+ $captob = $captscope.new($captscope);
+ $captob.from = $pos; # XXX: PGE uses .pos here somehow.
+ push @gpad, $captscope;
+ push @gpad, $captob;
+ %X$captscope = $captob;
+ local-branch('%E');
+ }
+ when '%L_1_cont' {
+ $captob = pop @gpad;
+ $captscope = pop @gpad;
+ %Cif $cutmark != %c { goto('fail'); break; }
+ %C$cutmark = 0;
+ goto('fail');
+ }
+ when '%L_close' {
+ push @ustack, $captscope;
+ $captob = pop @gpad;
+ $captscope = pop @gpad;
+ $captob.to = $pos;
+ %1
+ push @ustack, $captob;
+ local-branch('%S');
+ }
+ when '%L_close_cont' {
+ $captob = pop @ustack;
+ %2
+ push @gpad, $captscope;
+ push @gpad, $captob;
+ $captscope = pop @ustack;
+ goto('fail');
+ } ]], $captgen, $captsave, $captback, :E($explabel), |%args);
+ self[0].p6($code, $explabel, $expnext);
+ }
}
-class GGE::Exp::Cut is GGE::Exp does GGE::Backtracking {
- method backtracked($pos, %pad) {
- if self.hash-access('cutmark') == CUT_GROUP {
- FAIL_GROUP
- }
- else {
- FAIL_RULE
+class GGE::Exp::Cut is GGE::Exp {
+ method reduce() {
+ if self.hash-access('cutmark') > CUT_RULE {
+ my $group = $GGE::Exp::group;
+ if !$group.hash-access('cutmark') {
+ $group.hash-access('cutmark') = CodeString.unique();
+ }
+ self.hash-access('cutmark') = $group.hash-access('cutmark');
}
+ return self;
+ }
+
+ method p6($code, $label, $next) {
+ my $cutmark = self.hash-access('cutmark') // 'NO_CUTMARK';
+ $code.emit( q[
+ when '%0' { # cut %2
+ local-branch('%1');
+ }
+ when '%0_cont' {
+ $cutmark = %2;
+ goto('fail');
+ } ], $label, $next, $cutmark);
}
}
class GGE::Exp::Scalar is GGE::Exp does GGE::ShowContents {
+ method p6($code, $label, $next) {
+ my $cname = self.hash-access('cname');
+ my $C = $cname.substr(0, 1) eq q[']
+ ?? '$mob.hash-access(' ~ $cname ~ ')'
+ !! '$mob[' ~ $cname ~ ']';
+ $code.emit( q[[
+ when '%0' { # scalar %2
+ my $capture = %C;
+ if $capture ~~ Array {
+ $capture = $capture[*-1];
+ }
+ my $length = $capture.chars;
+ if $pos + $length > $lastpos
+ || $target.substr($pos, $length) ne $capture {
+ goto('fail');
+ break;
+ }
+ $pos += $length;
+ goto('%1');
+ } ]], $label, $next, $cname, :$C);
+ return;
+ }
}
class GGE::Exp::Alias is GGE::Exp {
View
253 lib/GGE/Perl6Regex.pm
@@ -2,43 +2,62 @@ use v6;
use GGE::Match;
use GGE::Exp;
use GGE::OPTable;
-use GGE::TreeSpider;
-
-class GGE::Exp::WS is GGE::Exp does GGE::Backtracking {
- # XXX: This class should really derive from GGE::Exp::Subrule, but
- # that class hasn't been implemented yet, so...
- method start($string, $pos is rw, %pad) {
- %pad<from> = $pos;
- if $pos >= $string.chars {
- %pad<mpos> = $pos;
- MATCH
- }
- elsif $pos == 0 || $string.substr($pos, 1) ~~ /\W/
- || $string.substr($pos - 1, 1) ~~ /\W/ {
- while $pos < $string.chars && $string.substr($pos, 1) ~~ /\s/ {
- ++$pos;
- }
- %pad<mpos> = $pos;
- MATCH
- }
- else {
- FAIL
- }
- }
- method backtracked($_: $pos is rw, %pad) {
- $pos = --%pad<mpos>;
- if $pos >= %pad<from> {
- MATCH
- }
- else {
- FAIL
- }
+class GGE::Exp::WS is GGE::Exp {
+ # The below code is a working implementation of <.ws>, but it shouldn't
+ # be defined here. It should be defined in a method called 'ws' in the
+ # GGE::Match class. However, before we start calling other rules, this
+ # will do.
+ method p6($code, $label, $next) {
+ my %args = self.getargs($label, $next);
+ my $replabel = $label ~ '_repeat';
+ $code.emit( q[[
+ when '%L' { # ws
+ if $pos >= $lastpos {
+ goto('%S');
+ }
+ elsif $pos == 0 || $target.substr($pos, 1) ~~ /\W/
+ || $target.substr($pos - 1, 1) ~~ /\W/ {
+ push @gpad, 0;
+ local-branch('%0');
+ }
+ else {
+ goto('fail');
+ }
+ }
+ when '%L_cont' {
+ pop @gpad;
+ goto('fail');
+ }
+ when '%0' {
+ $rep = @gpad[*-1];
+ ++$rep;
+ if $target.substr($pos, 1) ~~ /\s/ {
+ ++$pos;
+ goto('%0');
+ break;
+ }
+ if $cutmark != 0 { goto('fail'); break; }
+ --$rep;
+ goto('%L_1')
+ }
+ when '%L_1' {
+ pop @gpad;
+ push @ustack, $rep;
+ local-branch('%S');
+ }
+ when '%L_1_cont' {
+ $rep = pop @ustack;
+ push @gpad, $rep;
+ if $cutmark != 0 { goto('fail'); break; }
+ goto('fail');
+ } ]], $replabel, |%args);
}
}
class GGE::Perl6Regex {
- has $!regex;
+ has GGE::Exp $!exp;
+ has Callable $!binary;
my &unescape = -> @codes { join '', map { chr(:16($_)) }, @codes };
my $h-whitespace = unescape <0009 0020 00a0 1680 180e 2000 2001 2002 2003
@@ -54,95 +73,93 @@ class GGE::Perl6Regex {
't' => "\t",
;
- method new($pattern) {
- my $optable = GGE::OPTable.new();
- $optable.newtok('term:', :precedence('='),
- :nows, :parsed(&GGE::Perl6Regex::parse_term));
- $optable.newtok('term:#', :equiv<term:>,
- :nows, :parsed(&GGE::Perl6Regex::parse_term_ws));
- $optable.newtok('term:\\', :equiv<term:>,
- :nows, :parsed(&GGE::Perl6Regex::parse_term_backslash));
- $optable.newtok('term:^', :equiv<term:>,
- :nows, :match(GGE::Exp::Anchor));
- $optable.newtok('term:^^', :equiv<term:>,
- :nows, :match(GGE::Exp::Anchor));
- $optable.newtok('term:$$', :equiv<term:>,
- :nows, :match(GGE::Exp::Anchor));
- $optable.newtok('term:<<', :equiv<term:>,
- :nows, :match(GGE::Exp::Anchor));
- $optable.newtok('term:>>', :equiv<term:>,
- :nows, :match(GGE::Exp::Anchor));
- $optable.newtok('term:.', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\d', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\D', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\s', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\S', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\w', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\W', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\N', :equiv<term:>,
- :nows, :match(GGE::Exp::CCShortcut));
- $optable.newtok('term:\\n', :equiv<term:>,
- :nows, :match(GGE::Exp::Newline));
- $optable.newtok('term:$', :equiv<term:>,
- :nows, :parsed(&GGE::Perl6Regex::parse_dollar));
- $optable.newtok('term:<[', :equiv<term:>,
- :nows, :parsed(&GGE::Perl6Regex::parse_enumcharclass));
- $optable.newtok('term:<-', :equiv<term:>,
- :nows, :parsed(&GGE::Perl6Regex::parse_enumcharclass));
- $optable.newtok("term:'", :equiv<term:>,
- :nows, :parsed(&GGE::Perl6Regex::parse_quoted_literal));
- $optable.newtok('term:::', :equiv<term:>,
- :nows, :match(GGE::Exp::Cut));
- $optable.newtok('term::::', :equiv<term:>,
- :nows, :match(GGE::Exp::Cut));
- $optable.newtok('term:<commit>', :equiv<term:>,
- :nows, :match(GGE::Exp::Cut));
- $optable.newtok('circumfix:[ ]', :equiv<term:>,
- :nows, :match(GGE::Exp::Group));
- $optable.newtok('circumfix:( )', :equiv<term:>,
- :nows, :match(GGE::Exp::CGroup));
- $optable.newtok('postfix:*', :looser<term:>,
- :parsed(&GGE::Perl6Regex::parse_quant));
- $optable.newtok('postfix:+', :equiv<postfix:*>,
- :parsed(&GGE::Perl6Regex::parse_quant));
- $optable.newtok('postfix:?', :equiv<postfix:*>,
- :parsed(&GGE::Perl6Regex::parse_quant));
- $optable.newtok('postfix::', :equiv<postfix:*>,
- :parsed(&GGE::Perl6Regex::parse_quant));
- $optable.newtok('postfix:**', :equiv<postfix:*>,
- :parsed(&GGE::Perl6Regex::parse_quant));
- $optable.newtok('infix:', :looser<postfix:*>, :assoc<list>,
- :nows, :match(GGE::Exp::Concat));
- $optable.newtok('infix:&', :looser<infix:>,
- :nows, :match(GGE::Exp::Conj));
- $optable.newtok('infix:|', :looser<infix:&>,
- :nows, :match(GGE::Exp::Alt));
- $optable.newtok('prefix:|', :equiv<infix:|>,
- :nows, :match(GGE::Exp::Alt));
- $optable.newtok('infix:=', :tighter<infix:>, :assoc<right>,
- :match(GGE::Exp::Alias));
- $optable.newtok('prefix::', :looser<infix:|>,
- :parsed(&GGE::Perl6Regex::parse_modifier));
+ my $optable = GGE::OPTable.new();
+ $optable.newtok('term:', :precedence('='),
+ :nows, :parsed(&GGE::Perl6Regex::parse_term));
+ $optable.newtok('term:#', :equiv<term:>,
+ :nows, :parsed(&GGE::Perl6Regex::parse_term_ws));
+ $optable.newtok('term:\\', :equiv<term:>,
+ :nows, :parsed(&GGE::Perl6Regex::parse_term_backslash));
+ $optable.newtok('term:^', :equiv<term:>,
+ :nows, :match(GGE::Exp::Anchor));
+ $optable.newtok('term:^^', :equiv<term:>,
+ :nows, :match(GGE::Exp::Anchor));
+ $optable.newtok('term:$$', :equiv<term:>,
+ :nows, :match(GGE::Exp::Anchor));
+ $optable.newtok('term:<<', :equiv<term:>,
+ :nows, :match(GGE::Exp::Anchor));
+ $optable.newtok('term:>>', :equiv<term:>,
+ :nows, :match(GGE::Exp::Anchor));
+ $optable.newtok('term:.', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\d', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\D', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\s', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\S', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\w', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\W', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\N', :equiv<term:>,
+ :nows, :match(GGE::Exp::CCShortcut));
+ $optable.newtok('term:\\n', :equiv<term:>,
+ :nows, :match(GGE::Exp::Newline));
+ $optable.newtok('term:$', :equiv<term:>,
+ :nows, :parsed(&GGE::Perl6Regex::parse_dollar));
+ $optable.newtok('term:<[', :equiv<term:>,
+ :nows, :parsed(&GGE::Perl6Regex::parse_enumcharclass));
+ $optable.newtok('term:<-', :equiv<term:>,
+ :nows, :parsed(&GGE::Perl6Regex::parse_enumcharclass));
+ $optable.newtok("term:'", :equiv<term:>,
+ :nows, :parsed(&GGE::Perl6Regex::parse_quoted_literal));
+ $optable.newtok('term:::', :equiv<term:>,
+ :nows, :match(GGE::Exp::Cut));
+ $optable.newtok('term::::', :equiv<term:>,
+ :nows, :match(GGE::Exp::Cut));
+ $optable.newtok('term:<commit>', :equiv<term:>,
+ :nows, :match(GGE::Exp::Cut));
+ $optable.newtok('circumfix:[ ]', :equiv<term:>,
+ :nows, :match(GGE::Exp::Group));
+ $optable.newtok('circumfix:( )', :equiv<term:>,
+ :nows, :match(GGE::Exp::CGroup));
+ $optable.newtok('postfix:*', :looser<term:>,
+ :parsed(&GGE::Perl6Regex::parse_quant));
+ $optable.newtok('postfix:+', :equiv<postfix:*>,
+ :parsed(&GGE::Perl6Regex::parse_quant));
+ $optable.newtok('postfix:?', :equiv<postfix:*>,
+ :parsed(&GGE::Perl6Regex::parse_quant));
+ $optable.newtok('postfix::', :equiv<postfix:*>,
+ :parsed(&GGE::Perl6Regex::parse_quant));
+ $optable.newtok('postfix:**', :equiv<postfix:*>,
+ :parsed(&GGE::Perl6Regex::parse_quant));
+ $optable.newtok('infix:', :looser<postfix:*>, :assoc<list>,
+ :nows, :match(GGE::Exp::Concat));
+ $optable.newtok('infix:&', :looser<infix:>,
+ :nows, :match(GGE::Exp::Conj));
+ $optable.newtok('infix:|', :looser<infix:&>,
+ :nows, :match(GGE::Exp::Alt));
+ $optable.newtok('prefix:|', :equiv<infix:|>,
+ :nows, :match(GGE::Exp::Alt));
+ $optable.newtok('infix:=', :tighter<infix:>, :assoc<right>,
+ :match(GGE::Exp::Alias));
+ $optable.newtok('prefix::', :looser<infix:|>,
+ :parsed(&GGE::Perl6Regex::parse_modifier));
+
+ method new($pattern, :$debug) {
my $match = $optable.parse($pattern);
die 'Perl6Regex rule error: can not parse expression'
if $match.to < $pattern.chars;
- my $expr = $match.hash-access('expr');
- return self.bless(*, :regex(perl6exp($expr, {})));
+ my $exp = perl6exp($match.hash-access('expr'), {});
+ my $binary = $exp.compile(:$debug);
+ return self.bless(*, :$exp, :$binary);
}
method postcircumfix:<( )>($target, :$debug) {
- if $debug {
- say $!regex.structure;
- say '';
- }
- GGE::TreeSpider.new(:$!regex, :$target, :pos(*)).crawl(:$debug);
+ $!binary($target, :$debug);
}
sub parse_term($mob) {
@@ -405,6 +422,9 @@ class GGE::Perl6Regex {
if $m.target.substr($m.to, 2) eq '..' {
$m.to += 2;
$m.hash-access('max') = $m.target.substr($m.to, 1);
+ if $m.hash-access('max') eq '*' {
+ $m.hash-access('max') = 'Inf';
+ }
++$m.to;
}
if $brackets {
@@ -493,7 +513,7 @@ class GGE::Perl6Regex {
%pad{$key} = $exp.ast;
$exp[0] = perl6exp($exp[0], %pad);
%pad{$key} = $temp;
- return $exp;
+ return $exp[0];
}
multi sub perl6exp(GGE::Exp::Concat $exp is rw, %pad) {
@@ -556,6 +576,7 @@ class GGE::Perl6Regex {
}
multi sub perl6exp(GGE::Exp::CGroup $exp is rw, %pad) {
+ $exp.hash-access('iscapture') = True;
unless $exp.exists('isscope') {
$exp.hash-access('isscope') = True;
}
View
236 lib/GGE/TreeSpider.pm
@@ -1,236 +0,0 @@
-use v6;
-use GGE::Exp;
-
-class GGE::TreeSpider {
- has GGE::Exp $!top;
- has Str $!target;
- has Int $!from;
- has Int $!pos;
- has Bool $!iterate-positions;
- has GGE::Match $!match;
- has @!capstack;
-
- has GGE::Exp $!current;
- has Int $!pos;
- has Action $!last;
- # RAKUDO: Originally had @!nodestack typed as 'GGE::Exp', but that
- # triggered a bug.
- has @!nodestack;
- has @!padstack;
- has @!savepoints;
-
- submethod BUILD(GGE::Exp :$regex!, Str :$!target!, :$pos!) {
- $!top = $regex;
- # RAKUDO: Smartmatch on type yields an Int, must convert to Bool
- # manually. [perl #71462]
- if $!iterate-positions = ?($pos ~~ Whatever) {
- $!from = 0;
- }
- else {
- $!from = $pos;
- }
- }
-
- method crawl(:$debug) {
- my &debug = $debug ?? -> *@_ { $*ERR.say(|@_) } !! -> *@_ { ; };
- $!match = GGE::Match.new(:target($!target));
- my @start-positions = $!iterate-positions ?? ^$!target.chars !! $!from;
- for @start-positions -> $start-position {
- debug 'Starting at position ', $start-position;
- @!savepoints = ();
- $!match.from = $!pos = $start-position;
- $!match.clear;
- @!capstack = $!match;
- $!current = $!top;
- $!last = DESCEND;
- loop {
- my %pad = $!last == DESCEND ?? (:pos($!pos)) !! pop @!padstack;
- my $nodename = $!current.WHAT.perl.subst(/.* '::'/, '');
- if $!current.?contents {
- $nodename ~= '(' ~ $!current.contents ~ ')';
- }
- my $fragment = ($!target ~ '«END»').substr($!pos, 5)\
- .trans( [ "\n", "\t" ] => [ "\\n", "\\t" ] )\
- .substr(0, 5);
- if $!last == BACKTRACK|FAIL|FAIL_GROUP {
- $!pos = %pad<pos>;
- }
- if $!current ~~ GGE::Exp::CGroup
- && $!last == BACKTRACK {
- my $cname = $!current.hash-access('cname');
- @!capstack[*-1].[$cname] = undef;
- }
- if $!current ~~ GGE::Exp::Quant
- && $!current[0] ~~ GGE::Exp::CGroup
- && $!last == DESCEND {
- @!capstack.push([]);
- }
- if $!current ~~ GGE::Exp::Quant
- && $!current.hash-access('backtrack') == NONE
- && $!last == DESCEND {
- %pad<ratchet-savepoints> = +@!savepoints;
- }
- elsif $!current ~~ GGE::Exp::Group
- && $!last == DESCEND {
- %pad<group-savepoints> = +@!savepoints;
- }
- my $action = do given $!last {
- when DESCEND { $!current.start($!target, $!pos, %pad) }
- when MATCH { $!current.succeeded($!pos, %pad) }
- when FAIL { $!current.failed($!pos, %pad) }
- when BACKTRACK { $!current.backtracked($!pos, %pad) }
- when FAIL_GROUP { $!current.failed-group($!pos, %pad) }
- when FAIL_RULE { $!current.failed-rule($!pos, %pad) }
- when * { die 'Unknown action ', $!last.name }
- };
- if $!current ~~ GGE::Exp::Scalar {
- # Find a capture that is a scope.
- my $ix = @!capstack.end;
- --$ix
- while $ix > 0
- && @!capstack[$ix] !~~ Array
- && ! @!capstack[$ix].hash-access('isscope');
- my $topcap = @!capstack[$ix];
- my $cname = $!current.hash-access('cname');
- my $subrule = $cname.substr(0, 1) eq "'"
- ?? $topcap.hash-access($cname.substr(1, -1))
- !! $topcap[$cname];
- my $backref = $subrule ~~ Array
- ?? ~$subrule[*-1]
- !! ~$subrule;
- if $!pos <= $!target.chars - $backref.chars
- && $!target.substr($!pos, $backref.chars) eq $backref {
- $!pos += $backref.chars;
- }
- else {
- $action = FAIL;
- }
- }
- if $action != DESCEND
- && ($!last == BACKTRACK || !($!current ~~ GGE::Container)) {
- my $participle
- = $!last == BACKTRACK ?? 'backtracking' !! 'matching';
- debug sprintf '%-20s %12s "%-5s": %s',
- $nodename,
- $participle,
- $fragment,
- $action.name;
- }
- %pad<pos> = $!pos;
- push @!padstack, \%pad;
- if $!last == DESCEND {
- push @!nodestack, $!current;
- }
- if $!current ~~ GGE::Exp::Alt
- && $!last == DESCEND {
- @!savepoints.push(
- [[@!nodestack.list], [@!padstack.list]]
- );
- }
- if $!current ~~ GGE::Backtracking && $action == MATCH {
- if $!current ~~ GGE::Exp::Quant
- && $!current.hash-access('backtrack') == NONE {
- @!savepoints.=splice(0, %pad<ratchet-savepoints>);
- }
- else {
- @!savepoints.push(
- [[@!nodestack.list], [@!padstack.list]]
- );
- }
- }
- if $!current ~~ GGE::Exp::CGroup {
- given $action {
- when DESCEND {
- my $cap = GGE::Match.new( :target($!target),
- :from($!pos) );
- $cap.hash-access('isscope')
- = $!current.hash-access('isscope');
- @!capstack.push($cap);
- }
- when MATCH {
- my $cap = @!capstack.pop;
- $cap.to = $!pos;
- $cap.delete('isscope');
- # Find a capture that is a scope.
- my $ix = @!capstack.end;
- --$ix
- while $ix > 0
- && @!capstack[$ix] !~~ Array
- && ! @!capstack[$ix]\
- .hash-access('isscope');
- my $topcap = @!capstack[$ix];
- if $topcap ~~ Array {
- $topcap.push($cap);
- }
- elsif $!current.hash-access('isarray') {
- my $cname = $!current.hash-access('cname');
- ($topcap[$cname] //= []).push($cap);
- }
- else {
- my $cname = $!current.hash-access('cname');
- if $cname.substr(0, 1) eq q['] {
- my $key = $cname.substr(1, -1);
- $topcap.hash-access($key) = $cap;
- }
- else {
- $topcap[$cname] = $cap;
- }
- }
- }
- when FAIL | FAIL_GROUP | FAIL_RULE {
- if $!last != BACKTRACK {
- @!capstack.pop;
- }
- }
- }
- }
- elsif $!current ~~ GGE::Exp::Quant
- && $!current[0] ~~ GGE::Exp::CGroup
- && $action == MATCH {
- my $array = @!capstack.pop;
- my $cname = $!current[0].hash-access('cname');
- @!capstack[*-1].[$cname] = $array;
- }
- elsif $!current ~~ GGE::Exp::Group && $!last == FAIL_GROUP {
- @!savepoints.=splice(0, %pad<group-savepoints>);
- }
- if $action == DESCEND {
- $!current = $!current[ $!current ~~ GGE::MultiChild
- ?? %pad<child> !! 0 ];
- }
- else {
- pop @!nodestack;
- if $action == FAIL && ! @!nodestack && @!savepoints {
- my @sp = @!savepoints.pop.list;
- @!nodestack = @sp[0].list;
- @!padstack = @sp[1].list;
- $!current = @!nodestack[*-1];
- $!last = BACKTRACK;
- next;
- }
- if @!nodestack {
- $!current = @!nodestack[*-1];
- pop @!padstack;
- }
- else {
- $!last = $action;
- last;
- }
- }
- $!last = $action;
- }
- if $!last == FAIL_RULE {
- last;
- }
- if $!last == MATCH {
- $!match.to = $!pos;
- return $!match;
- }
- }
-
- # The match failed
- $!match.from = 0;
- $!match.to = -2;
- $!match;
- }
-}
View
2  t/perl6regex/01-regex.t
@@ -5,12 +5,12 @@ use GGE;
sub dirname($path) { $path.comb(/<-[/]>+ '/'/).join() } #' (vim fix)
my @test-files = <
- captures
metachars
quantifiers
backtrack
charclass
modifiers
+ captures
>;
for @test-files -> $test-file {
View
2  t/perl6regex/rx_captures
@@ -48,7 +48,9 @@ $1=[ (.) (.) (.) ] (.) abcdefg /mob 5: <d @ 3>/ perl5 numbered captures $1
[(.)$0]+ bookkeeper /mob 0 2: <e @ 5>/ backref $0[2]
(.)*x 123x /mob: <123x @ 0>/ repeated dot capture
+# todo :gge<no subrules yet>
$<key>=<alpha> 12ab34 /mob<key>: <a @ 2>/ alias capture
+# todo :gge<no subrules yet>
<key=alpha> 12ab34 /mob<key>: <a @ 2>/ alias capture
## vim: noexpandtab tabstop=4 shiftwidth=4
View
4 t/perl6regex/rx_modifiers
@@ -67,7 +67,7 @@ ab [:ignorecase cd ] ef aBCDef n ignorecase, lexical (:ignorecase)
:sigspace b c d a b c def y sigspace (:sigspace)
:sigspace b c d ab c d ef n sigspace (:sigspace)
:s(1) b c [:s(0) d e f ] a b c def y sigspace, lexical repetition (:s)
-# todo :pge<how can this pass in PGE already?>
+# todo :gge<how can this pass in PGE already?>
:s b c [:!s d e f ] a b c def y sigspace, lexical repetition (:s)
:s(0) b c [:s(1) d e f ] a b c def n sigspace, lexical repetition (:s)
# todo :pge<feature>
@@ -97,9 +97,7 @@ foo '-'? bar foo - bar n basic non-match
# todo :gge<no <?lookahead> yet>
:s<?wb>foo '-'? bar foo - bar y basic ws match with boundary modifier separation
:s::foo '-'? bar foo - bar y basic ws match with backtrack no-op modifier separation
-# todo :gge<no captures yet>
:s::(\w+) ':=' (\S+) dog := spot /mob 0: <dog @ 0>/ sigspace and capture together
-# todo :gge<no captures yet>
:s::(\w+) ':=' (\S+) dog := spot /mob 1: <spot @ 7>/ sigspace and capture together
# todo :pge<feature>
:perl5 \A.*? bcd\Q$\E..\z a bcd$ef y perl5 syntax (:perl5)
View
4 test-regex
@@ -7,9 +7,9 @@ die "Usage: ./test-regex <pattern> <target>"
my ($pattern, $target) = @*ARGS;
my GGE::Match $match
- = GGE::Perl6Regex.new($pattern).(backslash_escape($target), :debug);
+ = GGE::Perl6Regex.new($pattern, :debug)\
+ .( backslash_escape($target), :debug );
-say '';
say $match ?? $match.dump_str('mob', ' ', '') !! "No match\n";
sub backslash_escape($string) {
Please sign in to comment.
Something went wrong with that request. Please try again.