Skip to content

Commit edc965f

Browse files
committed
implement ignoremark for moarvm backend
Ranges in character classes are still TODO and require some thought. Also, this patch does not include the required stage0 bump, which is needed because the code uses a new op, nqp::ordbaseat(str, offset).
1 parent a5b7ade commit edc965f

File tree

5 files changed

+95
-15
lines changed

5 files changed

+95
-15
lines changed

src/QRegex/NFA.nqp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ class QRegex::NFA {
2121
my $EDGE_CHARRANGE_NEG := 13;
2222
my $EDGE_CODEPOINT_LL := 14;
2323
my $EDGE_CODEPOINT_I_LL := 15;
24+
my $EDGE_CODEPOINT_M := 16;
25+
my $EDGE_CODEPOINT_M_NEG := 17;
2426

2527
my $ACTIONS;
2628
my $nfadeb;
@@ -171,7 +173,7 @@ class QRegex::NFA {
171173
%cclass_code<n> := nqp::const::CCLASS_NEWLINE;
172174
%cclass_code<nl> := nqp::const::CCLASS_NEWLINE;
173175
$nfadeb := nqp::existskey(nqp::getenvhash(),'NQP_NFA_DEB');
174-
$ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL'];
176+
$ACTIONS := ['FATE','EPSILON','CODEPOINT','CODEPOINT_NEG','CHARCLASS','CHARCLASS_NEG','CHARLIST','CHARLIST_NEG','SUBRULE','CODEPOINT_I','CODEPOINT_I_NEG','GENERIC_VAR','CHARRANGE','CHARRANGE_NEG','CODEPOINT_LL','CODEPOINT_I_LL','CODEPOINT_M','CODEPOINT_M_NEG'];
175177
# $ind := 0;
176178
# $indent := '';
177179
$nfatime := 0;
@@ -244,6 +246,15 @@ class QRegex::NFA {
244246
dentout(self.addedge($from, $to, $!LITEND ?? $EDGE_CODEPOINT_I !! $EDGE_CODEPOINT_I_LL,
245247
[nqp::ord($litconst_lc, $i), nqp::ord($litconst_uc, $i)]));
246248
}
249+
elsif $node.subtype eq 'ignoremark' {
    # Ignoremark literal: every edge is tagged $EDGE_CODEPOINT_M and carries
    # nqp::ordbaseat of the literal at that offset (presumably the base
    # codepoint with combining marks dropped -- confirm against the op docs).
    my str $litconst := $node[0];
    # Same shape as the sibling branches: all characters before index
    # $litlen are chained through intermediate states (target -1).
    # NOTE(review): assumes $litlen is the index of the last character, as
    # the `$i < $litlen` loops plus the trailing edge at index $i in the
    # other branches imply.
    while $i < $litlen {
        $from := self.addedge($from, -1, $EDGE_CODEPOINT_M, nqp::ordbaseat($litconst, $i));
        $i := $i + 1;
    }
    # The final character must be wired to $to; the earlier `<=` loop sent
    # it to a fresh state as well and never reached $to.
    # XXX $EDGE_CODEPOINT_M_LL ? (no such edge type exists yet, so the plain
    # _M edge is used whether or not $!LITEND is set)
    dentout(self.addedge($from, $to, $EDGE_CODEPOINT_M, nqp::ordbaseat($litconst, $i)));
}
257+
# XXX elsif $node.subtype eq 'ignorecase+ignoremark' { ... }
247258
else {
248259
my str $litconst := $node[0];
249260
while $i < $litlen {

src/QRegex/P6Regex/Actions.nqp

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,15 @@ class QRegex::P6Regex::Actions is HLL::Actions {
119119
}
120120
else {
121121
my $qast := QAST::Regex.new( ~$/, :rxtype<literal>, :node($/));
122-
$qast.subtype('ignorecase') if %*RX<i>;
122+
if %*RX<i> && %*RX<m> { # >
123+
$qast.subtype('ignorecase+ignoremark')
124+
}
125+
elsif %*RX<i> {
126+
$qast.subtype('ignorecase')
127+
}
128+
elsif %*RX<m> { # >
129+
$qast.subtype('ignoremark')
130+
}
123131
make $qast;
124132
}
125133
}
@@ -205,15 +213,31 @@ class QRegex::P6Regex::Actions is HLL::Actions {
205213
my $quote := $<quote_EXPR>.ast;
206214
if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; }
207215
my $qast := QAST::Regex.new( $quote, :rxtype<literal>, :node($/) );
208-
$qast.subtype('ignorecase') if %*RX<i>;
216+
if %*RX<i> && %*RX<m> { # >
217+
$qast.subtype('ignorecase+ignoremark')
218+
}
219+
elsif %*RX<i> {
220+
$qast.subtype('ignorecase')
221+
}
222+
elsif %*RX<m> { # >
223+
$qast.subtype('ignoremark')
224+
}
209225
make $qast;
210226
}
211227

212228
method metachar:sym<">($/) {
    # Action for a double-quoted string inside a regex: wrap the quoted text
    # in a literal QAST::Regex node and make it the result of this match.
    my $literal := $<quote_EXPR>.ast;
    # A constant string arrives as a QAST::SVal; unwrap it to its value.
    $literal := $literal.value if QAST::SVal.ACCEPTS($literal);
    my $regex := QAST::Regex.new( $literal, :rxtype<literal>, :node($/) );
    # The subtype records which of the i / m modifiers are active; it is
    # left untouched when neither is set.
    my $subtype := %*RX<i> && %*RX<m> ?? 'ignorecase+ignoremark' !!
                   %*RX<i>            ?? 'ignorecase' !!
                   %*RX<m>            ?? 'ignoremark' !! '';
    $regex.subtype($subtype) if $subtype;
    make $regex;
}
219243

@@ -674,7 +698,7 @@ class QRegex::P6Regex::Actions is HLL::Actions {
674698
$/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
675699
if $ord0 > $ord1;
676700
@alts.push(QAST::Regex.new(
677-
%*RX<i> ?? 'ignorecase' !! '',
701+
%*RX<i> ?? 'ignorecase' !! '', # XXX ignoremark
678702
QAST::IVal.new( :value($ord0) ),
679703
QAST::IVal.new( :value($ord1) ),
680704
:negate( $<sign> eq '-' ),
@@ -690,12 +714,23 @@ class QRegex::P6Regex::Actions is HLL::Actions {
690714
@alts.push($bs);
691715
}
692716
}
693-
else {
717+
elsif %*RX<i> && %*RX<m> { # >
718+
my $c := nqp::chr(nqp::ordbaseat(~$_[0], 0));
719+
$str := $str ~ nqp::lc($c) ~ nqp::uc($c);
720+
}
721+
elsif %*RX<i> {
694722
my $c := ~$_[0];
695-
$str := $str ~ (%*RX<i> ?? nqp::lc($c) ~ nqp::uc($c) !! $c);
723+
$str := $str ~ nqp::lc($c) ~ nqp::uc($c);
724+
}
725+
elsif %*RX<m> { # >
726+
$str := $str ~ nqp::chr(nqp::ordbaseat(~$_[0], 0));
727+
}
728+
else {
729+
$str := $str ~ ~$_[0];
696730
}
697731
}
698-
@alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ) ))
732+
@alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ),
733+
:subtype(%*RX<m> ?? 'ignoremark' !! '') )) # >
699734
if nqp::chars($str);
700735
$qast := +@alts == 1 ?? @alts[0] !!
701736
$<sign> eq '-' ??

src/QRegex/P6Regex/Grammar.nqp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,12 @@ grammar QRegex::P6Regex::Grammar is HLL::Grammar {
453453

454454
proto token mod_ident { <...> }
455455
token mod_ident:sym<ignorecase> { $<sym>=[i] 'gnorecase'? » }
456+
# Modifier name for ignoremark: either the short form `m` or the long form
# `ignoremark`, followed by a right word boundary. In both spellings only the
# letter `m` is captured as $<sym>.
token mod_ident:sym<ignoremark> { [ $<sym>=[m] | 'ignore' $<sym>=[m] 'ark' ] » }
456462
token mod_ident:sym<ratchet> { $<sym>=[r] 'atchet'? » }
457463
token mod_ident:sym<sigspace> { $<sym>=[s] 'igspace'? » }
458464
token mod_ident:sym<dba> { <sym> » }

src/vm/moar/QAST/QASTOperationsMAST.nqp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,6 +2147,7 @@ QAST::MASTOperations.add_core_moarop_mapping('split', 'split');
21472147
QAST::MASTOperations.add_core_moarop_mapping('chr', 'chr');
21482148
QAST::MASTOperations.add_core_moarop_mapping('ordfirst', 'ordfirst');
21492149
QAST::MASTOperations.add_core_moarop_mapping('ordat', 'ordat');
2150+
QAST::MASTOperations.add_core_moarop_mapping('ordbaseat', 'ordbaseat');
21502151
QAST::MASTOperations.add_core_moarop_mapping('indexfrom', 'index_s');
21512152
QAST::MASTOperations.add_core_moarop_mapping('rindexfrom', 'rindexfrom');
21522153
QAST::MASTOperations.add_core_moarop_mapping('substr_s', 'substr_s');

src/vm/moar/QAST/QASTRegexCompilerMAST.nqp

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,20 @@ class QAST::MASTRegexCompiler {
507507
method enumcharlist($node) {
508508
my @ins;
509509
my $op := $node.negate ?? 'indexnat' !! 'indexat';
510-
nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
510+
if $node.subtype eq 'ignoremark' || $node.subtype eq 'ignorecase+ignoremark' {
511+
my $i0 := $!regalloc.fresh_i();
512+
my $s0 := $!regalloc.fresh_s();
513+
merge_ins(@ins, [
514+
op('ge_i', $i0, %!reg<pos>, %!reg<eos>),
515+
op('if_i', $i0, %!reg<fail>),
516+
op('ordbaseat', $i0, %!reg<tgt>, %!reg<pos>),
517+
op('chr', $s0, $i0),
518+
op($op, $s0, %!reg<zero>, sval($node[0]), %!reg<fail>),
519+
]);
520+
}
521+
else {
522+
nqp::push(@ins, op($op, %!reg<tgt>, %!reg<pos>, sval($node[0]), %!reg<fail>));
523+
}
511524
nqp::push(@ins, op('inc_i', %!reg<pos>))
512525
unless $node.subtype eq 'zerowidth';
513526
@ins
@@ -577,10 +590,8 @@ class QAST::MASTRegexCompiler {
577590

578591
method literal($node) {
579592
my $litconst := $node[0];
580-
my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !! 'eqat_s';
581593
my $s0 := $!regalloc.fresh_s();
582594
my $i0 := $!regalloc.fresh_i();
583-
my $cmpop := $node.negate ?? 'if_i' !! 'unless_i';
584595
my @ins;
585596
if $node.negate {
586597
# Need explicit check we're not going beyond the string end in the
@@ -594,11 +605,27 @@ class QAST::MASTRegexCompiler {
594605
# can happen only once at the beginning of a regex. hash of string constants
595606
# to the registers to which they are assigned.
596607
# XXX or make a specialized eqat_sc op that takes a constant string.
597-
nqp::push(@ins, op('const_s', $s0, sval($litconst)));
598608
# also, consider making the op branch directly from the comparison
599609
# instead of storing an integer to a temporary register
600-
nqp::push(@ins, op($eq_op, $i0, %!reg<tgt>, $s0, %!reg<pos>));
601-
nqp::push(@ins, op($cmpop, $i0, %!reg<fail>));
610+
# Combined ignorecase+ignoremark literal: fail when pos has reached eos,
# otherwise take ordbaseat of the target at pos, turn it back into a
# one-character string with chr, and test that string (from offset zero)
# against the lower- and upper-case forms of the literal's first character.
# NOTE(review): only offset 0 of $litconst is inspected here, yet the code
# after this branch advances pos by nqp::chars($litconst) unless the node is
# zerowidth. A literal longer than one character therefore looks
# under-checked -- confirm and extend to every character.
if $node.subtype eq 'ignorecase+ignoremark' {
611+
# Negated nodes use the "not at" variant of the index check.
my $op := $node.negate ?? 'indexnat' !! 'indexat';
612+
# Compile-time value: chr of ordbaseat for the literal's first character.
my $c := nqp::chr(nqp::ordbaseat($litconst, 0));
613+
merge_ins(@ins, [
614+
op('ge_i', $i0, %!reg<pos>, %!reg<eos>),
615+
op('if_i', $i0, %!reg<fail>),
616+
op('ordbaseat', $i0, %!reg<tgt>, %!reg<pos>),
617+
op('chr', $s0, $i0),
618+
op($op, $s0, %!reg<zero>, sval(nqp::lc($c) ~ nqp::uc($c)), %!reg<fail>),
619+
]);
620+
}
621+
else {
622+
my $eq_op := $node.subtype eq 'ignorecase' ?? 'eqatic_s' !!
623+
$node.subtype eq 'ignoremark' ?? 'eqatim_s' !! 'eqat_s';
624+
my $cmpop := $node.negate ?? 'if_i' !! 'unless_i';
625+
nqp::push(@ins, op('const_s', $s0, sval($litconst)));
626+
nqp::push(@ins, op($eq_op, $i0, %!reg<tgt>, $s0, %!reg<pos>));
627+
nqp::push(@ins, op($cmpop, $i0, %!reg<fail>));
628+
}
602629
unless $node.subtype eq 'zerowidth' {
603630
nqp::push(@ins, op('const_i64', $i0, ival(nqp::chars($litconst))));
604631
nqp::push(@ins, op('add_i', %!reg<pos>, %!reg<pos>, $i0));
@@ -922,7 +949,7 @@ class QAST::MASTRegexCompiler {
922949
$looplabel,
923950
op('inc_i', %!reg<pos>),
924951
];
925-
if $node.list && $node.subtype ne 'ignorecase' {
952+
if $node.list && $node.subtype ne 'ignorecase' && $node.subtype ne 'ignoremark' && $node.subtype ne 'ignorecase+ignoremark' {
926953
my $lit := $!regalloc.fresh_s();
927954
nqp::push(@ins, op('const_s', $lit, sval($node[0])));
928955
nqp::push(@ins, op('index_s', %!reg<pos>, %!reg<tgt>, $lit, %!reg<pos>));

0 commit comments

Comments
 (0)