Skip to content

Commit b289f69

Browse files
committed
implement ignoremark for charranges
1 parent 4de33c4 commit b289f69

File tree

2 files changed

+65
-12
lines changed

2 files changed

+65
-12
lines changed

src/QRegex/P6Regex/Actions.nqp

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -670,6 +670,9 @@ class QRegex::P6Regex::Actions is HLL::Actions {
670670
}
671671
else {
672672
my @alts;
673+
my $RXi := %*RX<i>;
674+
my $RXm := %*RX<m>;
675+
my $RXim := $RXi && $RXm;
673676
for $<charspec> {
674677
if $_[1] {
675678
my $node;
@@ -681,24 +684,28 @@ class QRegex::P6Regex::Actions is HLL::Actions {
681684
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
682685
|| $node.negate || nqp::chars($node[0]) != 1;
683686
$ord0 := $node.ann('codepoint') // nqp::ord($node[0]);
687+
$ord0 := nqp::ordbaseat(nqp::chr($ord0), 0) if $RXm;
684688
}
685689
else {
686-
$ord0 := nqp::ord(~$_[0][0]);
690+
$ord0 := $RXm ?? nqp::ordbaseat(~$_[0][0], 0) !! nqp::ord(~$_[0][0]);
687691
}
688692
if $_[1][0]<cclass_backslash> {
689693
$node := $_[1][0]<cclass_backslash>.ast;
690694
$/.CURSOR.panic("Illegal range endpoint in regex: " ~ ~$_)
691695
if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist'
692696
|| $node.negate || nqp::chars($node[0]) != 1;
693697
$ord1 := $node.ann('codepoint') // nqp::ord($node[0]);
698+
$ord1 := nqp::ordbaseat(nqp::chr($ord1), 0) if $RXm;
694699
}
695700
else {
696-
$ord1 := nqp::ord(~$_[1][0][0]);
701+
$ord1 := $RXm ?? nqp::ordbaseat(~$_[1][0][0], 0) !! nqp::ord(~$_[1][0][0]);
697702
}
698703
$/.CURSOR.panic("Illegal reversed character range in regex: " ~ ~$_)
699704
if $ord0 > $ord1;
700705
@alts.push(QAST::Regex.new(
701-
%*RX<i> ?? 'ignorecase' !! '', # XXX ignoremark
706+
$RXim ?? 'ignorecase+ignoremark' !!
707+
$RXi ?? 'ignorecase' !!
708+
$RXm ?? 'ignoremark' !! '',
702709
QAST::IVal.new( :value($ord0) ),
703710
QAST::IVal.new( :value($ord1) ),
704711
:negate( $<sign> eq '-' ),
@@ -714,23 +721,23 @@ class QRegex::P6Regex::Actions is HLL::Actions {
714721
@alts.push($bs);
715722
}
716723
}
717-
elsif %*RX<i> && %*RX<m> { # >
724+
elsif $RXim {
718725
my $c := nqp::chr(nqp::ordbaseat(~$_[0], 0));
719726
$str := $str ~ nqp::lc($c) ~ nqp::uc($c);
720727
}
721-
elsif %*RX<i> {
728+
elsif $RXi {
722729
my $c := ~$_[0];
723730
$str := $str ~ nqp::lc($c) ~ nqp::uc($c);
724731
}
725-
elsif %*RX<m> { # >
732+
elsif $RXm {
726733
$str := $str ~ nqp::chr(nqp::ordbaseat(~$_[0], 0));
727734
}
728735
else {
729736
$str := $str ~ ~$_[0];
730737
}
731738
}
732739
@alts.push(QAST::Regex.new( $str, :rxtype<enumcharlist>, :node($/), :negate( $<sign> eq '-' ),
733-
:subtype(%*RX<m> ?? 'ignoremark' !! '') )) # >
740+
:subtype($RXm ?? 'ignoremark' !! '') ))
734741
if nqp::chars($str);
735742
$qast := +@alts == 1 ?? @alts[0] !!
736743
$<sign> eq '-' ??

src/vm/moar/QAST/QASTRegexCompilerMAST.nqp

Lines changed: 51 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -538,33 +538,79 @@ class QAST::MASTRegexCompiler {
538538
op('const_i64', $lower, ival($node[1].value)),
539539
op('const_i64', $upper, ival($node[2].value)),
540540
]);
541-
if $node[0] eq 'ignorecase' {
541+
if $node[0] eq 'ignorecase+ignoremark' {
542+
my $succeed := label();
542543
my $s0 := $!regalloc.fresh_s();
543544
my $s1 := $!regalloc.fresh_s();
544545
my $i2 := $!regalloc.fresh_i();
546+
my $goal := $node.negate ?? %!reg<fail> !! $succeed;
547+
merge_ins(@ins, [
548+
op('substr_s', $s0, %!reg<tgt>, %!reg<pos>, %!reg<one>),
549+
op('lc', $s1, $s0),
550+
op('ordbaseat', $i0, $s1, %!reg<zero>),
551+
op('ge_i', $i1, $i0, $lower),
552+
op('le_i', $i2, $i0, $upper),
553+
op('band_i', $i1, $i1, $i2),
554+
op('if_i', $i1, $goal),
555+
op('uc', $s1, $s0),
556+
op('ordbaseat', $i0, $s1, %!reg<zero>),
557+
op('ge_i', $i1, $i0, $lower),
558+
op('le_i', $i2, $i0, $upper),
559+
op('band_i', $i1, $i1, $i2),
560+
op('if_i', $i1, $goal),
561+
]);
562+
$!regalloc.release_register($s0, $MVM_reg_str);
563+
$!regalloc.release_register($s1, $MVM_reg_str);
564+
$!regalloc.release_register($i2, $MVM_reg_int64);
565+
unless $node.negate {
566+
nqp::push(@ins, op('goto', %!reg<fail>));
567+
nqp::push(@ins, $succeed);
568+
}
569+
}
570+
elsif $node[0] eq 'ignorecase' {
545571
my $succeed := label();
572+
my $s0 := $!regalloc.fresh_s();
573+
my $s1 := $!regalloc.fresh_s();
574+
my $i2 := $!regalloc.fresh_i();
546575
my $goal := $node.negate ?? %!reg<fail> !! $succeed;
547576
merge_ins(@ins, [
548-
op('const_i64', $i2, ival(1)),
549-
op('substr_s', $s0, %!reg<tgt>, %!reg<pos>, $i2),
577+
op('substr_s', $s0, %!reg<tgt>, %!reg<pos>, %!reg<one>),
550578
op('lc', $s1, $s0),
551579
op('ordfirst', $i0, $s1),
552580
op('ge_i', $i1, $i0, $lower),
553581
op('le_i', $i2, $i0, $upper),
554-
op('bitand_i', $i1, $i1, $i2),
582+
op('band_i', $i1, $i1, $i2),
555583
op('if_i', $i1, $goal),
556584
op('uc', $s1, $s0),
557585
op('ordfirst', $i0, $s1),
558586
op('ge_i', $i1, $i0, $lower),
559587
op('le_i', $i2, $i0, $upper),
560-
op('bitand_i', $i1, $i1, $i2),
588+
op('band_i', $i1, $i1, $i2),
561589
op('if_i', $i1, $goal),
562590
]);
591+
$!regalloc.release_register($s0, $MVM_reg_str);
592+
$!regalloc.release_register($s1, $MVM_reg_str);
593+
$!regalloc.release_register($i2, $MVM_reg_int64);
563594
unless $node.negate {
564595
nqp::push(@ins, op('goto', %!reg<fail>));
565596
nqp::push(@ins, $succeed);
566597
}
567598
}
599+
elsif $node[0] eq 'ignoremark' {
600+
my $succeed := label();
601+
my $goal := $node.negate ?? $succeed !! %!reg<fail>;
602+
merge_ins(@ins, [
603+
op('ordbaseat', $i0, %!reg<tgt>, %!reg<pos>),
604+
op('gt_i', $i1, $i0, $upper),
605+
op('if_i', $i1, $goal),
606+
op('lt_i', $i1, $i0, $lower),
607+
op('if_i', $i1, $goal),
608+
]);
609+
if $node.negate {
610+
nqp::push(@ins, op('goto', %!reg<fail>));
611+
nqp::push(@ins, $succeed);
612+
}
613+
}
568614
else {
569615
my $succeed := label();
570616
my $goal := $node.negate ?? $succeed !! %!reg<fail>;

0 commit comments

Comments
 (0)