Skip to content

Commit b015083

Browse files
committed
Bunch of small regex fixes/features.
Fix emitting of string containing horizontal whitespace. Implement enumcharlist. Implement eol, bol, lwb, rwb anchors. Fix bug in quantifiers.
1 parent 52d13fe commit b015083

File tree

1 file changed

+45
-3
lines changed

1 file changed

+45
-3
lines changed

src/vm/js/QAST/Compiler.nqp

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,13 @@ sub quote_string($str) {
2424
} elsif $backslash && $c eq 'a' {
2525
$out := $out ~ 'x07';
2626
} else {
27-
$out := $out ~ $c;
27+
if ($c eq "\x[2028]") {
28+
$out := $out ~ "\\u2028";
29+
} elsif ($c eq "\x[2029]") {
30+
$out := $out ~ "\\u2029";
31+
} else {
32+
$out := $out ~ $c;
33+
}
2834
}
2935
$backslash := !$backslash && $c eq '\\';
3036
}
@@ -1357,12 +1363,48 @@ class RegexCompiler {
13571363
~ self.case($done);
13581364
}
13591365

1366+
# Build the JavaScript source text that matches the current character
# against an enumerated character list.
#
# $node[0] is passed through quote_string and the result is spliced into the
# generated code as the receiver of .indexOf (presumably a quoted JS string
# literal -- confirm against quote_string). $node.negate picks the comparison
# operator and $node.subtype decides whether the position is advanced.
#
# Returns the concatenated code string (the last expression of the method).
method enumcharlist($node) {
1367+
my $charlist := quote_string($node[0]);
1368+
# The generated test is the FAILURE condition: '== -1' fails when the
# character is not in the list; with negate set, '!= -1' fails when it is.
my $testop := $node.negate ?? '!=' !! '==';
1369+
1370+
# First emitted statement: fail when the position is at or past the end of the target.
"if ($!pos >= $!target.length) \{{self.fail()}\}"
1371+
# Second emitted statement: look up the single character at the current position in the list.
~ "if ($charlist.indexOf($!target.substr($!pos,1)) $testop -1) \{{self.fail()}\}"
1372+
# A 'zerowidth' subtype emits no increment; every other subtype emits a position increment.
~ ($node.subtype eq 'zerowidth' ?? '' !! "$!pos++;\n")
1373+
1374+
}
1375+
1376+
# Build one line of JavaScript source that fails the match depending on
# whether nqp.op.iscclass reports the character at $pos as belonging to a class.
#
# $cclass   - key looked up in %const_map; the mapped value is emitted as the
#             first argument to nqp.op.iscclass.
# :$pos     - text emitted as the position argument; defaults to $!pos.
# :$negated - when false (the default) a '!' is emitted before the call, so the
#             generated code fails if iscclass is false; when true no '!' is
#             emitted, so it fails if iscclass is true.
#
# Returns the generated code string, terminated by a newline.
method cclass_check($cclass,:$pos=$!pos,:$negated=0) {
1377+
"if ({$negated ?? '' !! '!'}nqp.op.iscclass({%const_map{$cclass}},$!target,$pos)) \{{self.fail}\}\n";
1378+
}
13601379

13611380
method anchor($node) {
13621381
if $node.subtype eq 'eos' {
13631382
"if ($!pos < $!target.length) \{{self.fail}\}\n";
13641383
} elsif $node.subtype eq 'bos' {
13651384
"if ($!pos != 0) \{{self.fail}\}\n";
1385+
} elsif $node.subtype eq 'lwb' {
1386+
"if ($!pos >= $!target.length) \{{self.fail}\}\n"
1387+
~ self.cclass_check('CCLASS_WORD')
1388+
~ self.cclass_check('CCLASS_WORD', :negated(1), :pos("$!pos-1"));
1389+
} elsif $node.subtype eq 'rwb' {
1390+
"if ($!pos <= 0) \{{self.fail}\}\n"
1391+
~ self.cclass_check('CCLASS_WORD', :negated(1), :pos($!pos))
1392+
~ self.cclass_check('CCLASS_WORD', :pos("$!pos-1"));
1393+
} elsif $node.subtype eq 'bol' {
1394+
my $done_label := self.new_label;
1395+
1396+
"if ($!pos == 0) \{{self.goto($done_label)}\}\n"
1397+
~ "if ($!pos >= $!target.length) \{{self.fail}\}\n"
1398+
~ self.cclass_check('CCLASS_NEWLINE',:pos("$!pos-1"))
1399+
~ self.case($done_label);
1400+
} elsif $node.subtype eq 'eol' {
1401+
my $done_label := self.new_label;
1402+
1403+
"if (nqp.op.iscclass({%const_map<CCLASS_NEWLINE>},$!target,$!pos)) \{{self.goto($done_label)}\}\n"
1404+
~ "if ($!pos != $!target.length) \{{self.fail}\}\n"
1405+
~ "if ($!pos == 0) \{{self.goto($done_label)}\}\n"
1406+
~ self.cclass_check('CCLASS_NEWLINE', :negated(1), :pos("$!pos-1"))
1407+
~ self.case($done_label);
13661408
} else {
13671409
$!compiler.NYI("anchor type: {$node.subtype}");
13681410
}
@@ -1572,8 +1614,8 @@ class RegexCompiler {
15721614
self.case($loop),
15731615
"$rep = $irep;\n",
15741616
"$rep++;\n",
1575-
($min > 1 ?? "if (rep < $min) \{{self.goto($loop)}\}\n" !! ''),
1576-
($max > 1 ?? "if (rep >= $max) \{{self.goto($done)}\}\n" !! ''),
1617+
($min > 1 ?? "if ($rep < $min) \{{self.goto($loop)}\}\n" !! ''),
1618+
($max > 1 ?? "if ($rep >= $max) \{{self.goto($done)}\}\n" !! ''),
15771619
($max != 1 ?? self.mark($loop, $!pos, $rep) !! ''),
15781620
self.case($done)
15791621
);

0 commit comments

Comments
 (0)