Permalink
Browse files

regexes: speed up scanning for literal matches

If an unanchored regex (not ignoring case) starts with a literal,
we now use the "index" opcode (at least on Parrot) to search for
the start position.
Speeds up the fairly artificial benchmark nqp::x('abc', 500) ~ 'def' ~~ /def/
by a factor of 4.
  • Loading branch information...
1 parent 062b576 commit 71c142a34f2fcac2383f5ff05a683ff6222c8b36 @moritz moritz committed Oct 18, 2013
Showing with 17 additions and 1 deletion.
  1. +13 −1 src/QRegex/P6Regex/Actions.nqp
  2. +4 −0 src/vm/parrot/QAST/Compiler.nqp
@@ -667,9 +667,21 @@ class QRegex::P6Regex::Actions is HLL::Actions {
self.store_regex_nfa($code_obj, $block, QRegex::NFA.new.addnode($qast));
self.alt_nfas($code_obj, $block, $qast);
+ my $scan := QAST::Regex.new( :rxtype<scan> );
+ {
+ my $q := $qast;
+ if $q.rxtype eq 'concat' && $q[0] {
+ $q := $q[0]
+ }
+ if $q.rxtype eq 'literal' {
+ nqp::push($scan, $q[0]);
+ $scan.subtype($q.subtype);
+ }
+ }
+
$block<orig_qast> := $qast;
$qast := QAST::Regex.new( :rxtype<concat>,
- QAST::Regex.new( :rxtype<scan> ),
+ $scan,
$qast,
($anon
?? QAST::Regex.new( :rxtype<pass> )
@@ -1564,6 +1564,10 @@ class QAST::Compiler is HLL::Compiler {
$ops.push_pirop('goto', $scanlabel);
$ops.push($looplabel);
$ops.push_pirop('inc', %*REG<pos>);
+ if nqp::elems($node.list) && $node.subtype ne 'ignorecase' {
+ $ops.push_pirop('index', %*REG<pos>, %*REG<tgt>, self.rxescape($node[0]), %*REG<pos>);
+ $ops.push_pirop('eq', %*REG<pos>, -1, %*REG<fail>);
+ }
$ops.push_pirop('gt', %*REG<pos>, %*REG<eos>, %*REG<fail>);
$ops.push_pirop('repr_bind_attr_int', %*REG<cur>, %*REG<curclass>, '"$!from"', %*REG<pos>);
$ops.push($scanlabel);

0 comments on commit 71c142a

Please sign in to comment.