Skip to content

Commit 1c3737b

Browse files
committed
remove futile scan operations in poscaps and subrules.
1 parent 5a18b11 commit 1c3737b

File tree

1 file changed

+62
-30
lines changed

1 file changed

+62
-30
lines changed

src/QRegex/P6Regex/Optimizer.nqp

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,24 @@ class QRegex::Optimizer {
4141
$node;
4242
}
4343

44+
method dont_scan($node) {
45+
while +@($node) >= 1 {
46+
if nqp::istype($node, QAST::Regex) {
47+
if nqp::istype($node[0], QAST::Regex) && $node[0].rxtype eq 'scan' {
48+
$node.shift;
49+
last;
50+
}
51+
if $node.rxtype eq 'concat' {
52+
$node := $node[0];
53+
} else {
54+
last;
55+
}
56+
} else {
57+
last;
58+
}
59+
}
60+
}
61+
4462
method visit_concat($node) {
4563
# a single-child concat can become the child itself
4664
self.visit_children($node);
@@ -70,38 +88,52 @@ class QRegex::Optimizer {
7088
}
7189

7290
method simplify_assertion($qast) {
73-
if $qast.subtype eq 'zerowidth'
74-
&& nqp::istype($qast[0], QAST::Node) && nqp::istype($qast[0][0], QAST::SVal)
75-
&& $qast[0][0].value eq 'before' {
76-
if nqp::istype($qast[0], QAST::Node) && nqp::istype($qast[0][1], QAST::Block)
77-
&& nqp::istype((my $regex := $qast[0][1][2]), QAST::Regex)
78-
&& $regex.rxtype eq 'concat' && $regex[0].rxtype eq 'scan' && $regex[2].rxtype eq 'pass' {
79-
self.visit_children($regex);
80-
my $simple := $regex[1];
81-
my $result := 0;
82-
if $simple.rxtype eq 'literal' && $simple.rxtype ne 'ignorecase' && !$qast.negate {
83-
$result := QAST::Regex.new(:rxtype<literal>, :subtype<zerowidth>, :node($simple.node),
84-
:negate($simple.negate),
85-
$simple[0]);
86-
} elsif $simple.rxtype eq 'enumcharlist' && $simple.rxtype ne 'ignorecase' {
87-
$result := QAST::Regex.new(:rxtype<enumcharlist>, :subtype<zerowidth>, :node($simple.node),
88-
:negate(nqp::bitxor_i($qast.negate, $simple.negate)),
89-
$simple[0]);
90-
} elsif $simple.rxtype eq 'charrange' && $simple.rxtype ne 'ignorecase' {
91-
$result := QAST::Regex.new(:rxtype<charrange>, :subtype<zerowidth>, :node($simple.node),
92-
:negate(nqp::bitxor_i($qast.negate, $simple.negate)),
93-
$simple[0],
94-
$simple[1],
95-
$simple[2]);
96-
} elsif $simple.rxtype eq 'cclass' && $simple.rxtype ne 'ignorecase' {
97-
$result := QAST::Regex.new(:rxtype<cclass>, :subtype<zerowidth>, :node($simple.node),
98-
:negate(nqp::bitxor_i($qast.negate, $simple.negate)), :name($simple.name));
99-
}
100-
if $result {
101-
self.stub_out_block($qast[0][1]);
102-
$qast := $result;
91+
my $child_is_block := nqp::istype($qast[0], QAST::Node)
92+
&& (nqp::istype($qast[0][0], QAST::SVal) || nqp::istype($qast[0][0], QAST::Block));
93+
if $child_is_block {
94+
my $block := nqp::istype($qast[0][0], QAST::SVal) ?? $qast[0][1] !! $qast[0][0];
95+
my $regex := $block[2];
96+
# extra safety
97+
if !nqp::istype($regex, QAST::Regex) { return $qast }
98+
self.visit_children($regex);
99+
if $qast.subtype eq 'zerowidth'
100+
&& $child_is_block
101+
&& $qast[0][0].value eq 'before' {
102+
if nqp::istype($qast[0], QAST::Node) && nqp::istype($qast[0][1], QAST::Block)
103+
&& $regex.rxtype eq 'concat' && $regex[0].rxtype eq 'scan' && $regex[2].rxtype eq 'pass' {
104+
my $simple := $regex[1];
105+
my $result := 0;
106+
if $simple.rxtype eq 'literal' && $simple.rxtype ne 'ignorecase' && !$qast.negate {
107+
$result := QAST::Regex.new(:rxtype<literal>, :subtype<zerowidth>, :node($simple.node),
108+
:negate($simple.negate),
109+
$simple[0]);
110+
} elsif $simple.rxtype eq 'enumcharlist' && $simple.rxtype ne 'ignorecase' {
111+
$result := QAST::Regex.new(:rxtype<enumcharlist>, :subtype<zerowidth>, :node($simple.node),
112+
:negate(nqp::bitxor_i($qast.negate, $simple.negate)),
113+
$simple[0]);
114+
} elsif $simple.rxtype eq 'charrange' && $simple.rxtype ne 'ignorecase' {
115+
$result := QAST::Regex.new(:rxtype<charrange>, :subtype<zerowidth>, :node($simple.node),
116+
:negate(nqp::bitxor_i($qast.negate, $simple.negate)),
117+
$simple[0],
118+
$simple[1],
119+
$simple[2]);
120+
} elsif $simple.rxtype eq 'cclass' && $simple.rxtype ne 'ignorecase' {
121+
$result := QAST::Regex.new(:rxtype<cclass>, :subtype<zerowidth>, :node($simple.node),
122+
:negate(nqp::bitxor_i($qast.negate, $simple.negate)), :name($simple.name));
123+
} else {
124+
# since before is implicitly anchored, we can remove the scan.
125+
self.dont_scan($regex);
126+
}
127+
if $result {
128+
self.stub_out_block($qast[0][1]);
129+
$qast := $result;
130+
}
103131
}
104132
}
133+
# positional and named captures are implicitly anchored, so we can get rid of the scan there, too.
134+
if $qast.subtype eq 'capture' && $child_is_block {
135+
self.dont_scan($regex);
136+
}
105137
}
106138
$qast;
107139
}

0 commit comments

Comments
 (0)