Skip to content

Commit

Permalink
Rewrite recursion into iteration (estimation of NFA size for RE).
Browse files Browse the repository at this point in the history
This is to avoid stack overflow on large RE (especially on instrumented
builds that have larger stack frames, like AddressSanitizer).

Partial fix for #219 "overflow-1.re test fails on system with small stack".
  • Loading branch information
skvadrik committed Apr 23, 2020
1 parent 4d9c809 commit 89be91f
Showing 1 changed file with 93 additions and 22 deletions.
115 changes: 93 additions & 22 deletions src/nfa/estimate_size.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,112 @@


namespace re2c {
namespace {

static size_t estimate(const RE *re)
// One entry of the explicit stack that estimate_re_size uses in place of
// recursion while walking an RE tree depth-first.
struct StackItem {
const RE *re; // sub-RE this entry refers to
uint32_t size; // saved size of the already visited left sub-RE; set only when an
               // alternative or concatenation is re-pushed with succ == 2, zero otherwise
uint8_t succ; // index of the next successor to be visited: 0 on the first visit,
              // 1 after the left (or only) sub-RE, 2 after the right sub-RE
};

// Estimates the size of the RE tree rooted at re0 using an iterative
// depth-first traversal driven by `stack` instead of recursion.
//
// Per-node result, as computed by the loop below:
//   NIL       -> 0
//   SYM, TAG  -> 1
//   ALT       -> left + right + 1
//   CAT       -> left + right
//   ITER      -> sub * min + 1          if max == AST::MANY
//                sub * max + (max - min) otherwise
//
// `stack` is scratch storage supplied by the caller; the loop runs until it is
// empty, so it is empty again on return (checked by the final DASSERT).
//
// NOTE(review): all arithmetic is done in uint32_t, so a large enough product
// in the ITER case wraps silently — confirm that repetition bounds / RE size
// are limited elsewhere.
static uint32_t estimate_re_size(const RE *re0, std::vector<StackItem> &stack)
{
// NOTE(review): the switch below (through the ITER case) and the later
// `return 0; /* unreachable */` look like the removed recursive body that the
// diff view interleaves with the new loop; they call `estimate`, which has no
// visible body here — confirm before treating this text as compilable source.
switch (re->type) {
case RE::NIL: return 0;
case RE::SYM: return 1;
case RE::TAG: return 1;
case RE::ALT:
return estimate(re->alt.re1)
+ estimate(re->alt.re2)
+ 1;
case RE::CAT:
return estimate(re->cat.re1)
+ estimate(re->cat.re2);
case RE::ITER: {
const size_t
iter = estimate(re->iter.re),
min = re->iter.min,
max = re->iter.max;
return max == AST::MANY
? iter * min + 1
: iter * max + (max - min);
// the estimated size of the last sub-RE visited by DFS
// (plays the role of the return value of a recursive call)
uint32_t size = 0;

// seed the traversal with the root, not yet visited (succ == 0)
const StackItem i0 = {re0, 0, 0};
stack.push_back(i0);

while (!stack.empty()) {
// take a copy of the top entry before popping it: the branches below
// push new entries onto the same vector
const StackItem i = stack.back();
stack.pop_back();

const RE *re = i.re;
if (re->type == RE::NIL) {
// leaf: contributes nothing
size = 0;
}
else if (re->type == RE::SYM || re->type == RE::TAG) {
// leaf: contributes one
size = 1;
}
else if (re->type == RE::ALT) {
if (i.succ == 0) {
// recurse into the left sub-RE
// (the parent is re-pushed first so that it is popped after the child)
StackItem k = {re, 0, 1};
stack.push_back(k);
StackItem j = {re->alt.re1, 0, 0};
stack.push_back(j);
}
else if (i.succ == 1) {
// recurse into the right sub-RE
// (`size` now holds the left result; save it in the parent entry)
StackItem k = {re, size, 2};
stack.push_back(k);
StackItem j = {re->alt.re2, 0, 0};
stack.push_back(j);
}
else {
// both sub-RE visited, recursive return
size = i.size // left sub-RE (saved on stack)
+ size // right sub-RE (just visited by DFS)
+ 1; // additional state for alternative
}
}
else if (re->type == RE::CAT) {
if (i.succ == 0) {
// recurse into the left sub-RE
StackItem k = {re, 0, 1};
stack.push_back(k);
StackItem j = {re->cat.re1, 0, 0};
stack.push_back(j);
}
else if (i.succ == 1) {
// recurse into the right sub-RE
// (`size` now holds the left result; save it in the parent entry)
StackItem k = {re, size, 2};
stack.push_back(k);
StackItem j = {re->cat.re2, 0, 0};
stack.push_back(j);
}
else {
// both sub-RE visited, recursive return
size = i.size // left sub-RE (saved on stack)
+ size; // right sub-RE (just visited by DFS)
}
}
else if (re->type == RE::ITER) {
if (i.succ == 0) {
// recurse into the sub-RE
StackItem k = {re, 0, 1};
stack.push_back(k);
StackItem j = {re->iter.re, 0, 0};
stack.push_back(j);
}
else {
// sub-RE visited, recursive return
// (`size` holds the result for the repeated sub-RE; nothing needs to be
// saved in the entry because there is only one successor)
const uint32_t min = re->iter.min, max = re->iter.max;
size = max == AST::MANY
? size * min + 1
: size * max + (max - min);
}
}
}
return 0; /* unreachable */

// the loop only exits when every pushed entry has been popped
DASSERT(stack.empty());
return size;
}

} // anonymous namespace

// Returns the size estimate for the whole set of REs in `res`: it starts from
// (nre - 1) and accumulates a per-RE contribution in the loop below.
//
// `res` must be non-empty (asserted): with nre == 0 the unsigned `nre - 1`
// would wrap around.
size_t estimate_size(const std::vector<RE*> &res)
{
// one scratch stack reused by every estimate_re_size call in the loop
std::vector<StackItem> stack;

const size_t nre = res.size();
DASSERT(nre > 0);
size_t size = nre - 1;

for (size_t i = 0; i < nre; ++i) {
// NOTE(review): the next two statements look like the removed and the added
// line of the diff shown side by side; taken literally they count every RE
// twice and call `estimate`, which has no visible body here — confirm that
// only the estimate_re_size line is meant to remain.
size += estimate(res[i]) + 1;
size += estimate_re_size(res[i], stack) + 1;
}

return size;
}

Expand Down

0 comments on commit 89be91f

Please sign in to comment.