Skip to content

Commit

Permalink
Rewrite recursion into iteration (fixed tags computation).
Browse files Browse the repository at this point in the history
This is to avoid stack overflow on large RE (especially on instrumented
builds that have larger stack frames, like AddressSanitizer).

Partial fix for #219 "overflow-1.re test fails on system with small stack".
  • Loading branch information
skvadrik committed Apr 22, 2020
1 parent fd63499 commit 7b56434
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 48 deletions.
145 changes: 99 additions & 46 deletions src/regexp/fixed_tags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,77 +8,130 @@


namespace re2c {
namespace {

/* note [fixed and variable tags]
*
* If distance between two tags is constant (equal for all strings that
* match the given regexp), then lexer only needs to track one of them:
* the second tag equals the first tag plus static offset.
*
* However, this optimization is applied only to tags in top-level
* concatenation, because other tags may be uninitialized and we don't
* want to mess with conditional calculation of fixed tags.
* If distance between two tags is constant (equal for all strings that match
* the given regexp), then lexer only needs to track one of them: the second
* tag equals the first tag plus static offset.
*
* This optimization is applied only to tags in top-level concatenation,
* because in other cases the base tag may be NULL, and the calculation of
* the fixed tag value is not as simple as substracting a fixed offset.
* Furthermore, fixed tags are fobidden with generic API because it cannot
* express fixed offsets.
*
* Tags with history also cannot be fixed.
* express fixed offsets. M-tags (with history) also cannot be fixed.
*
* Another special case is fictive tags (those that exist only to impose
* hierarchical laws of POSIX disambiguation). We treat them as fixed
* in order to suppress code generation.
* hierarchical laws of POSIX disambiguation). We treat them as fixed in order
* to suppress code generation.
*/

static void find_fixed_tags(RE *re, std::vector<Tag> &tags,
size_t &dist, size_t &base, bool toplevel)
struct StackItem {
RE *re; // current sub-RE
uint32_t dist; // distance backup for alternative, unused for other RE
uint8_t succ; // index of the next successor to be visited
bool toplevel; // if this sub-RE is in top-level concatenation
};

static void find_fixed_tags(RESpec &spec, std::vector<StackItem> &stack, RE *re0)
{
switch (re->type) {
case RE::NIL: break;
case RE::SYM:
if (dist != Tag::VARDIST) ++dist;
break;
case RE::ALT: {
size_t d1 = dist, d2 = dist;
find_fixed_tags(re->alt.re1, tags, d1, base, false);
find_fixed_tags(re->alt.re2, tags, d2, base, false);
dist = (d1 == d2) ? d1 : Tag::VARDIST;
break;
static const uint32_t VARDIST = Tag::VARDIST;
bool toplevel = spec.opts->input_api != INPUT_CUSTOM;

// base tag, intially the fake "rightmost tag" (the end of RE)
size_t base = Tag::RIGHTMOST;

// the distance to the nearest top-level tag to the right (base tag)
uint32_t dist = 0;

const StackItem i0 = {re0, VARDIST, 0, toplevel};
stack.push_back(i0);

while (!stack.empty()) {
const StackItem i = stack.back();
stack.pop_back();
RE *re = i.re;

if (re->type == RE::SYM) {
if (dist != VARDIST) ++dist;
}
case RE::CAT:
find_fixed_tags(re->cat.re2, tags, dist, base, toplevel);
find_fixed_tags(re->cat.re1, tags, dist, base, toplevel);
break;
case RE::ITER:
find_fixed_tags(re->iter.re, tags, dist, base, false);
dist = Tag::VARDIST;
break;
case RE::TAG: {
else if (re->type == RE::ALT) {
if (i.succ == 0) {
// save the current distance on stack (from the alternative end
// to base) and recurse into the left sub-RE
StackItem k = {re, dist, 1, i.toplevel};
stack.push_back(k);
StackItem j = {re->alt.re1, VARDIST, 0, false};
stack.push_back(j);
}
else if (i.succ == 1) {
// save the current distance on stack (from the left sub-RE to
// base), reset distance to the distance popped from stack (from
// the alternative end to base), recurse into the right sub-RE
StackItem k = {re, dist, 2, i.toplevel};
stack.push_back(k);
StackItem j = {re->alt.re2, VARDIST, 0, false};
stack.push_back(j);
dist = i.dist;
}
else {
// both sub-RE visited, compare the distance on stack (from the
// left sub-RE to base) to the current distance (from the right
// sub-RE to base), if not equal set variable distance
dist = (i.dist == dist) ? i.dist : VARDIST;
}
}
else if (re->type == RE::ITER) {
if (i.succ == 0) {
// recurse into the sub-RE
StackItem k = {re, VARDIST, 1, i.toplevel};
stack.push_back(k);
StackItem j = {re->iter.re, VARDIST, 0, false};
stack.push_back(j);
}
else {
// sub-RE visited, assume unknown number of iterations
// TODO: find precise distance for fixed repetition counter
dist = VARDIST;
}
}
else if (re->type == RE::CAT) {
// the right sub-RE is pushed on stack after the left sub-RE and
// visited earlier (because distance is computed from right to left)
StackItem j1 = {re->cat.re1, VARDIST, 0, i.toplevel};
stack.push_back(j1);
StackItem j2 = {re->cat.re2, VARDIST, 0, i.toplevel};
stack.push_back(j2);
}
else if (re->type == RE::TAG) {
// see note [fixed and variable tags]
Tag &tag = tags[re->tag.idx];
Tag &tag = spec.tags[re->tag.idx];
if (fictive(tag)) {
tag.base = tag.dist = 0;
} else if (toplevel && dist != Tag::VARDIST && !history(tag)) {
}
else if (i.toplevel && dist != VARDIST && !history(tag)) {
tag.base = base;
tag.dist = dist;
} else if (toplevel) {
}
else if (i.toplevel) {
base = re->tag.idx;
dist = 0;
}
if (trailing(tag)) dist = 0;
break;
if (trailing(tag)) {
dist = 0;
}
}
}
}

} // anonymous namespace

void find_fixed_tags(RESpec &spec)
{
const bool generic = spec.opts->input_api == INPUT_CUSTOM;
std::vector<RE*>::iterator
i = spec.res.begin(),
e = spec.res.end();
for (; i != e; ++i) {
size_t base = Tag::RIGHTMOST, dist = 0;
find_fixed_tags(*i, spec.tags, dist, base, !generic);
std::vector<StackItem> stack;
for (std::vector<RE*>::iterator i = spec.res.begin(); i != spec.res.end(); ++i) {
find_fixed_tags(spec, stack, *i);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/regexp/tag.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace re2c {

const size_t Tag::RIGHTMOST = std::numeric_limits<size_t>::max();
const size_t Tag::VARDIST = std::numeric_limits<size_t>::max();
const uint32_t Tag::VARDIST = std::numeric_limits<uint32_t>::max();
const size_t Tag::FICTIVE = Tag::RIGHTMOST - 1;


Expand Down
2 changes: 1 addition & 1 deletion src/regexp/tag.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct tag_info_t
struct Tag
{
static const size_t RIGHTMOST;
static const size_t VARDIST;
static const uint32_t VARDIST;
static const size_t FICTIVE;

const std::string *name;
Expand Down

0 comments on commit 7b56434

Please sign in to comment.