Skip to content

Commit

Permalink
Merge AST nodes CAP and REF for capturing and non-capturing groups.
Browse files Browse the repository at this point in the history
These nodes are similar enough that it is easier to handle them as
one node with a boolean parameter.
  • Loading branch information
skvadrik committed Apr 21, 2023
1 parent 20030ff commit bf3ff8f
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 46 deletions.
19 changes: 9 additions & 10 deletions src/parse/ast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,9 @@ const AstNode* Ast::tag(const loc_t& loc, const char* n, bool h) {
}

const AstNode* Ast::cap(const AstNode* a, bool capturing) {
AstNode* ast;
if (capturing) {
ast = make(a->loc, AstKind::CAP, true);
ast->cap = a;
} else {
ast = make(a->loc, AstKind::REF, a->has_caps);
ast->ref = a;
}
AstNode* ast = make(a->loc, AstKind::CAP, capturing || a->has_caps);
ast->cap.ast = a;
ast->cap.capturing = capturing;
return ast;
}

Expand All @@ -166,6 +161,10 @@ const char* Ast::cstr_global(const uint8_t* s, const uint8_t* e) {
return newcstr(s, e, out_alc);
}

// Tells whether `a` is a capturing group: its kind must be CAP and the
// `capturing` flag stored in its `cap` payload must be set. The payload is
// consulted only after the kind check has passed, so nodes of any other kind
// never have their `cap` member read.
bool Ast::is_capturing(const AstNode* a) {
    if (a->kind != AstKind::CAP) {
        return false; // not a group node at all
    }
    return a->cap.capturing;
}

bool Ast::needs_wrap(const AstNode* a) {
switch (a->kind) {
case AstKind::ITER:
Expand All @@ -175,13 +174,13 @@ bool Ast::needs_wrap(const AstNode* a) {
case AstKind::DOT:
case AstKind::DEF:
case AstKind::TAG:
case AstKind::CAP:
return false;
case AstKind::ALT:
case AstKind::CAT:
case AstKind::DIFF:
case AstKind::REF:
return true;
case AstKind::CAP:
return !a->cap.capturing;
}
return false; // unreachable
}
Expand Down
11 changes: 7 additions & 4 deletions src/parse/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ enum class AstKind: uint32_t {
ITER, // generalized repetition of two nodes: x{n,m} or x{n,} or x{n} or x* or x+
DIFF, // difference of two nodes (only applies to character classes)
TAG, // a tag, like @t (s-tag, single-valued tag) or #t (m-tag, multi-valued tag)
CAP, // capturing group (submatch group)
REF // non-capturing group
CAP // capturing or non-capturing group
};

// A character (symbol) in the abstract syntax tree.
Expand Down Expand Up @@ -92,8 +91,10 @@ struct AstNode {
const char* name;
bool history;
} tag;
const AstNode* cap;
const AstNode* ref;
struct {
const AstNode* ast;
bool capturing;
} cap;
};
loc_t loc;
bool has_caps; // whether this AST has nested capturing groups
Expand Down Expand Up @@ -205,6 +206,8 @@ class Ast {
const char* cstr_local(const uint8_t* s, const uint8_t* e);
const char* cstr_global(const uint8_t* s, const uint8_t* e);

static bool is_capturing(const AstNode* a);

// Whether this AST node must be wrapped in implicit parentheses to ensure correct operator
// precedence. This happens with named definitions, for example `x = "a"|"aa"` used in `x "b"`
// is parsed as `("a"|"aa")"b"`, not `"a"|"aab"`. However, such implicit groups do not exist in
Expand Down
50 changes: 18 additions & 32 deletions src/regexp/ast_to_re.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,14 @@ LOCAL_NODISCARD(Regexp* capture_tags(
size_t ncap = *pncap, lcap = ncap;

const AstNode* ast = *psub;
if (ast->kind == AstKind::CAP) {
if (Ast::is_capturing(ast)) {
// save the range of repeated captures and collapse them: (...(R)...) -> (R)
for (ast = ast->cap; ast && ast->kind == AstKind::CAP; ast = ast->cap) {
for (ast = ast->cap.ast; ast && Ast::is_capturing(ast); ast = ast->cap.ast) {
++ncap;
}
// dereference to avoid future check for non-parenthesized references
if (ast->kind == AstKind::REF) {
ast = ast->ref;
if (ast->kind == AstKind::CAP) { // non-capturing group
ast = ast->cap.ast;
}
*psub = ast;
}
Expand All @@ -116,7 +116,7 @@ LOCAL_NODISCARD(Regexp* capture_tags(

LOCAL_NODISCARD(Regexp* structural_tags(
RESpec& spec, DfsAstToRe& x, const AstNode* sub, size_t* pncap)) {
if (sub->kind == AstKind::CAP) {
if (Ast::is_capturing(sub)) {
// If this sub-AST is already a capture, no need for structural tags.
} else if (spec.opts->tags_automatic) {
// Full parsing: automatically add tags as if this sub-regexp was a capture.
Expand Down Expand Up @@ -314,12 +314,8 @@ LOCAL_NODISCARD(Ret diff_to_range(RESpec& spec,
break;

case AstKind::CAP:
if (spec.opts->tags_posix_syntax) goto error;
x.ast = ast->cap; // replace on stack
break;

case AstKind::REF:
x.ast = ast->ref; // replace on stack
if (Ast::is_capturing(ast) && spec.opts->tags_posix_syntax) goto error;
x.ast = ast->cap.ast; // replace on stack
break;

case AstKind::DIFF:
Expand Down Expand Up @@ -380,7 +376,7 @@ LOCAL_NODISCARD(Ret ast_to_re(RESpec& spec,

const AstNode* ast = x.ast;

if (ast->kind != AstKind::CAP && ast->kind != AstKind::REF) ++x.height;
if (ast->kind != AstKind::CAP) ++x.height;

switch (ast->kind) {
case AstKind::NIL:
Expand Down Expand Up @@ -432,27 +428,17 @@ LOCAL_NODISCARD(Ret ast_to_re(RESpec& spec,

case AstKind::CAP:
if (!opts->tags_posix_syntax) { // ordinary group, replace with subexpr on stack
x.ast = ast->cap;
} else { // capturing group
if (x.succ == 0) { // 1st visit: push successor
++x.succ;
x.re1 = capture_tags(spec, x, false, &ast, pncap);
stack.emplace_back(ast, x.height, x.in_iter);
} else { // 2nd visit: return
re = insert_between_tags(spec, x.re1, re);
stack.pop_back();
}
}
break;

case AstKind::REF:
if (!opts->tags_posix_semantics) { // ordinary group, replace with subexpr on stack
x.ast = ast->ref;
} else { // non-capturing group
x.ast = ast->cap.ast;
} else { // capturing or non-capturing group
if (x.succ == 0) { // 1st visit: push successor
++x.succ;
x.re1 = structural_tags(spec, x, ast->ref, pncap);
stack.emplace_back(ast->ref, x.height, x.in_iter);
if (Ast::is_capturing(ast)) {
x.re1 = capture_tags(spec, x, false, &ast, pncap);
stack.emplace_back(ast, x.height, x.in_iter);
} else {
x.re1 = structural_tags(spec, x, ast->cap.ast, pncap);
stack.emplace_back(ast->cap.ast, x.height, x.in_iter);
}
} else { // 2nd visit: return
re = insert_between_tags(spec, x.re1, re);
stack.pop_back();
Expand Down Expand Up @@ -500,7 +486,7 @@ LOCAL_NODISCARD(Ret ast_to_re(RESpec& spec,
++x.succ;
const uint32_t m = ast->iter.max;
ast = ast->iter.ast;
if ((opts->tags_posix_semantics && ast->kind == AstKind::CAP)
if ((opts->tags_posix_semantics && Ast::is_capturing(ast))
|| opts->tags_automatic) {
x.re1 = capture_tags(spec, x, m > 1, &ast, pncap);
}
Expand Down

0 comments on commit bf3ff8f

Please sign in to comment.