Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add error code for unsupported PCRE cases (RE_EUNSUPPPCRE), reject one. #447

Merged
merged 4 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/re/re.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ enum re_errno {
RE_EERRNO = 1 | RE_MISC,
RE_EBADDIALECT = 2 | RE_MISC,
RE_EBADGROUP = 3 | RE_MISC,
RE_EUNSUPCAPTUR = 4 | RE_MISC,
RE_EUNSUPPPCRE = 5 | RE_MISC,

RE_ENEGRANGE = 0 | RE_MARK | RE_GROUP,
RE_ENEGCOUNT = 1 | RE_MARK | RE_GROUP,
Expand Down
1 change: 1 addition & 0 deletions src/libre/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ enum ast_flags {
AST_FLAG_ANCHORED_START = 1 << 6,
AST_FLAG_ANCHORED_END = 1 << 7,
AST_FLAG_END_NL = 1 << 8,
AST_FLAG_MATCHES_1NEWLINE= 1 << 9,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There will be even more niche start/end anchor flags coming soon. Expressing PCRE's anchoring rules statically gets complicated.


AST_FLAG_NONE = 0x00
};
Expand Down
103 changes: 98 additions & 5 deletions src/libre/ast_analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,46 @@ set_flags_subtree(struct ast_expr *n, enum ast_flags flags)
}
}

/*
 * Decide whether node n is able to match a '\n' character.
 *
 * A node rejected by can_consume_input() (defined elsewhere) never
 * qualifies, and a node already carrying AST_FLAG_MATCHES_1NEWLINE always
 * does. Beyond that, only literal, codepoint, range and subtract nodes are
 * inspected here; every other node type yields 0.
 *
 * Returns 1 when a newline can be matched, 0 otherwise.
 */
static int
can_consume_single_newline(struct ast_expr *n)
{
	int matches = 0;

	if (!can_consume_input(n)) {
		return 0;
	}

	/* A previously recorded flag short-circuits the structural checks. */
	if ((n->flags & AST_FLAG_MATCHES_1NEWLINE) != 0) {
		return 1;
	}

	if (n->type == AST_EXPR_LITERAL) {
		matches = (n->u.literal.c == '\n');
	} else if (n->type == AST_EXPR_CODEPOINT) {
		matches = (n->u.codepoint.u == (uint32_t)'\n');
	} else if (n->type == AST_EXPR_RANGE) {
		/* The range matches when '\n' lies within [from, to]. */
		if (n->u.range.from.type == AST_ENDPOINT_LITERAL
		    && n->u.range.to.type == AST_ENDPOINT_LITERAL) {
			matches = n->u.range.from.u.literal.c <= '\n'
			    && n->u.range.to.u.literal.c >= '\n';
		} else if (n->u.range.from.type == AST_ENDPOINT_CODEPOINT
		    && n->u.range.to.type == AST_ENDPOINT_CODEPOINT) {
			matches = n->u.range.from.u.codepoint.u <= '\n'
			    && n->u.range.to.u.codepoint.u >= '\n';
		}
		/*
		 * Named endpoints (TODO: unreachable?) and any other
		 * combination of endpoint types leave the result at 0.
		 */
	} else if (n->type == AST_EXPR_SUBTRACT) {
		/* a - b keeps the newline only if a has it and b does not. */
		matches = can_consume_single_newline(n->u.subtract.a)
		    && !can_consume_single_newline(n->u.subtract.b);
	}

	return matches;
}

struct anchoring_env {
enum re_flags re_flags;

Expand All @@ -562,6 +602,7 @@ struct anchoring_env {

/* Corresponding flag for end anchors while sweeping backward. */
int followed_by_consuming;
int followed_by_consuming_newline;

int before_start_anchor;
};
Expand Down Expand Up @@ -644,6 +685,9 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
case AST_EXPR_LITERAL:
case AST_EXPR_CODEPOINT:
case AST_EXPR_RANGE:
if (can_consume_single_newline(n)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}
break; /* handled outside switch/case */

case AST_EXPR_CONCAT: {
Expand Down Expand Up @@ -810,6 +854,13 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
}
}

for (i = 0; i < n->u.concat.count; i++) {
struct ast_expr *child = n->u.concat.n[i];
if (can_consume_single_newline(child)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}
}

break;
}

Expand Down Expand Up @@ -846,6 +897,9 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
} else if (res == AST_ANALYSIS_OK) {
all_set_past_always_consuming &= child_env.past_always_consuming;
any_sat = 1;
} else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE
|| res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) {
continue;
} else {
return res;
}
Expand All @@ -858,6 +912,10 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
all_end_anchored = 0;
}
}

if (child->flags & AST_FLAG_MATCHES_1NEWLINE) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}
}

if (!env->past_always_consuming && all_set_past_always_consuming) {
Expand Down Expand Up @@ -925,6 +983,10 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
return res;
}

if (can_consume_single_newline(n->u.repeat.e)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}

if (n->u.repeat.e->flags & AST_FLAG_ANCHORED_END && n->u.repeat.min > 0) {
/* FIXME: if repeating something that is always
* anchored at the end, repeat.max could be
Expand Down Expand Up @@ -964,6 +1026,11 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
} while(0)

PROPAGATE_CHILD_FLAGS("GROUP", n, n->u.group.e);

if (n->u.group.e->flags & AST_FLAG_MATCHES_1NEWLINE) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}

break;

case AST_EXPR_SUBTRACT:
Expand Down Expand Up @@ -991,6 +1058,10 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n)
}
return res;
}
if (can_consume_single_newline(n->u.repeat.e)) {
set_flags(n, AST_FLAG_MATCHES_1NEWLINE);
}

break;

default:
Expand Down Expand Up @@ -1048,11 +1119,18 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
assert(n->flags & AST_FLAG_ANCHORED_END);

if (env->followed_by_consuming) {
LOG(3 - LOG_ANCHORING,
"%s: END anchor & followed_by_consuming, setting UNSATISFIABLE\n",
__func__);
set_flags(n, AST_FLAG_UNSATISFIABLE);
return AST_ANALYSIS_UNSATISFIABLE;
if (env->followed_by_consuming_newline) {
LOG(3 - LOG_ANCHORING,
"%s: END anchor & followed_by_consuming, returning UNSUPPORTED_PCRE\n",
__func__);
return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE;
} else {
LOG(3 - LOG_ANCHORING,
"%s: END anchor & followed_by_consuming, setting UNSATISFIABLE\n",
__func__);
set_flags(n, AST_FLAG_UNSATISFIABLE);
return AST_ANALYSIS_UNSATISFIABLE;
}
}

break;
Expand Down Expand Up @@ -1113,6 +1191,8 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
set_flags(n, AST_FLAG_UNSATISFIABLE);
}
} else if (res != AST_ANALYSIS_OK) {
LOG(3 - LOG_ANCHORING,
"%s: CONCAT: got res of %d, bubbling up\n", __func__, res);
return res;
}

Expand All @@ -1128,6 +1208,15 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
env->followed_by_consuming = 1;
}

if (!env->followed_by_consuming_newline &&
(child_env.followed_by_consuming_newline
|| child->flags & AST_FLAG_MATCHES_1NEWLINE)) {
LOG(3 - LOG_ANCHORING,
"%s: setting followed_by_consuming_newline due to child %p's analysis\n",
__func__, (void *)child);
env->followed_by_consuming_newline = 1;
}

if (!env->before_start_anchor && child_env.before_start_anchor
&& !is_nullable(child)) {
LOG(3 - LOG_ANCHORING,
Expand Down Expand Up @@ -1169,6 +1258,10 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n)
all_set_followed_by_consuming &= child_env.followed_by_consuming;
all_set_before_start_anchor &= child_env.before_start_anchor;
any_sat = 1;
} else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE
|| res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) {
LOG(3 - LOG_ANCHORING, "%s: got res of UNSUPPORTED_*, bubbling up\n", __func__);
return res;
} else {
return res;
}
Expand Down
4 changes: 3 additions & 1 deletion src/libre/ast_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ enum ast_analysis_res {
AST_ANALYSIS_UNSATISFIABLE,

AST_ANALYSIS_ERROR_NULL = -1,
AST_ANALYSIS_ERROR_MEMORY = -2
AST_ANALYSIS_ERROR_MEMORY = -2,
AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE = -3,
AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE = -4
};

enum ast_analysis_res
Expand Down
1 change: 1 addition & 0 deletions src/libre/print/tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ fprintf_flags(FILE *f, enum ast_flags flags)
PR_FLAG(END_NL, "N");
PR_FLAG(CAN_CONSUME, "c");
PR_FLAG(ALWAYS_CONSUMES, "C");
PR_FLAG(MATCHES_1NEWLINE, "n");

#undef PR_FLAG

Expand Down
10 changes: 9 additions & 1 deletion src/libre/re.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,15 @@ re_parse(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque,

if (res < 0) {
ast_free(ast);
if (err != NULL) { err->e = RE_EERRNO; }
if (err != NULL) {
if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) {
err->e = RE_EUNSUPPPCRE;
} else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE) {
err->e = RE_EUNSUPCAPTUR;
} else if (err->e == RE_ESUCCESS) {
err->e = RE_EERRNO;
}
}
return NULL;
}

Expand Down
2 changes: 2 additions & 0 deletions src/libre/strerror.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ re_strerror(enum re_errno e)
case RE_EERRNO: return strerror(errno);
case RE_EBADDIALECT: return "Bad dialect";
case RE_EBADGROUP: return "Bad group";
case RE_EUNSUPCAPTUR: return "Cannot support captures in this case";
case RE_EUNSUPPPCRE: return "Unsupported PCRE edge case";

case RE_ENEGRANGE: return "Negative group range";
case RE_ENEGCOUNT: return "Negative count range";
Expand Down
1 change: 1 addition & 0 deletions tests/pcre/in48.re
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
^a|$[^x]b*
1 change: 1 addition & 0 deletions tests/pcre/out48.err
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tests/pcre/in48.re: Unsupported PCRE edge case