Skip to content

Commit

Permalink
Merge pull request #112 from soasme/leftrecur
Browse files Browse the repository at this point in the history
Left Recursion (C API)
  • Loading branch information
soasme committed Aug 26, 2021
2 parents 34e0c2e + 01f94c2 commit 2e35ede
Show file tree
Hide file tree
Showing 4 changed files with 489 additions and 5 deletions.
125 changes: 121 additions & 4 deletions peppa.c
Expand Up @@ -612,6 +612,12 @@ struct P4_Expression {
size_t repeat_min;
size_t repeat_max;
};

/** Used by P4_LeftRecursion. */
struct {
P4_Expression* lhs;
P4_Expression* rhs;
};
};
};

Expand Down Expand Up @@ -851,6 +857,7 @@ P4_PRIVATE(P4_Node*) match_cut(P4_Source*, P4_Expression*);
P4_PRIVATE(P4_Node*) match_sequence(P4_Source*, P4_Expression*);
P4_PRIVATE(P4_Node*) match_choice(P4_Source*, P4_Expression*);
P4_PRIVATE(P4_Node*) match_repeat(P4_Source*, P4_Expression*);
P4_PRIVATE(P4_Node*) match_left_recursion(P4_Source*, P4_Expression*);
P4_PRIVATE(P4_Node*) match_spaced_rules(P4_Source*, P4_Expression*);
P4_PRIVATE(P4_Node*) match_back_reference(P4_Source*, P4_Expression*, P4_Slice*, P4_Expression*);

Expand Down Expand Up @@ -2207,6 +2214,87 @@ match_repeat(P4_Source* s, P4_Expression* e) {
return NULL;
}

/**
 * Match a P4_LeftRecursion expression.
 *
 * e->lhs is matched once. Afterwards e->rhs is matched repeatedly; after
 * every successful rhs match the result parsed so far, the implicit
 * whitespace (if any) and the rhs node are adopted under a new node named
 * after e, and that node becomes the left operand of the next round.
 * The loop stops at the end of input or as soon as rhs does not match.
 *
 * @param s  The source being parsed; must not carry an error on entry.
 * @param e  The left recursion expression (reads e->lhs, e->rhs, e->name).
 * @return   The node built so far, or NULL if an error was raised.
 */
P4_PRIVATE(P4_Node*)
match_left_recursion(P4_Source* s, P4_Expression* e) {
    assert(no_error(s), "can't proceed due to a failed match");

    mark_position(s, startpos);

    P4_Node* toklhs = match_expression(s, e->lhs);
    P4_Node* tokrhs = NULL;
    P4_Node* toktmp = NULL;
    P4_Node* whitespace = NULL;

    if (need_lift(s, e)) {
        P4_MatchRaisef(s, P4_PegError,
                "left recursion rule %s cannot be lifted", peek_rule_name(s));
        /* lhs has already been matched: go through finalize so that its
         * node is released instead of being leaked by a bare return. */
        goto finalize;
    }

    if (!no_error(s)) {
        P4_MatchRaisef(s, P4_MatchError, "expect %s", peek_rule_name(s));
        goto finalize;
    }

    bool space = need_whitespace(s);

    do {
        /* left recursion until the end of input. */
        if (is_end(s))
            break;

        /* match implicit whitespace. */
        mark_position(s, whitespace_startpos);
        if (space) {
            whitespace = match_spaced_rules(s, NULL);
            if (!no_error(s)) goto finalize;
        }

        /* attempt to match rhs. */
        tokrhs = match_expression(s, e->rhs);

        /* if no match, puke the parsed whitespace, if any. */
        if (no_match(s)) {
            if (space) { set_position(s, whitespace_startpos); }

            /* the whitespace node of this round was never adopted; delete
             * it here, otherwise nothing owns it once we return toklhs. */
            if (whitespace != NULL) {
                P4_DeleteNode(s->grammar, whitespace);
                whitespace = NULL;
            }

            /* NOTE(review): the error state left by the failed rhs attempt
             * is not reset in this function -- confirm the caller deals
             * with it. */
            break;
        }

        if (!no_error(s)) {
            P4_MatchRaisef(s, P4_MatchError, "expect %s", peek_rule_name(s));
            goto finalize;
        }

        mark_position(s, endpos);

        /* adopt all lhs,ws,rhs tokens under the hierarchy of e. */
        catch_oom(toktmp = P4_CreateNode(s->content, startpos, endpos, e->name));
        P4_AdoptNode(toktmp->head, toktmp->tail, toklhs);
        P4_AdoptNode(toktmp->head, toktmp->tail, whitespace);
        P4_AdoptNode(toktmp->head, toktmp->tail, tokrhs);
        /* ownership moved to toktmp; forget the locals so that finalize
         * cannot delete them a second time. */
        whitespace = tokrhs = NULL;

        /* if only one child and e is nonterminal, keep the child. */
        if (is_non_terminal(e) && toktmp->head == toktmp->tail && toktmp->head != NULL) {
            toklhs = toktmp->head;
            /* detach the child before deleting the wrapper node. */
            toktmp->head = NULL;
            toktmp->tail = NULL;
            P4_DeleteNode(s->grammar, toktmp);
        } else {
            /* on next loop, we will use the parsed inputs as lhs and
             * attempt to match rhs, if any. */
            toklhs = toktmp;
        }
    } while (true);

    return toklhs;

finalize:
    P4_DeleteNode(s->grammar, toklhs);
    P4_DeleteNode(s->grammar, whitespace);
    P4_DeleteNode(s->grammar, tokrhs);
    return NULL;
}

P4_PRIVATE(P4_Node*)
match_positive(P4_Source* s, P4_Expression* e) {
assert(no_error(s) && e->ref_expr != NULL, "expression should not be null");
Expand Down Expand Up @@ -2284,6 +2372,7 @@ match_expression(P4_Source* s, P4_Expression* e) {
case P4_Positive: result = match_positive(s, e); break;
case P4_Negative: result = match_negative(s, e); break;
case P4_Repeat: result = match_repeat(s, e); break;
case P4_LeftRecursion: result = match_left_recursion(s, e); break;
case P4_Cut: panic("cut can be applied only in sequence.");
case P4_BackReference: panic("backreference can be applied only in sequence.");
default: panicf("invalid dispatch kind: %zu.", e->kind);
Expand Down Expand Up @@ -2587,6 +2676,17 @@ P4_CreateCut() {
return expr;
}

/*
 * Allocate a P4_LeftRecursion expression that refers to lhs and rhs.
 *
 * The two pointers are stored as-is. P4_DeleteExpression on the returned
 * expression deletes both of them as well.
 *
 * Returns NULL when the allocation fails; lhs and rhs are left untouched
 * in that case.
 */
P4_PUBLIC P4_Expression*
P4_CreateLeftRecursion(P4_Expression* lhs, P4_Expression* rhs) {
    P4_Expression* expr = P4_MALLOC(sizeof(P4_Expression));

    /* never write through a failed allocation. */
    if (expr == NULL)
        return NULL;

    expr->kind = P4_LeftRecursion;
    expr->flag = 0;
    expr->name = NULL;
    expr->lhs = lhs;
    expr->rhs = rhs;
    return expr;
}

P4_PRIVATE(P4_Expression*)
P4_CreateContainer(size_t count) {
if (count == 0)
Expand Down Expand Up @@ -3268,6 +3368,12 @@ P4_Expression* P4_CreateJoin(const P4_String joiner, P4_String reference) {
);
}

/*
 * Build a left recursion expression from lhs and rhs and pass it, together
 * with the grammar and the rule name, to P4_AddSomeGrammarRule.
 *
 * NOTE(review): P4_AddSomeGrammarRule is defined outside this view; how it
 * reacts to a NULL grammar, a NULL expression or a failed insertion should
 * be confirmed there. When control reaches the last statement the function
 * returns P4_Ok.
 */
P4_PUBLIC P4_Error
P4_AddLeftRecursion(P4_Grammar* grammar, P4_String name, P4_Expression* lhs, P4_Expression* rhs) {
P4_AddSomeGrammarRule(grammar, name, P4_CreateLeftRecursion(lhs, rhs));
return P4_Ok;
}

P4_PUBLIC void
P4_DeleteExpression(P4_Expression* expr) {
if (expr == NULL)
Expand Down Expand Up @@ -3308,6 +3414,9 @@ P4_DeleteExpression(P4_Expression* expr) {
if (expr->repeat_expr)
P4_DeleteExpression(expr->repeat_expr);
break;
case P4_LeftRecursion:
P4_DeleteExpression(expr->lhs);
P4_DeleteExpression(expr->rhs);
default:
break;
}
Expand Down Expand Up @@ -3560,6 +3669,10 @@ P4_RefreshReference(P4_Expression* expr, P4_String name) {
case P4_Repeat:
catch_err(P4_RefreshReference(expr->repeat_expr, name));
break;
case P4_LeftRecursion:
catch_err(P4_RefreshReference(expr->lhs, name));
catch_err(P4_RefreshReference(expr->rhs, name));
break;
default:
break;
}
Expand Down Expand Up @@ -4396,7 +4509,7 @@ P4_PegEvalGrammarReferences(
P4_Grammar* grammar,
P4_Expression* expr,
P4_Result* result) {
# define recursive(e) \
# define recursively_eval_reference(e) \
if ((e)) \
return P4_PegEvalGrammarReferences(grammar, (e), result);

Expand All @@ -4421,15 +4534,19 @@ P4_PegEvalGrammarReferences(
/* recursively check non-reference expressions. */
case P4_Positive:
case P4_Negative:
recursive(expr->ref_expr);
recursively_eval_reference(expr->ref_expr);
break;
case P4_Sequence:
case P4_Choice:
for (i = 0; i < expr->count; i++)
recursive(expr->members[i])
recursively_eval_reference(expr->members[i])
break;
case P4_Repeat:
recursive(expr->repeat_expr);
recursively_eval_reference(expr->repeat_expr);
break;
case P4_LeftRecursion:
/* NOTE(review): recursively_eval_reference(e) expands to
 * `if ((e)) return P4_PegEvalGrammarReferences(grammar, (e), result);`,
 * so a non-NULL lhs returns from this function on the next line and the
 * rhs line is reached only when lhs is NULL. Confirm whether references
 * inside rhs are meant to be evaluated as well. */
recursively_eval_reference(expr->lhs);
recursively_eval_reference(expr->rhs);
break;
default: break;
}
Expand Down
24 changes: 23 additions & 1 deletion peppa.h
Expand Up @@ -215,7 +215,9 @@ typedef enum {
*/
P4_Repeat,
/** Rule: Cut. */
P4_Cut
P4_Cut,
/** Rule: Left Recursion. */
P4_LeftRecursion
} P4_ExpressionKind;

/**
Expand Down Expand Up @@ -694,6 +696,26 @@ P4_Error P4_AddNegative(P4_Grammar* grammar, P4_String name, P4_Expression
*/
P4_Expression* P4_CreateCut();

/**
 * Create a P4_LeftRecursion expression.
 *
 * When matched, lhs is matched once and rhs is then matched repeatedly for
 * as long as it succeeds; the nodes parsed so far become the left operand
 * of each following round.
 *
 * The returned expression stores the lhs and rhs pointers. Passing the
 * returned expression to P4_DeleteExpression deletes lhs and rhs as well.
 *
 * @param lhs Left-hand side of left recursion.
 * @param rhs Right-hand side of left recursion.
 * @return A newly allocated P4_Expression of kind P4_LeftRecursion.
 */
P4_Expression* P4_CreateLeftRecursion(P4_Expression* lhs, P4_Expression* rhs);

/**
 * Add a P4_LeftRecursion expression as grammar rule.
 *
 * The expression is built from lhs and rhs with P4_CreateLeftRecursion.
 *
 * @param grammar The grammar.
 * @param name The grammar rule name.
 * @param lhs Left-hand side of left recursion.
 * @param rhs Right-hand side of left recursion.
 * @return The error code; P4_Ok when the rule was added.
 */
P4_Error P4_AddLeftRecursion(P4_Grammar* grammar, P4_String name, P4_Expression* lhs, P4_Expression* rhs);

/**
* Create a P4_Sequence expression.
*
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Expand Up @@ -16,6 +16,7 @@ set(
test_flags
test_cut
test_back_reference
test_left_recursion
test_misc
test_peg
test_example_mustache
Expand Down

0 comments on commit 2e35ede

Please sign in to comment.