From 542c0662b4901a6cfd32a99e5241dace0ddde807 Mon Sep 17 00:00:00 2001 From: Karsten Schmidt Date: Mon, 6 Jul 2020 01:11:04 +0100 Subject: [PATCH] feat(parse): lookahead w/ configurable capture --- packages/parse/src/combinators/lookahead.ts | 54 +++++++++++--- packages/parse/test/lookahead.ts | 80 ++++++++++++++++++++- 2 files changed, 121 insertions(+), 13 deletions(-) diff --git a/packages/parse/src/combinators/lookahead.ts b/packages/parse/src/combinators/lookahead.ts index 603817f758..338e479e73 100644 --- a/packages/parse/src/combinators/lookahead.ts +++ b/packages/parse/src/combinators/lookahead.ts @@ -1,30 +1,63 @@ import type { Parser } from "../api"; /** - * Repeatedly runs look-`ahead` and `main` parsers until the former - * succeeds or end of input is reached. + * Repeatedly runs `main` and look-`ahead` parsers for as long as the + * former succeeds and UNTIL the latter passes or end of input is + * reached. If the `ahead` parser never passes, the entire parser fails + * and any partial matches are discarded. * * @remarks - * Result of `ahead` parser will NOT be cosumed and on successful match - * the final read position will be at beginning of `ahead` pattern. If - * the `ahead` parser never succeeds, the entire parser fails and any - * partial matches are discarded. + * Depending on `capture` (default: false), the result of the `ahead` + * parser is captured or omitted and the final read position is adjusted + * accordingly. + * + * Currently, iff `capture` is disabled, the `ahead` parser MUST discard + * its own result (e.g. via {@link discard}). On successful match the + * final read position will then be restored to the beginning of `ahead` + * pattern. + * + * Iff `capture` is enabled, the `ahead` parser MAY discard its own + * result, but the final read position will not be re-adjusted as in the + * non-capturing version. 
+ * + * **Important:** The main term is repeated automatically, so DO NOT + * use the repetition modifiers `?` or `*`: either of these will + * cause the parser to go into an infinite loop. This is expected + * behavior and not a bug. * * @example * ```ts * const ctx = defContext("ababaaabbabba"); * - * // consume while 'a' or `b` until 1st occurrence of "abba" + * // consume while `a` or `b` and until 1st occurrence of "abba"... + * // note the use of `stringD()` to discard lookahead result + * + * // non-capturing lookahead * join(lookahead(oneOf("ab"), stringD("abba")))(ctx) * // true * * ctx.result - * // 'ababaa' + * // "ababaa" * * ctx.state * // { p: 6, l: 1, c: 7, done: false, last: 'a' } * ``` * + * @example + * ```ts + * const ctx = defContext("ababaaabbabba"); + * + * // capturing lookahead + * join(lookahead(oneOf("ab"), string("abba"), true))(ctx) + * // true + * + * ctx.result + * // "ababaaabba" + * + * ctx.state + * // { p: 10, l: 1, c: 11, done: false, last: 'a' } + * ``` + * * @param parser * @param ahead * @param id @@ -32,15 +65,16 @@ import type { Parser } from "../api"; export const lookahead = ( parser: Parser, ahead: Parser, + capture = false, id = "lookahead" ): Parser => (ctx) => { if (ctx.done) return false; ctx.start(id); let pass = false; while (true) { - const state = { ...ctx.state }; + const state = capture ? null : { ...ctx.state }; if (ahead(ctx)) { - ctx.state = state; + !capture && (ctx.state = state!); return pass ? 
ctx.end() : ctx.discard(); } if (!parser(ctx)) return ctx.discard(); diff --git a/packages/parse/test/lookahead.ts b/packages/parse/test/lookahead.ts index 312384e7d5..bb076ccb72 100644 --- a/packages/parse/test/lookahead.ts +++ b/packages/parse/test/lookahead.ts @@ -1,8 +1,16 @@ import * as assert from "assert"; -import { defContext, join, lookahead, oneOf, stringD, string } from "../src"; +import { + defContext, + defGrammar, + join, + lookahead, + oneOf, + string, + stringD, +} from "../src"; describe("lookahead", () => { - it("oneof", () => { + it("oneof (no capture)", () => { const ctx = defContext("ababaaabbabba"); assert(join(lookahead(oneOf("ab"), stringD("abba")))(ctx)); assert.equal(ctx.result, "ababaa"); @@ -14,9 +22,25 @@ describe("lookahead", () => { last: "a", }); assert(string("abba")(ctx)); + assert(!ctx.done); }); - it("string", () => { + it("oneof (capture)", () => { + const ctx = defContext("ababaaabbabba"); + assert(join(lookahead(oneOf("ab"), string("abba"), true))(ctx)); + assert.equal(ctx.result, "ababaaabba"); + assert.deepEqual(ctx.state, { + p: 10, + l: 1, + c: 11, + done: false, + last: "a", + }); + assert(string("bba")(ctx)); + assert(ctx.done); + }); + + it("string (no capture)", () => { const ctx = defContext("abababbabba"); assert(join(lookahead(string("ab"), stringD("abba")))(ctx)); assert.equal(ctx.result, "abab"); @@ -28,5 +52,55 @@ describe("lookahead", () => { last: "b", }); assert(string("abba")(ctx)); + assert(!ctx.done); + }); + + it("string (capture)", () => { + const ctx = defContext("abababbabba"); + assert(join(lookahead(string("ab"), string("abba"), true))(ctx)); + assert.equal(ctx.result, "abababba"); + assert.deepEqual(ctx.state, { + p: 8, + l: 1, + c: 9, + done: false, + last: "a", + }); + assert(string("bba")(ctx)); + assert(ctx.done); + }); + + it("grammar (no capture)", () => { + const ctx = defContext("ababaaabbabba"); + const lang = defGrammar(`foo: [ab](?-"abba"!) 
=> join ;`); + assert(lang); + assert(lang.rules.foo(ctx)); + assert.equal(ctx.result, "ababaa"); + assert.deepEqual(ctx.state, { + p: 6, + l: 1, + c: 7, + done: false, + last: "a", + }); + assert(string("abba")(ctx)); + assert(!ctx.done); + }); + + it("grammar (capture)", () => { + const ctx = defContext("ababaaabbabba"); + const lang = defGrammar(`foo: [ab](?+"abba") => join ;`); + assert(lang); + assert(lang.rules.foo(ctx)); + assert.equal(ctx.result, "ababaaabba"); + assert.deepEqual(ctx.state, { + p: 10, + l: 1, + c: 11, + done: false, + last: "a", + }); + assert(string("bba")(ctx)); + assert(ctx.done); }); });