Skip to content

Commit

Permalink
feat(parse): lookahead w/ configurable capture
Browse files Browse the repository at this point in the history
  • Loading branch information
postspectacular committed Jul 6, 2020
1 parent 6542318 commit 542c066
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 13 deletions.
54 changes: 44 additions & 10 deletions packages/parse/src/combinators/lookahead.ts
Original file line number Diff line number Diff line change
@@ -1,46 +1,80 @@
import type { Parser } from "../api";

/**
* Repeatedly runs look-`ahead` and `main` parsers until the former
* succeeds or end of input is reached.
* Repeatedly runs `main` and look-`ahead` parsers for as long as the
* former succeeds and UNTIL the latter passes or end of input is
* reached. If the `ahead` parser never passes, the entire parser fails
* and any partial matches are discarded.
*
* @remarks
* Result of `ahead` parser will NOT be consumed and on successful match
* the final read position will be at beginning of `ahead` pattern. If
* the `ahead` parser never succeeds, the entire parser fails and any
* partial matches are discarded.
* Depending on `capture` (default: false), the result of the `ahead`
* parser is captured or omitted and the final read position is adjusted
* accordingly.
*
* Currently, iff `capture` is disabled, the `ahead` parser MUST discard
* its own result (e.g. via {@link discard}). On successful match the
* final read position will then be restored to the beginning of `ahead`
* pattern.
*
* Iff `capture` is enabled, the `ahead` parser MAY discard its own
* result, but the final read position will not be re-adjusted as in the
* non-capturing version.
*
* **Important:** The main term is repeated automatically, so DO NOT use
* the repetition modifiers `?` or `*`: both of these will cause the
* parser to go into an infinite loop. This is expected behavior and
* not a bug.
*
* @example
* ```ts
* const ctx = defContext("ababaaabbabba");
*
* // consume while `a` or `b` until 1st occurrence of "abba"
* // consume while `a` or `b` and until 1st occurrence of "abba"...
* // note the use of `stringD()` to discard lookahead result
*
* // non-capturing lookahead
* join(lookahead(oneOf("ab"), stringD("abba")))(ctx)
* // true
*
* ctx.result
* // 'ababaa'
* // "ababaa"
*
* ctx.state
* // { p: 6, l: 1, c: 7, done: false, last: 'a' }
* ```
*
* @example
* ```ts
* const ctx = defContext("ababaaabbabba");
*
* // capturing lookahead
* join(lookahead(oneOf("ab"), string("abba"), true))(ctx)
* // true
*
* ctx.result
* // "ababaaabba"
*
* ctx.state
* // { p: 10, l: 1, c: 11, done: false, last: 'a' }
* ```
*
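* @param parser - main parser, applied repeatedly
* @param ahead - look-ahead parser; repetition stops once it passes
* @param capture - if true, the `ahead` result is captured and the read
* position is not restored (default: false)
* @param id - ID passed to `ctx.start()` (default: "lookahead")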
*/
export const lookahead = <T>(
parser: Parser<T>,
ahead: Parser<T>,
capture = false,
id = "lookahead"
): Parser<T> => (ctx) => {
if (ctx.done) return false;
ctx.start(id);
let pass = false;
while (true) {
const state = { ...ctx.state };
const state = capture ? null : { ...ctx.state };
if (ahead(ctx)) {
ctx.state = state;
!capture && (ctx.state = state!);
return pass ? ctx.end() : ctx.discard();
}
if (!parser(ctx)) return ctx.discard();
Expand Down
80 changes: 77 additions & 3 deletions packages/parse/test/lookahead.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import * as assert from "assert";
import { defContext, join, lookahead, oneOf, stringD, string } from "../src";
import {
defContext,
defGrammar,
join,
lookahead,
oneOf,
string,
stringD,
} from "../src";

describe("lookahead", () => {
it("oneof", () => {
it("oneof (no capture)", () => {
const ctx = defContext("ababaaabbabba");
assert(join(lookahead(oneOf("ab"), stringD("abba")))(ctx));
assert.equal(ctx.result, "ababaa");
Expand All @@ -14,9 +22,25 @@ describe("lookahead", () => {
last: "a",
});
assert(string("abba")(ctx));
assert(!ctx.done);
});

it("string", () => {
// Capturing lookahead with a single-char main parser: `capture` = true,
// so the "abba" matched by the `ahead` parser is expected to be part of
// the joined result and the read position to remain after it.
it("oneof (capture)", () => {
const ctx = defContext("ababaaabbabba");
assert(join(lookahead(oneOf("ab"), string("abba"), true))(ctx));
// "ababaa" (main parser) + "abba" (captured lookahead)
assert.equal(ctx.result, "ababaaabba");
// position is past the lookahead match (10 of 13 chars consumed)
assert.deepEqual(ctx.state, {
p: 10,
l: 1,
c: 11,
done: false,
last: "a",
});
// only the trailing "bba" remains; consuming it exhausts the input
assert(string("bba")(ctx));
assert(ctx.done);
});

it("string (no capture)", () => {
const ctx = defContext("abababbabba");
assert(join(lookahead(string("ab"), stringD("abba")))(ctx));
assert.equal(ctx.result, "abab");
Expand All @@ -28,5 +52,55 @@ describe("lookahead", () => {
last: "b",
});
assert(string("abba")(ctx));
assert(!ctx.done);
});

// Capturing lookahead with a multi-char main parser (`string("ab")`):
// the lookahead match "abba" is expected to be included in the result.
it("string (capture)", () => {
const ctx = defContext("abababbabba");
assert(join(lookahead(string("ab"), string("abba"), true))(ctx));
// "abab" (two main matches) + "abba" (captured lookahead)
assert.equal(ctx.result, "abababba");
// position is past the lookahead match (8 of 11 chars consumed)
assert.deepEqual(ctx.state, {
p: 8,
l: 1,
c: 9,
done: false,
last: "a",
});
// only the trailing "bba" remains; consuming it exhausts the input
assert(string("bba")(ctx));
assert(ctx.done);
});

// Same scenario as the non-capturing `oneOf` test, but with the parser
// built from a grammar string.
// NOTE(review): presumably `(?-...)` is the grammar syntax for a
// non-capturing lookahead and the trailing `!` discards the lookahead
// result - confirm against the grammar documentation.
it("grammar (no capture)", () => {
const ctx = defContext("ababaaabbabba");
const lang = defGrammar(`foo: [ab](?-"abba"!) => join ;`);
assert(lang);
assert(lang.rules.foo(ctx));
// result excludes the lookahead pattern
assert.equal(ctx.result, "ababaa");
// read position sits at the beginning of the "abba" lookahead match
assert.deepEqual(ctx.state, {
p: 6,
l: 1,
c: 7,
done: false,
last: "a",
});
// the lookahead pattern is still unconsumed and can be parsed next;
// "bba" remains afterwards, so the input is not yet exhausted
assert(string("abba")(ctx));
assert(!ctx.done);
});

// Same scenario as the capturing `oneOf` test, but with the parser
// built from a grammar string.
// NOTE(review): presumably `(?+...)` is the grammar syntax for a
// capturing lookahead - confirm against the grammar documentation.
it("grammar (capture)", () => {
const ctx = defContext("ababaaabbabba");
const lang = defGrammar(`foo: [ab](?+"abba") => join ;`);
assert(lang);
assert(lang.rules.foo(ctx));
// "ababaa" (main term) + "abba" (captured lookahead)
assert.equal(ctx.result, "ababaaabba");
// position is past the lookahead match (10 of 13 chars consumed)
assert.deepEqual(ctx.state, {
p: 10,
l: 1,
c: 11,
done: false,
last: "a",
});
// only the trailing "bba" remains; consuming it exhausts the input
assert(string("bba")(ctx));
assert(ctx.done);
});
});

0 comments on commit 542c066

Please sign in to comment.