diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 06905fb5..a52d6aec 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ Not Yet Released Code change: . +* [Feature]: Support Comment for PEG Grammar. `#69 `_ * [Feature]: Support Unicode Character Categories for P4_Range. `#65 `_, `#67 `_ 1.11.0 (8 Apr, 2021) diff --git a/ROADMAP.md b/ROADMAP.md index acb7d90d..92a02253 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -11,7 +11,6 @@ - [ ] api: Sanitize `\u0000` to whitespace for the source input, this happens in creating the source/setting the source size. - [ ] api: left recursion. https://github.com/orlandohill/peg-left-recursion - [ ] api: register a function for matching source. This should help dealing with some inputs difficult to parse. -- [ ] api: support comment in peg grammar - [ ] api: Support UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of source. - [ ] peg: Numeric. - [ ] peg: Complement. @@ -23,6 +22,7 @@ - [ ] perf: pre-alloc tokens. - [ ] perf: Cache literal len. - [ ] perf: backrefs is not necessary if there is no BackReference in Sequence. +- [ ] perf: trace: add a tracer in P4_Source. When matching, annotate the tracer. An additional tool can aggregate data and output a DOT / compile to png. - [ ] perf: tracer: https://pegjs.org/documentation https://github.com/orlandohill/peg-left-recursion - [ ] binding: python: example: cffi, misaka, parsimonious (api). . @@ -41,13 +41,14 @@ - [ ] Pratt parser: https://en.wikipedia.org/wiki/Operator-precedence_parser - [ ] build: static lib. - [ ] build: wasm. `docker run --rm -v $(pwd):/src -u $(id -u):$(id -g) emscripten/emsdk emcc peppapeg.c -Os -s WASM=1 -s SIDE_MODULE=1 -o /src/peppapeg.wasm`. https://gist.github.com/kripken/59c67556dc03bb6d57052fedef1e61ab https://github.com/mbasso/awesome-wasm +- [x] api: support comment in peg grammar. Added in v1.12.0. - [x] peg: CharacterSet. can use range. -- [x] api: Support more spaced rules. Added in v1.11.0. 
- [x] peg: extend range: `[0-9..2] / [a-z] / [\p{L}] / [\u{1}-\u{10ffff}]`. Added in v1.12.0. - [x] peg: built-in rules: letters. - [x] peg: built-in rules: unicode letters. - [x] peg: built-in rules: digits. - [x] peg: built-in rules: unicode digits. +- [x] api: Support more spaced rules. Added in v1.11.0. - [x] api: `P4_AcquireSourceAst(source, &ast)`: set ast, reset source. It's useful when we need the parsed result but not care about source itself. Token tree should now owned by ast and shall then be free by the caller. Added in v1.11.0 - [x] docs: add explanations. - [x] GetErrorString. diff --git a/docs/peg.rst b/docs/peg.rst index 8259622f..46a3a610 100644 --- a/docs/peg.rst +++ b/docs/peg.rst @@ -246,6 +246,18 @@ Repeat **matches the sub-expression several times**. hex = "\u{" ([0-9] / [a-z] / [A-Z]){1,6} "}"; +Comment +------- + +A comment is a # and any characters following it up to the end of the line. + +.. code-block:: + + # THIS IS A COMMENT. + rule = "hello"; # THIS IS ANOTHER COMMENT. + +Comments are ignored. 
+ Grammar Rule Flags ------------------ @@ -446,3 +458,5 @@ Cheatsheet - repeat between m-n times * - `foo{m}` - repeat exact n times + * - `# IGNORE` + - comment diff --git a/peppapeg.c b/peppapeg.c index 6b7c1305..3ff99615 100644 --- a/peppapeg.c +++ b/peppapeg.c @@ -3585,6 +3585,22 @@ P4_Grammar* P4_CreatePegGrammar () { P4_FLAG_LIFTED | P4_FLAG_SPACED)) goto finalize; + if (P4_Ok != P4_AddSequenceWithMembers(grammar, P4_PegRuleComment, 3, + P4_CreateLiteral("#", true), + P4_CreateZeroOrMore( + P4_CreateSequenceWithMembers(2, + P4_CreateNegative(P4_CreateLiteral("\n", true)), + P4_CreateRange(0x1, 0x10ffff, 1) + ) + ), + P4_CreateZeroOrOnce(P4_CreateLiteral("\n", true)) + )) + goto finalize; + + if (P4_Ok != P4_SetGrammarRuleFlag(grammar, P4_PegRuleComment, + P4_FLAG_LIFTED | P4_FLAG_SPACED)) + goto finalize; + return grammar; finalize: diff --git a/peppapeg.h b/peppapeg.h index 20df363e..ea067adf 100644 --- a/peppapeg.h +++ b/peppapeg.h @@ -252,6 +252,7 @@ typedef enum { P4_PegRuleDot = 28, P4_PegRuleWhitespace = 29, P4_PegRuleRangeCategory = 30, + P4_PegRuleComment = 31, } P4_PegRuleID; /* diff --git a/tests/test_peg.c b/tests/test_peg.c index 68b93fbb..22d9253a 100644 --- a/tests/test_peg.c +++ b/tests/test_peg.c @@ -525,6 +525,9 @@ void test_eval_grammar(void) { ASSERT_EVAL_GRAMMAR("R1 = .;", "R1", "好", P4_Ok, "[{\"slice\":[0,3],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = \"a\"* !.;", "R1", "aaab", P4_MatchError, "[]"); ASSERT_EVAL_GRAMMAR("R1 = \"a\"*;", "R1", "aaab", P4_Ok, "[{\"slice\":[0,3],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = \"1\"# R1 = \"3\";\n;", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("# R1 = \"2\";\nR1 = \"1\";", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = \"1\";##", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR( "R1 = R2; "