From 4afa88c3300ecb3055a397d11ffd6a6684f0a2e1 Mon Sep 17 00:00:00 2001 From: soasme Date: Sat, 17 Apr 2021 10:48:28 +1200 Subject: [PATCH 1/4] peg: add comment to peg grammar syntax. --- peppapeg.c | 16 ++++++++++++++++ peppapeg.h | 1 + 2 files changed, 17 insertions(+) diff --git a/peppapeg.c b/peppapeg.c index 6b7c1305..3ff99615 100644 --- a/peppapeg.c +++ b/peppapeg.c @@ -3585,6 +3585,22 @@ P4_Grammar* P4_CreatePegGrammar () { P4_FLAG_LIFTED | P4_FLAG_SPACED)) goto finalize; + if (P4_Ok != P4_AddSequenceWithMembers(grammar, P4_PegRuleComment, 3, + P4_CreateLiteral("#", true), + P4_CreateZeroOrMore( + P4_CreateSequenceWithMembers(2, + P4_CreateNegative(P4_CreateLiteral("\n", true)), + P4_CreateRange(0x1, 0x10ffff, 1) + ) + ), + P4_CreateZeroOrOnce(P4_CreateLiteral("\n", true)) + )) + goto finalize; + + if (P4_Ok != P4_SetGrammarRuleFlag(grammar, P4_PegRuleComment, + P4_FLAG_LIFTED | P4_FLAG_SPACED)) + goto finalize; + return grammar; finalize: diff --git a/peppapeg.h b/peppapeg.h index 20df363e..ea067adf 100644 --- a/peppapeg.h +++ b/peppapeg.h @@ -252,6 +252,7 @@ typedef enum { P4_PegRuleDot = 28, P4_PegRuleWhitespace = 29, P4_PegRuleRangeCategory = 30, + P4_PegRuleComment = 31, } P4_PegRuleID; /* From 35b93d2c2f06ce4749c4732b1ee7108edb118896 Mon Sep 17 00:00:00 2001 From: soasme Date: Sat, 17 Apr 2021 10:48:40 +1200 Subject: [PATCH 2/4] docs: add comment to peg rst. --- docs/peg.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/peg.rst b/docs/peg.rst index 8259622f..46a3a610 100644 --- a/docs/peg.rst +++ b/docs/peg.rst @@ -246,6 +246,18 @@ Repeat **matches the sub-expression several times**. hex = "\u{" ([0-9] / [a-z] / [A-Z]){1,6} "}"; +Comment +------- + +A comment starts with a # and runs to the end of the line. + +.. code-block:: + + # THIS IS A COMMENT. + rule = "hello"; # THIS IS ANOTHER COMMENT. + +Comments are ignored. 
+ Grammar Rule Flags ------------------ @@ -446,3 +458,5 @@ Cheatsheet - repeat between m-n times * - `foo{m}` - repeat exact n times + * - `# IGNORE` + - comment From e378dd26de72eaabe0c43605adf7b0c986ac56b2 Mon Sep 17 00:00:00 2001 From: soasme Date: Sat, 17 Apr 2021 10:48:50 +1200 Subject: [PATCH 3/4] test: test comment for peg grammar. --- tests/test_peg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_peg.c b/tests/test_peg.c index 68b93fbb..22d9253a 100644 --- a/tests/test_peg.c +++ b/tests/test_peg.c @@ -525,6 +525,9 @@ void test_eval_grammar(void) { ASSERT_EVAL_GRAMMAR("R1 = .;", "R1", "好", P4_Ok, "[{\"slice\":[0,3],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = \"a\"* !.;", "R1", "aaab", P4_MatchError, "[]"); ASSERT_EVAL_GRAMMAR("R1 = \"a\"*;", "R1", "aaab", P4_Ok, "[{\"slice\":[0,3],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = \"1\"# R1 = \"3\";\n;", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("# R1 = \"2\";\nR1 = \"1\";", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = \"1\";##", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR( "R1 = R2; " From 55c6c7df3d26e1f0b77324f8eb296ced2ff4976e Mon Sep 17 00:00:00 2001 From: soasme Date: Sat, 17 Apr 2021 10:49:48 +1200 Subject: [PATCH 4/4] docs: update roadmap. --- CHANGELOG.rst | 1 + ROADMAP.md | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 06905fb5..a52d6aec 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ Not Yet Released Code change: . +* [Feature]: Support Comment for PEG Grammar. `#69 `_ * [Feature]: Support Unicode Character Categories for P4_Range. 
`#65 `_, `#67 `_ 1.11.0 (8 Apr, 2021) diff --git a/ROADMAP.md b/ROADMAP.md index acb7d90d..92a02253 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -11,7 +11,6 @@ - [ ] api: Sanitize `\u0000` to whitespace for the source input, this happens in creating the source/setting the source size. - [ ] api: left recursion. https://github.com/orlandohill/peg-left-recursion - [ ] api: register a function for matching source. This should help dealing with some inputs difficult to parse. -- [ ] api: support comment in peg grammar - [ ] api: Support UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of source. - [ ] peg: Numeric. - [ ] peg: Complement. @@ -23,6 +22,7 @@ - [ ] perf: pre-alloc tokens. - [ ] perf: Cache literal len. - [ ] perf: backrefs is not necessary if there is no BackReference in Sequence. +- [ ] perf: trace: add a tracer in P4_Source. When matching, annotate the tracer. An additional tool can aggregate data and output a DOT / compile to png. - [ ] perf: tracer: https://pegjs.org/documentation https://github.com/orlandohill/peg-left-recursion - [ ] binding: python: example: cffi, misaka, parsimonious (api). . @@ -41,13 +41,14 @@ - [ ] Pratt parser: https://en.wikipedia.org/wiki/Operator-precedence_parser - [ ] build: static lib. - [ ] build: wasm. `docker run --rm -v $(pwd):/src -u $(id -u):$(id -g) emscripten/emsdk emcc peppapeg.c -Os -s WASM=1 -s SIDE_MODULE=1 -o /src/peppapeg.wasm`. https://gist.github.com/kripken/59c67556dc03bb6d57052fedef1e61ab https://github.com/mbasso/awesome-wasm +- [x] api: support comment in peg grammar. Added in v1.12.0. - [x] peg: CharacterSet. can use range. -- [x] api: Support more spaced rules. Added in v1.11.0. - [x] peg: extend range: `[0-9..2] / [a-z] / [\p{L}] / [\u{1}-\u{10ffff}]`. Added in v1.12.0. - [x] peg: built-in rules: letters. - [x] peg: built-in rules: unicode letters. - [x] peg: built-in rules: digits. - [x] peg: built-in rules: unicode digits. +- [x] api: Support more spaced rules. Added in v1.11.0. 
- [x] api: `P4_AcquireSourceAst(source, &ast)`: set ast, reset source. It's useful when we need the parsed result but not care about source itself. Token tree should now owned by ast and shall then be free by the caller. Added in v1.11.0 - [x] docs: add explanations. - [x] GetErrorString.