diff --git a/docs/peg.rst b/docs/peg.rst index 5da8e7b0..890798d8 100644 --- a/docs/peg.rst +++ b/docs/peg.rst @@ -64,7 +64,13 @@ Emoji is supported: greeting = "Peppa 🐷"; -You can encode UTF-8 code points via `\\u` followed by 4 hex digits or `\\U` followed by 8 hex digits. +You can encode ASCII characters via `\\x` followed by 2 hex digits. + +.. code-block:: + + greeting = "\x48\x65\x6c\x6c\x6f, world"; + +You can encode Unicode code points via `\\u` followed by 4 hex digits or `\\U` followed by 8 hex digits. .. code-block:: @@ -360,6 +366,24 @@ In this example, the rule `float` will drop all `number` nodes, leaving only one number = [0-9]; +@scoped +``````` + +Ignore all the decorators set by upstream rules. + +For example, although `greeting2` is marked `@tight` and therefore does not apply the spaced rule `ws`, `greeting` still applies `ws` because it runs under its own scope. + +.. code-block:: + + @tight + greeting2 = greeting greeting; + + @scoped + greeting = "hello" "world"; + + @spaced + ws = " "; + Use Peg API ------------ @@ -425,6 +449,10 @@ Cheatsheet - cancle effects * - `"literal"` - exact match + * - `"\x0d\x0a"` + - exact match using hex-escaped ASCII characters + * - `"\u4f60\u597D"` + - exact match using escaped Unicode code points * - `i"literal"` - case-insensitive match * - `[a-z]` diff --git a/peppapeg.c b/peppapeg.c index e7ce2c64..3f7af66b 100644 --- a/peppapeg.c +++ b/peppapeg.c @@ -831,6 +831,12 @@ size_t P4_ReadEscapedRune(char* text, P4_Rune* rune) { case '"': *rune = 0x22; return 2; case '/': *rune = 0x2f; return 2; case '\\': *rune = 0x5c; return 2; + case 'x': { /* TODO: may not have enough chars. */ + char chs[3] = {0, 0, 0}; + memcpy(chs, text + 2, 2); + *rune = strtoul(chs, NULL, 16); + return 4; + } case 'u': { /* TODO: may not have enough chars. 
*/ char chs[5] = {0, 0, 0, 0, 0}; memcpy(chs, text + 2, 4); @@ -3218,7 +3224,7 @@ P4_Grammar* P4_CreatePegGrammar () { P4_CreateRange(0x5d, 0x10ffff, 1), P4_CreateSequenceWithMembers(2, P4_CreateLiteral("\\", true), - P4_CreateChoiceWithMembers(10, + P4_CreateChoiceWithMembers(11, P4_CreateLiteral("\"", true), P4_CreateLiteral("/", true), P4_CreateLiteral("\\", true), @@ -3227,6 +3233,16 @@ P4_Grammar* P4_CreatePegGrammar () { P4_CreateLiteral("n", true), P4_CreateLiteral("r", true), P4_CreateLiteral("t", true), + P4_CreateSequenceWithMembers(2, + P4_CreateLiteral("x", true), + P4_CreateRepeatExact( + P4_CreateChoiceWithMembers(3, + P4_CreateRange('0', '9', 1), + P4_CreateRange('a', 'f', 1), + P4_CreateRange('A', 'F', 1) + ), 2 + ) + ), P4_CreateSequenceWithMembers(2, P4_CreateLiteral("u", true), P4_CreateRepeatExact( diff --git a/tests/test_peg.c b/tests/test_peg.c index ec828abf..a6da4932 100644 --- a/tests/test_peg.c +++ b/tests/test_peg.c @@ -311,7 +311,9 @@ void test_eval_literal(void) { ASSERT_EVAL_GRAMMAR("R1 = \"你好, World\";", "R1", "你好, World", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = \"نامهای\";", "R1", "نامهای", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = \"Peppa PEG 🐷\";", "R1", "Peppa PEG 🐷", P4_Ok, "[{\"slice\":[0,14],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = \"\\x48\\x65\\x6c\\x6c\\x6f, world\";", "R1", "Hello, world", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = \"\\u4f60\\u597d, world\";", "R1", "你好, world", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]"); + /* ASSERT_EVAL_GRAMMAR("R1 = \"\\xe4\\xbd\\xa0\\xe5\\xa5\\xbd, world\";", "R1", "你好, world", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]"); */ /* Not sure if this one should be the expected behavior. 
*/ ASSERT_EVAL_GRAMMAR("R1 = \"\\n\";", "R1", "\n", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = \"\\r\";", "R1", "\r", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = \"\\t\";", "R1", "\t", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); @@ -327,6 +329,7 @@ void test_eval_insensitive(void) { ASSERT_EVAL_GRAMMAR("R1 = i\"你好, World\";", "R1", "你好, WORLD", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = i\"نامهای\";", "R1", "نامهای", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = i\"Peppa PEG 🐷\";", "R1", "peppa peg 🐷", P4_Ok, "[{\"slice\":[0,14],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = i\"\\x48\\x65\\x6c\\x6c\\x6f, world\";", "R1", "HELLO, WORLD", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = i\"\\u4f60\\u597d, world\";", "R1", "你好, WORLD", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = i\"Hello Worìd\";", "R1", "HELLO WORÌD", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = i\"\\n\";", "R1", "\n", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); @@ -339,6 +342,9 @@ void test_eval_range(void) { ASSERT_EVAL_GRAMMAR("R1 = [0-9];", "R1", "0", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = [a-z];", "R1", "a", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = [A-Z];", "R1", "A", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = [\\x30-\\x39];", "R1", "0", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = [\\x41-\\x5A];", "R1", "A", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); + ASSERT_EVAL_GRAMMAR("R1 = [\\x61-\\x7A];", "R1", "a", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = [\\u0001-\\U0010ffff];", "R1", "a", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]"); ASSERT_EVAL_GRAMMAR("R1 = [你-好];", "R1", "你", P4_Ok, "[{\"slice\":[0,3],\"type\":\"R1\"}]"); 
ASSERT_EVAL_GRAMMAR("R1 = [1-9..2];", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");