Skip to content

Commit

Permalink
Merge pull request #80 from soasme/x
Browse files Browse the repository at this point in the history
peg: support \xXX
  • Loading branch information
soasme committed Jul 28, 2021
2 parents c6cc1f8 + c10d2fb commit 5a49607
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 2 deletions.
30 changes: 29 additions & 1 deletion docs/peg.rst
Expand Up @@ -64,7 +64,13 @@ Emoji is supported:
greeting = "Peppa 🐷";
You can encode UTF-8 code points via `\\u` followed by 4 hex digits or `\\U` followed by 8 hex digits.
You can encode ASCII characters via `\\x` followed by 2 hex digits.

.. code-block::
greeting = "\x48\x65\x6c\x6c\x6f, world";
You can encode Unicode code points via `\\u` followed by 4 hex digits or `\\U` followed by 8 hex digits.

.. code-block::
Expand Down Expand Up @@ -360,6 +366,24 @@ In this example, the rule `float` will drop all `number` nodes, leaving only one
number = [0-9];
@scoped
```````

Ignore all the decorators set by upstream rules.

For example, although `greeting2` is marked `@tight` and therefore does not use the spaced rule `ws`, `greeting` can still apply `ws` since it's under its own scope.

.. code-block::
@tight
greeting2 = greeting greeting;
@scoped
greeting = "hello" "world";
@spaced
ws = " ";
Use Peg API
------------
Expand Down Expand Up @@ -425,6 +449,10 @@ Cheatsheet
- cancel effects
* - `"literal"`
- exact match
* - `"\x0d\x0a"`
- exact match using ASCII hex escapes
* - `"\u4f60\u597D"`
- exact match using Unicode code point escapes
* - `i"literal"`
- case-insensitive match
* - `[a-z]`
Expand Down
18 changes: 17 additions & 1 deletion peppapeg.c
Expand Up @@ -831,6 +831,12 @@ size_t P4_ReadEscapedRune(char* text, P4_Rune* rune) {
case '"': *rune = 0x22; return 2;
case '/': *rune = 0x2f; return 2;
case '\\': *rune = 0x5c; return 2;
case 'x': { /* TODO: may not have enough chars. */
char chs[3] = {0, 0, 0};
memcpy(chs, text + 2, 2);
*rune = strtoul(chs, NULL, 16);
return 4;
}
case 'u': { /* TODO: may not have enough chars. */
char chs[5] = {0, 0, 0, 0, 0};
memcpy(chs, text + 2, 4);
Expand Down Expand Up @@ -3218,7 +3224,7 @@ P4_Grammar* P4_CreatePegGrammar () {
P4_CreateRange(0x5d, 0x10ffff, 1),
P4_CreateSequenceWithMembers(2,
P4_CreateLiteral("\\", true),
P4_CreateChoiceWithMembers(10,
P4_CreateChoiceWithMembers(11,
P4_CreateLiteral("\"", true),
P4_CreateLiteral("/", true),
P4_CreateLiteral("\\", true),
Expand All @@ -3227,6 +3233,16 @@ P4_Grammar* P4_CreatePegGrammar () {
P4_CreateLiteral("n", true),
P4_CreateLiteral("r", true),
P4_CreateLiteral("t", true),
P4_CreateSequenceWithMembers(2,
P4_CreateLiteral("x", true),
P4_CreateRepeatExact(
P4_CreateChoiceWithMembers(3,
P4_CreateRange('0', '9', 1),
P4_CreateRange('a', 'f', 1),
P4_CreateRange('A', 'F', 1)
), 2
)
),
P4_CreateSequenceWithMembers(2,
P4_CreateLiteral("u", true),
P4_CreateRepeatExact(
Expand Down
6 changes: 6 additions & 0 deletions tests/test_peg.c
Expand Up @@ -311,7 +311,9 @@ void test_eval_literal(void) {
ASSERT_EVAL_GRAMMAR("R1 = \"你好, World\";", "R1", "你好, World", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = \"نامهای\";", "R1", "نامهای", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = \"Peppa PEG 🐷\";", "R1", "Peppa PEG 🐷", P4_Ok, "[{\"slice\":[0,14],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = \"\\x48\\x65\\x6c\\x6c\\x6f, world\";", "R1", "Hello, world", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = \"\\u4f60\\u597d, world\";", "R1", "你好, world", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]");
/* ASSERT_EVAL_GRAMMAR("R1 = \"\\xe4\\xbd\\xa0\\xe5\\xa5\\xbd, world\";", "R1", "你好, world", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]"); */ /* Not sure if this one should be the expected behavior. */
ASSERT_EVAL_GRAMMAR("R1 = \"\\n\";", "R1", "\n", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = \"\\r\";", "R1", "\r", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = \"\\t\";", "R1", "\t", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
Expand All @@ -327,6 +329,7 @@ void test_eval_insensitive(void) {
ASSERT_EVAL_GRAMMAR("R1 = i\"你好, World\";", "R1", "你好, WORLD", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = i\"نامهای\";", "R1", "نامهای", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = i\"Peppa PEG 🐷\";", "R1", "peppa peg 🐷", P4_Ok, "[{\"slice\":[0,14],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = i\"\\x48\\x65\\x6c\\x6c\\x6f, world\";", "R1", "HELLO, WORLD", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = i\"\\u4f60\\u597d, world\";", "R1", "你好, WORLD", P4_Ok, "[{\"slice\":[0,13],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = i\"Hello Worìd\";", "R1", "HELLO WORÌD", P4_Ok, "[{\"slice\":[0,12],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = i\"\\n\";", "R1", "\n", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
Expand All @@ -339,6 +342,9 @@ void test_eval_range(void) {
ASSERT_EVAL_GRAMMAR("R1 = [0-9];", "R1", "0", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [a-z];", "R1", "a", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [A-Z];", "R1", "A", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [\\x30-\\x39];", "R1", "0", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [\\x41-\\x5A];", "R1", "A", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [\\x61-\\x7A];", "R1", "a", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [\\u0001-\\U0010ffff];", "R1", "a", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [你-好];", "R1", "你", P4_Ok, "[{\"slice\":[0,3],\"type\":\"R1\"}]");
ASSERT_EVAL_GRAMMAR("R1 = [1-9..2];", "R1", "1", P4_Ok, "[{\"slice\":[0,1],\"type\":\"R1\"}]");
Expand Down

0 comments on commit 5a49607

Please sign in to comment.