Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Example: add toml parser #81

Merged
merged 13 commits into from Jul 29, 2021
3 changes: 3 additions & 0 deletions .gitmodules
@@ -1,3 +1,6 @@
[submodule "tests/unity"]
path = tests/unity
url = https://github.com/ThrowTheSwitch/Unity.git
[submodule "tests/toml-test"]
path = tests/toml-test
url = https://github.com/BurntSushi/toml-test.git
194 changes: 194 additions & 0 deletions examples/toml.h
@@ -0,0 +1,194 @@
/*
* Peppa PEG - Ultra lightweight PEG Parser in ANSI C.
*
* MIT License
*
* Copyright (c) 2021 Ju
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Example: Write a TOML Parser using Peppa PEG.
*
* https://toml.io/en/v1.0.0
*
*/

# ifndef P4_LANG_TOML_H
# define P4_LANG_TOML_H

#ifdef __cplusplus
extern "C"
{
#endif

#include "../peppapeg.h"

/**
 * Build a Peppa PEG grammar describing TOML v1.0.0 documents.
 *
 * The grammar text below follows the layout of the official TOML ABNF
 * (https://toml.io/en/v1.0.0), translated into Peppa PEG rule syntax. The
 * whole text is handed to P4_LoadGrammar() as one concatenated string, so
 * every rule line ends with ";\n".
 *
 * @return The value returned by P4_LoadGrammar(). The caller owns the
 *         grammar; the accompanying test releases it with
 *         P4_DeleteGrammar(). NOTE(review): presumably NULL when the
 *         grammar text fails to load -- confirm against peppapeg.h.
 */
P4_Grammar* P4_CreateTomlGrammar(void) {
    return P4_LoadGrammar(

        "toml = expression (newline expression)*;\n"
        "expression = ws (keyval / table)? ws comment?;\n"

        /* Key-Value pairs */
        "keyval = key keyval_sep val;\n"
        "@lifted keyval_sep = ws \"=\" ws;\n"

        /* Key */
        "@lifted key = dotted_key / simple_key;\n"
        "@lifted simple_key = quoted_key / unquoted_key;\n"
        "@squashed unquoted_key = (ALPHA / DIGIT / \"-\" / \"_\"){1,};\n"
        "@squashed quoted_key = basic_string / literal_string;\n"
        "dotted_key = simple_key (dot_sep simple_key)+;\n"
        "@lifted dot_sep = ws \".\" ws;\n"

        /* Value.
         * Ordered choice: datetime and float are tried before integer so
         * that a leading run of digits is not consumed as a bare integer. */
        "@lifted val = boolean / datetime / array / inline_table / string / float / integer;\n"

        /* Multi-line forms come first: their opening delimiter starts with
         * the single-line delimiter. */
        "@lifted string = ml_literal_string / ml_basic_string / basic_string / literal_string;\n"

        /* Basic String */
        "basic_string = \"\\\"\" basic_char* \"\\\"\";\n"
        "@lifted basic_char = basic_unescaped / escaped;\n"
        "@lifted basic_unescaped = wschar / \"!\" / [\\u0023-\\u005B] / [\\u005D-\\u007E] / non_ascii;\n"
        "@scoped @squashed escaped = \"\\\\\" (\"\\\"\" / \"\\\\\" / \"b\" / \"f\" / \"n\" / \"r\" / \"t\" / \"u\" HEXDIG{4} / \"U\" HEXDIG{8});\n"

        /* Multi-line Basic String */
        "@squashed ml_basic_string = \"\\x22\\x22\\x22\" newline? ml_basic_body \"\\x22\\x22\\x22\";\n"
        "ml_basic_body = mlb_content* (mlb_quotes mlb_content+)*;\n"
        "mlb_content = mlb_char / newline / mlb_escaped_nl;\n"
        "mlb_char = mlb_unescaped / escaped;\n"
        /* One or two quotes may appear inside the body (TOML ABNF:
         * mlb-quotes = 1*2quotation-mark); the longer form is tried first,
         * mirroring mll_quotes below. */
        "mlb_quotes = \"\\x22\\x22\" / \"\\x22\";\n"
        "mlb_unescaped = wschar / \"\\x21\" / [\\x23-\\x5B] / [\\x5D-\\x7E] / non_ascii;\n"
        /* Line-ending backslash: a bare backslash, optional trailing
         * whitespace, a newline, then any run of whitespace / newlines
         * (TOML ABNF: mlb-escaped-nl = escape ws newline *( wschar / newline )). */
        "mlb_escaped_nl = \"\\\\\" ws newline (wschar / newline)*;\n"

        /* Literal String */
        "literal_string = \"'\" literal_char* \"'\";\n"
        "@lifted literal_char = \"\\t\" / [\\u0020-\\u0026] / [\\u0028-\\u007E] / non_ascii;\n"

        /* Multi-line literal String */
        "@squashed ml_literal_string = \"'''\" newline? ml_literal_body \"'''\";\n"
        "ml_literal_body = mll_content* (mll_quotes mll_content+)*;\n"
        "mll_content = mll_char / newline;\n"
        "mll_char = \"\\x09\" / [\\x20-\\x26] / [\\x28-\\x7e] / non_ascii;\n"
        "mll_quotes = \"''\" / \"'\";\n"

        /* Boolean */
        "boolean = \"true\" / \"false\";\n"

        /* Integer */
        "minus = \"-\";\n"
        "plus = \"+\";\n"
        "underscore = \"_\";\n"
        "one_nine = [1-9];\n"
        "zero_seven = [0-7];\n"
        "zero_one = [0-1];\n"
        "hex_prefix = i\"0x\";\n"
        "oct_prefix = i\"0o\";\n"
        "bin_prefix = i\"0b\";\n"
        "dec_int = (minus / plus)? unsigned_dec_int;\n"
        "unsigned_dec_int = \"0\" / one_nine (DIGIT / underscore DIGIT)*;\n"
        "hex_int = hex_prefix HEXDIG (HEXDIG / underscore HEXDIG)*;\n"
        "oct_int = oct_prefix zero_seven (zero_seven / underscore zero_seven)*;\n"
        "bin_int = bin_prefix zero_one (zero_one / underscore zero_one)*;\n"
        /* Prefixed forms first: dec_int would otherwise match the leading "0". */
        "@squashed integer = hex_int / oct_int / bin_int / dec_int;\n"

        /* Float */
        "@squashed float = special_float / dec_int (exp / frac exp?);\n"
        "zero_prefixed_int = DIGIT (underscore DIGIT / DIGIT)*;\n"
        "frac = \".\" zero_prefixed_int;\n"
        "exp = i\"e\" (minus / plus)? zero_prefixed_int;\n"
        "special_float = (minus / plus)? (inf / nan);\n"
        "inf = \"inf\";\n"
        "nan = \"nan\";\n"

        /* Date and Time */
        "@squashed date_fullyear = DIGIT{4};\n"
        "@squashed date_month = \"0\" [1-9] / \"1\" [0-2];\n"
        "@squashed date_mday = DIGIT{2};\n"
        "time_delim = i\"t\" / \" \";\n"
        "@squashed time_hour = DIGIT{2};\n"
        "@squashed time_minute = DIGIT{2};\n"
        "@squashed time_second = DIGIT{2};\n"
        "@squashed time_secfrac = DIGIT+;\n"
        "time_numoffset = (\"+\" / \"-\") time_hour \":\" time_minute;\n"
        "time_offset = i\"z\" / time_numoffset;\n"
        "partial_time = time_hour \":\" time_minute \":\" time_second (\".\" time_secfrac)?;\n"
        "full_date = date_fullyear \"-\" date_month \"-\" date_mday;\n"
        "full_time = partial_time time_offset;\n"
        "offset_date_time = full_date time_delim full_time;\n"
        "local_date_time = full_date time_delim partial_time;\n"
        "local_date = full_date;\n"
        "local_time = partial_time;\n"
        /* Longest alternatives first so a shorter form does not win early. */
        "datetime = offset_date_time / local_date_time / local_date / local_time;\n"

        /* Array */
        "array = array_open array_ws array_values? array_ws array_close;\n"
        "@lifted array_values = val (array_ws array_sep array_ws val)*;\n"
        "@lifted array_open = \"[\";\n"
        "@lifted array_close = \"]\";\n"
        "@lifted array_sep = \",\";\n"
        "@lifted @squashed array_ws = (wschar / comment? newline)*;\n"

        /* Inline Table */
        "inline_table = inline_table_open ws inline_table_values? ws inline_table_close;\n"
        "@lifted inline_table_values = keyval (ws inline_table_sep ws keyval)*;\n"
        "@lifted inline_table_open = \"{\";\n"
        "@lifted inline_table_close = \"}\";\n"
        "@lifted inline_table_sep = \",\";\n"

        /* Table.
         * array_table is tried first because "[[" begins with "[". */
        "@lifted table = array_table / std_table;\n"

        /* Standard Table */
        "std_table = std_table_open ws key ws std_table_close;\n"
        "@lifted std_table_open = \"[\";\n"
        "@lifted std_table_close = \"]\";\n"

        /* Array Table */
        "array_table = array_table_open ws key ws array_table_close;\n"
        "@lifted array_table_open = \"[[\";\n"
        "@lifted array_table_close = \"]]\";\n"

        /* Comment */
        "comment = comment_start comment_body;\n"

        "comment_start = \"#\";\n"
        "@squashed comment_body = non_eol*;\n"

        "non_eol = \"\\t\" / [\\u0020-\\u007F] / non_ascii;\n"
        "non_ascii = [\\u0080-\\uD7FF] / [\\uE000-\\U0010FFFF];\n"

        /* Newline */
        "newline = \"\\n\" / \"\\r\\n\";\n"

        /* Whitespace */
        "@lifted @squashed ws = wschar*;\n"
        "wschar = \" \" / \"\\t\";\n"

        "ALPHA = [a-z] / [A-Z];\n"
        "DIGIT = [0-9];\n"
        "HEXDIG = [a-f] / [A-F] / [0-9];\n"
    );
}

#ifdef __cplusplus
}
#endif

# endif
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Expand Up @@ -20,6 +20,7 @@ set(
test_example_json
test_example_calc
test_example_dot
test_example_toml
)

# Build-time switch, OFF by default; the help text names the valgrind tool.
option(ENABLE_VALGRIND "Enable valgrind memory leak report." OFF)
Expand Down
86 changes: 86 additions & 0 deletions tests/test_example_toml.c
@@ -0,0 +1,86 @@
#include <stdio.h>
#include <string.h>
#include "unity/src/unity.h"
#include "common.h"
#include "../examples/toml.h"

/*
 * ASSERT_TOML(entry, input, code, output)
 *
 * Parse `input` with a freshly created TOML grammar, starting from rule id
 * `entry`, and assert that P4_Parse() returns `code`. The resulting AST is
 * dumped as JSON to "check.json", read back, and printed together with the
 * input for manual inspection. The grammar and the source are released
 * before the macro finishes.
 *
 * `output` is accepted for symmetry with the other example tests but is not
 * compared against the generated JSON; it is only evaluated as a void
 * expression so the argument is not silently dropped.
 *
 * The expansion is a single do { ... } while (0) statement without a trailing
 * semicolon, so the caller supplies the ';' and the macro is safe inside
 * unbraced if/else bodies.
 */
# define ASSERT_TOML(entry, input, code, output) do { \
    P4_Grammar* grammar = P4_CreateTomlGrammar(); \
    TEST_ASSERT_NOT_NULL_MESSAGE(grammar, "failed to create toml grammar"); \
    P4_Source* source = P4_CreateSource((input), (entry)); \
    TEST_ASSERT_NOT_NULL_MESSAGE(source, "failed to create source"); \
    TEST_ASSERT_EQUAL_MESSAGE((code), P4_Parse(grammar, source), "unexpected parse grammar return code"); \
    P4_Node* root = P4_GetSourceAst(source); \
    FILE *f = fopen("check.json","w"); \
    TEST_ASSERT_NOT_NULL_MESSAGE(f, "failed to open check.json for writing"); \
    P4_JsonifySourceAst(grammar, f, root); \
    fclose(f); \
    P4_String s = read_file("check.json"); \
    TEST_ASSERT_NOT_NULL_MESSAGE(s, "failed to read back check.json"); \
    printf("%s\n%s\n", (input), s); \
    free(s); \
    (void)(output); \
    P4_DeleteSource(source); \
    P4_DeleteGrammar(grammar); \
} while (0)

/*
 * Local shorthand: every case in test_valid() parses from rule id 1, expects
 * P4_Ok, and passes the placeholder "[]" as expected output.
 * Kept as a macro (not a loop) so each case expands on its own source line.
 */
#define EXPECT_VALID_TOML(text) ASSERT_TOML(1, (text), P4_Ok, "[]")

/*
 * Smoke test: each TOML snippet below must parse successfully with the
 * grammar from P4_CreateTomlGrammar(). Cases are grouped by TOML feature.
 */
void test_valid(void) {
    /* Key/value pairs, comments, multiple lines, dotted keys. */
    EXPECT_VALID_TOML("abc = true # comment");
    EXPECT_VALID_TOML(
        "abc = true\n"
        "xyz = false");
    EXPECT_VALID_TOML(
        "a.b.c = true\n"
        "x.y.z = false");

    /* Basic (double-quoted) strings, as values and as keys. */
    EXPECT_VALID_TOML("abc = \"xyz\"");
    EXPECT_VALID_TOML("abc = \"\"");
    EXPECT_VALID_TOML("abc = \"a\\u0031\\U00000032\\n\"");
    EXPECT_VALID_TOML("\"abc\" = \"\"");
    EXPECT_VALID_TOML("\"a\\u0031c\" = \"\"");
    EXPECT_VALID_TOML("a.\"b\".c = \"\"");

    /* Literal (single-quoted) strings, as values and as keys. */
    EXPECT_VALID_TOML("abc = ''");
    EXPECT_VALID_TOML("abc = 'a\\u0031\\U00000032\\n'");
    EXPECT_VALID_TOML("'abc' = ''");
    EXPECT_VALID_TOML("a.'b'.c = ''");

    /* Local date and local time. */
    EXPECT_VALID_TOML("abc = 2000-01-01");
    EXPECT_VALID_TOML("abc = 00:00:00");
    EXPECT_VALID_TOML("abc = 00:00:00.000000");

    /* Local date-time with each accepted delimiter: 'T', 't', ' '. */
    EXPECT_VALID_TOML("abc = 2000-01-01T00:00:00");
    EXPECT_VALID_TOML("abc = 2000-01-01T00:00:00.0000");
    EXPECT_VALID_TOML("abc = 2000-01-01t00:00:00");
    EXPECT_VALID_TOML("abc = 2000-01-01t00:00:00.0000");
    EXPECT_VALID_TOML("abc = 2000-01-01 00:00:00");
    EXPECT_VALID_TOML("abc = 2000-01-01 00:00:00.0000");

    /* Offset date-time: 'Z' and numeric offsets. */
    EXPECT_VALID_TOML("abc = 2000-01-01T00:00:00Z");
    EXPECT_VALID_TOML("abc = 2000-01-01T00:00:00.0000Z");
    EXPECT_VALID_TOML("abc = 2000-01-01T00:00:00+12:00");
    EXPECT_VALID_TOML("abc = 2000-01-01T00:00:00.0000+12:00");

    /* Arrays, including inner comments and assorted whitespace. */
    EXPECT_VALID_TOML("abc = []");
    EXPECT_VALID_TOML("abc = [ # comment\n ]");
    EXPECT_VALID_TOML("abc = [true]");
    EXPECT_VALID_TOML("abc = [true,false]");
    EXPECT_VALID_TOML("abc = [true, false]");
    EXPECT_VALID_TOML("abc = [ true, false ]");

    /* Inline table. */
    EXPECT_VALID_TOML("abc = { abc=true, xyz=false }");

    /* Standard tables and array tables. */
    EXPECT_VALID_TOML("[abc]");
    EXPECT_VALID_TOML("[ a.\"b\".c ]");
    EXPECT_VALID_TOML("[[abc]]");
    EXPECT_VALID_TOML("[[ a.\"b\".c ]]");

    /* Integers: decimal (signed, underscores), hex, binary, octal. */
    EXPECT_VALID_TOML("abc = 0");
    EXPECT_VALID_TOML("abc = 1");
    EXPECT_VALID_TOML("abc = 123_456_789");
    EXPECT_VALID_TOML("abc = +123_456_789");
    EXPECT_VALID_TOML("abc = -123_456_789");
    EXPECT_VALID_TOML("abc = 0x123");
    EXPECT_VALID_TOML("abc = 0b0001");
    EXPECT_VALID_TOML("abc = 0o123");

    /* Floats. */
    EXPECT_VALID_TOML("abc = 1.0");
    EXPECT_VALID_TOML("abc = -1.0");

    /* Multi-line literal strings. */
    EXPECT_VALID_TOML("abc = '''abc'''");
    /* Disabled case (quotes directly before the closing delimiter);
     * NOTE(review): presumably not handled by the grammar yet -- confirm.
     * EXPECT_VALID_TOML("abc = '''abc'''''"); */
    EXPECT_VALID_TOML("abc = '''a'b'c'''");

    /* Multi-line basic strings. */
    EXPECT_VALID_TOML("abc = \"\"\"abc\"\"\"");
    EXPECT_VALID_TOML("abc = \"\"\"a\"b\"c\"\"\"");
    /* Disabled case (quotes directly before the closing delimiter);
     * NOTE(review): presumably not handled by the grammar yet -- confirm.
     * EXPECT_VALID_TOML("abc = \"\"\"abc\"\"\"\"\""); */
}

#undef EXPECT_VALID_TOML

/*
 * Test-runner entry point: opens a Unity session, runs the TOML test group,
 * and uses the value produced by UNITY_END() as the process exit status.
 */
int main(void) {
    int status;

    UNITY_BEGIN();
    RUN_TEST(test_valid);
    status = UNITY_END();

    return status;
}
1 change: 1 addition & 0 deletions tests/toml-test
Submodule toml-test added at facb9e