From e2f1010570df43e3ced12a659f3eeb9d4b1afd70 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 5 Sep 2021 00:39:03 +0100 Subject: [PATCH] Extract visitors from the grammar nodes and call makers in the peg generator Simplify the peg generator logic by extracting as many visitors as possible to disentangle the flow and separate concerns. --- Lib/test/test_peg_generator/test_pegen.py | 16 +- Parser/parser.c | 334 +++++++++--------- Tools/peg_generator/pegen/__main__.py | 3 +- Tools/peg_generator/pegen/build.py | 11 +- Tools/peg_generator/pegen/c_generator.py | 55 +-- Tools/peg_generator/pegen/first_sets.py | 12 +- Tools/peg_generator/pegen/grammar.py | 141 +------- .../peg_generator/pegen/grammar_visualizer.py | 3 +- Tools/peg_generator/pegen/keywordgen.py | 6 +- Tools/peg_generator/pegen/parser_generator.py | 208 +++++++++-- Tools/peg_generator/pegen/python_generator.py | 33 +- Tools/peg_generator/pegen/testutil.py | 5 +- Tools/peg_generator/pegen/validator.py | 7 +- 13 files changed, 419 insertions(+), 415 deletions(-) diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index 71b0fdc56465bf..99c75f09aa1f7c 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -15,6 +15,7 @@ from pegen.grammar import GrammarVisitor, GrammarError, Grammar from pegen.grammar_visualizer import ASTGrammarPrinter from pegen.parser import Parser + from pegen.parser_generator import compute_nullables, compute_left_recursives from pegen.python_generator import PythonParserGenerator @@ -502,11 +503,10 @@ def test_nullable(self) -> None: sign: ['-' | '+'] """ grammar: Grammar = parse_string(grammar_source, GrammarParser) - out = io.StringIO() - genr = PythonParserGenerator(grammar, out) rules = grammar.rules - self.assertFalse(rules["start"].nullable) # Not None! 
- self.assertTrue(rules["sign"].nullable) + nullables = compute_nullables(rules) + self.assertNotIn(rules["start"], nullables) # Not None! + self.assertIn(rules["sign"], nullables) def test_advanced_left_recursive(self) -> None: grammar_source = """ @@ -514,11 +514,11 @@ def test_advanced_left_recursive(self) -> None: sign: ['-'] """ grammar: Grammar = parse_string(grammar_source, GrammarParser) - out = io.StringIO() - genr = PythonParserGenerator(grammar, out) rules = grammar.rules - self.assertFalse(rules["start"].nullable) # Not None! - self.assertTrue(rules["sign"].nullable) + nullables = compute_nullables(rules) + compute_left_recursives(rules) + self.assertNotIn(rules["start"], nullables) # Not None! + self.assertIn(rules["sign"], nullables) self.assertTrue(rules["start"].left_recursive) self.assertFalse(rules["sign"].left_recursive) diff --git a/Parser/parser.c b/Parser/parser.c index 87227b7f2f7078..3cea370c5ad2d0 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -11,57 +11,57 @@ static KeywordToken *reserved_keywords[] = { (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) { - {"if", 510}, - {"in", 520}, - {"as", 522}, - {"is", 529}, - {"or", 531}, + {"if", 624}, + {"as", 622}, + {"in", 631}, + {"or", 571}, + {"is", 579}, {NULL, -1}, }, (KeywordToken[]) { - {"del", 503}, - {"try", 511}, - {"def", 516}, - {"for", 519}, - {"not", 528}, - {"and", 532}, + {"del", 597}, + {"def", 632}, + {"for", 630}, + {"try", 609}, + {"and", 572}, + {"not", 578}, {NULL, -1}, }, (KeywordToken[]) { - {"pass", 502}, - {"from", 514}, - {"elif", 517}, - {"else", 518}, - {"with", 521}, - {"None", 525}, - {"True", 526}, + {"from", 569}, + {"pass", 504}, + {"with", 606}, + {"elif", 626}, + {"else", 627}, + {"None", 595}, + {"True", 594}, {NULL, -1}, }, (KeywordToken[]) { - {"raise", 501}, - {"yield", 504}, - {"break", 506}, - {"while", 512}, - {"class", 515}, - {"False", 527}, + {"raise", 522}, + {"yield", 570}, + {"break", 508}, + {"class", 633}, + 
{"while", 629}, + {"False", 596}, {NULL, -1}, }, (KeywordToken[]) { - {"return", 500}, - {"assert", 505}, - {"global", 508}, - {"import", 513}, - {"except", 523}, - {"lambda", 530}, + {"return", 519}, + {"import", 531}, + {"assert", 526}, + {"global", 523}, + {"except", 620}, + {"lambda", 583}, {NULL, -1}, }, (KeywordToken[]) { - {"finally", 524}, + {"finally", 617}, {NULL, -1}, }, (KeywordToken[]) { - {"continue", 507}, - {"nonlocal", 509}, + {"continue", 509}, + {"nonlocal", 524}, {NULL, -1}, }, }; @@ -1562,7 +1562,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'return' return_stmt")); stmt_ty return_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 500) // token='return' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 519) // token='return' && (return_stmt_var = return_stmt_rule(p)) // return_stmt ) @@ -1604,7 +1604,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'raise' raise_stmt")); stmt_ty raise_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 501) // token='raise' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 522) // token='raise' && (raise_stmt_var = raise_stmt_rule(p)) // raise_stmt ) @@ -1625,7 +1625,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'pass'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 502)) // token='pass' + (_keyword = _PyPegen_expect_token(p, 504)) // token='pass' ) { D(fprintf(stderr, "%*c+ simple_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'pass'")); @@ -1658,7 +1658,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'del' del_stmt")); stmt_ty del_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 503) // token='del' + 
_PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 597) // token='del' && (del_stmt_var = del_stmt_rule(p)) // del_stmt ) @@ -1679,7 +1679,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'yield' yield_stmt")); stmt_ty yield_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 504) // token='yield' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 570) // token='yield' && (yield_stmt_var = yield_stmt_rule(p)) // yield_stmt ) @@ -1700,7 +1700,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'assert' assert_stmt")); stmt_ty assert_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 505) // token='assert' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 526) // token='assert' && (assert_stmt_var = assert_stmt_rule(p)) // assert_stmt ) @@ -1721,7 +1721,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'break'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 506)) // token='break' + (_keyword = _PyPegen_expect_token(p, 508)) // token='break' ) { D(fprintf(stderr, "%*c+ simple_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'break'")); @@ -1754,7 +1754,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'continue'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 507)) // token='continue' + (_keyword = _PyPegen_expect_token(p, 509)) // token='continue' ) { D(fprintf(stderr, "%*c+ simple_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'continue'")); @@ -1787,7 +1787,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'global' global_stmt")); stmt_ty global_stmt_var; if ( - _PyPegen_lookahead_with_int(1, 
_PyPegen_expect_token, p, 508) // token='global' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 523) // token='global' && (global_stmt_var = global_stmt_rule(p)) // global_stmt ) @@ -1808,7 +1808,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'nonlocal' nonlocal_stmt")); stmt_ty nonlocal_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 509) // token='nonlocal' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 524) // token='nonlocal' && (nonlocal_stmt_var = nonlocal_stmt_rule(p)) // nonlocal_stmt ) @@ -1876,7 +1876,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'if' if_stmt")); stmt_ty if_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 510) // token='if' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 624) // token='if' && (if_stmt_var = if_stmt_rule(p)) // if_stmt ) @@ -1960,7 +1960,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'try' try_stmt")); stmt_ty try_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 511) // token='try' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 609) // token='try' && (try_stmt_var = try_stmt_rule(p)) // try_stmt ) @@ -1981,7 +1981,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'while' while_stmt")); stmt_ty while_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 512) // token='while' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 629) // token='while' && (while_stmt_var = while_stmt_rule(p)) // while_stmt ) @@ -2666,7 +2666,7 @@ return_stmt_rule(Parser *p) Token * _keyword; void *a; if ( - (_keyword = _PyPegen_expect_token(p, 500)) // token='return' + (_keyword = _PyPegen_expect_token(p, 519)) // 
token='return' && (a = star_expressions_rule(p), 1) // star_expressions? ) @@ -2729,7 +2729,7 @@ raise_stmt_rule(Parser *p) expr_ty a; void *b; if ( - (_keyword = _PyPegen_expect_token(p, 501)) // token='raise' + (_keyword = _PyPegen_expect_token(p, 522)) // token='raise' && (a = expression_rule(p)) // expression && @@ -2766,7 +2766,7 @@ raise_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> raise_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'raise'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 501)) // token='raise' + (_keyword = _PyPegen_expect_token(p, 522)) // token='raise' ) { D(fprintf(stderr, "%*c+ raise_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'raise'")); @@ -2826,7 +2826,7 @@ global_stmt_rule(Parser *p) Token * _keyword; asdl_expr_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 508)) // token='global' + (_keyword = _PyPegen_expect_token(p, 523)) // token='global' && (a = (asdl_expr_seq*)_gather_18_rule(p)) // ','.NAME+ ) @@ -2888,7 +2888,7 @@ nonlocal_stmt_rule(Parser *p) Token * _keyword; asdl_expr_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 509)) // token='nonlocal' + (_keyword = _PyPegen_expect_token(p, 524)) // token='nonlocal' && (a = (asdl_expr_seq*)_gather_20_rule(p)) // ','.NAME+ ) @@ -2950,7 +2950,7 @@ del_stmt_rule(Parser *p) Token * _keyword; asdl_expr_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 503)) // token='del' + (_keyword = _PyPegen_expect_token(p, 597)) // token='del' && (a = del_targets_rule(p)) // del_targets && @@ -3093,7 +3093,7 @@ assert_stmt_rule(Parser *p) expr_ty a; void *b; if ( - (_keyword = _PyPegen_expect_token(p, 505)) // token='assert' + (_keyword = _PyPegen_expect_token(p, 526)) // token='assert' && (a = expression_rule(p)) // expression && @@ -3212,7 +3212,7 @@ import_name_rule(Parser *p) Token * _keyword; asdl_alias_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword = _PyPegen_expect_token(p, 531)) // token='import' && (a 
= dotted_as_names_rule(p)) // dotted_as_names ) @@ -3279,13 +3279,13 @@ import_from_rule(Parser *p) expr_ty b; asdl_alias_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' && (a = _loop0_24_rule(p)) // (('.' | '...'))* && (b = dotted_name_rule(p)) // dotted_name && - (_keyword_1 = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword_1 = _PyPegen_expect_token(p, 531)) // token='import' && (c = import_from_targets_rule(p)) // import_from_targets ) @@ -3323,11 +3323,11 @@ import_from_rule(Parser *p) asdl_seq * a; asdl_alias_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' && (a = _loop1_25_rule(p)) // (('.' | '...'))+ && - (_keyword_1 = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword_1 = _PyPegen_expect_token(p, 531)) // token='import' && (b = import_from_targets_rule(p)) // import_from_targets ) @@ -4051,7 +4051,7 @@ class_def_raw_rule(Parser *p) void *b; asdl_stmt_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 515)) // token='class' + (_keyword = _PyPegen_expect_token(p, 633)) // token='class' && (a = _PyPegen_name_token(p)) // NAME && @@ -4211,7 +4211,7 @@ function_def_raw_rule(Parser *p) void *params; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 516)) // token='def' + (_keyword = _PyPegen_expect_token(p, 632)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -4271,7 +4271,7 @@ function_def_raw_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 516)) // token='def' + (_keyword = _PyPegen_expect_token(p, 632)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -5319,7 +5319,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (a = 
named_expression_rule(p)) // named_expression && @@ -5364,7 +5364,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -5457,7 +5457,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 517)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 626)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -5502,7 +5502,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 517)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 626)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -5581,7 +5581,7 @@ else_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword = _PyPegen_expect_token(p, 627)) // token='else' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -5658,7 +5658,7 @@ while_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 512)) // token='while' + (_keyword = _PyPegen_expect_token(p, 629)) // token='while' && (a = named_expression_rule(p)) // named_expression && @@ -5756,11 +5756,11 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -5820,11 +5820,11 @@ for_stmt_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // 
token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -5950,7 +5950,7 @@ with_stmt_rule(Parser *p) asdl_withitem_seq* a; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -5999,7 +5999,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (a = (asdl_withitem_seq*)_gather_52_rule(p)) // ','.with_item+ && @@ -6050,7 +6050,7 @@ with_stmt_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6102,7 +6102,7 @@ with_stmt_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (a = (asdl_withitem_seq*)_gather_56_rule(p)) // ','.with_item+ && @@ -6186,7 +6186,7 @@ with_item_rule(Parser *p) if ( (e = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (t = star_target_rule(p)) // star_target && @@ -6308,7 +6308,7 @@ try_stmt_rule(Parser *p) asdl_stmt_seq* b; asdl_stmt_seq* f; if ( - (_keyword = _PyPegen_expect_token(p, 511)) // token='try' + (_keyword = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6352,7 +6352,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( 
- (_keyword = _PyPegen_expect_token(p, 511)) // token='try' + (_keyword = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6448,7 +6448,7 @@ except_block_rule(Parser *p) expr_ty e; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' && (e = expression_rule(p)) // expression && @@ -6491,7 +6491,7 @@ except_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -6585,7 +6585,7 @@ finally_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 524)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 617)) // token='finally' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6885,7 +6885,7 @@ guard_rule(Parser *p) Token * _keyword; expr_ty guard; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (guard = named_expression_rule(p)) // named_expression ) @@ -7074,7 +7074,7 @@ as_pattern_rule(Parser *p) if ( (pattern = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (target = pattern_capture_target_rule(p)) // pattern_capture_target ) @@ -7497,7 +7497,7 @@ literal_pattern_rule(Parser *p) D(fprintf(stderr, "%*c> literal_pattern[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ 
-7530,7 +7530,7 @@ literal_pattern_rule(Parser *p) D(fprintf(stderr, "%*c> literal_pattern[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -7563,7 +7563,7 @@ literal_pattern_rule(Parser *p) D(fprintf(stderr, "%*c> literal_pattern[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); @@ -7687,7 +7687,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ -7720,7 +7720,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -7753,7 +7753,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', 
_mark, p->mark, "'False'")); @@ -9776,11 +9776,11 @@ expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 627)) // token='else' && (c = expression_rule(p)) // expression ) @@ -9882,9 +9882,9 @@ yield_expr_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 504)) // token='yield' + (_keyword = _PyPegen_expect_token(p, 570)) // token='yield' && - (_keyword_1 = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword_1 = _PyPegen_expect_token(p, 569)) // token='from' && (a = expression_rule(p)) // expression ) @@ -9920,7 +9920,7 @@ yield_expr_rule(Parser *p) Token * _keyword; void *a; if ( - (_keyword = _PyPegen_expect_token(p, 504)) // token='yield' + (_keyword = _PyPegen_expect_token(p, 570)) // token='yield' && (a = star_expressions_rule(p), 1) // star_expressions? 
) @@ -10642,7 +10642,7 @@ inversion_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 528)) // token='not' + (_keyword = _PyPegen_expect_token(p, 578)) // token='not' && (a = inversion_rule(p)) // inversion ) @@ -11278,9 +11278,9 @@ notin_bitwise_or_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 528)) // token='not' + (_keyword = _PyPegen_expect_token(p, 578)) // token='not' && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -11324,7 +11324,7 @@ in_bitwise_or_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword = _PyPegen_expect_token(p, 631)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -11369,9 +11369,9 @@ isnot_bitwise_or_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 529)) // token='is' + (_keyword = _PyPegen_expect_token(p, 579)) // token='is' && - (_keyword_1 = _PyPegen_expect_token(p, 528)) // token='not' + (_keyword_1 = _PyPegen_expect_token(p, 578)) // token='not' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -11415,7 +11415,7 @@ is_bitwise_or_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 529)) // token='is' + (_keyword = _PyPegen_expect_token(p, 579)) // token='is' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -13196,7 +13196,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -13229,7 +13229,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', 
_mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); @@ -13262,7 +13262,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ -13526,7 +13526,7 @@ lambdef_rule(Parser *p) void *a; expr_ty b; if ( - (_keyword = _PyPegen_expect_token(p, 530)) // token='lambda' + (_keyword = _PyPegen_expect_token(p, 583)) // token='lambda' && (a = lambda_params_rule(p), 1) // lambda_params? && @@ -14958,11 +14958,11 @@ for_if_clause_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -15001,11 +15001,11 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -18085,11 +18085,11 @@ expression_without_invalid_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (b = 
disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 627)) // token='else' && (c = expression_rule(p)) // expression ) @@ -18280,7 +18280,7 @@ invalid_expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (b = disjunction_rule(p)) // disjunction && @@ -18736,7 +18736,7 @@ invalid_del_stmt_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 503)) // token='del' + (_keyword = _PyPegen_expect_token(p, 597)) // token='del' && (a = star_expressions_rule(p)) // star_expressions ) @@ -19386,7 +19386,7 @@ invalid_with_item_rule(Parser *p) if ( (expression_var = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (a = expression_rule(p)) // expression && @@ -19436,7 +19436,7 @@ invalid_for_target_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (a = star_expressions_rule(p)) // star_expressions ) @@ -19614,7 +19614,7 @@ invalid_with_stmt_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_gather_162_var = _gather_162_rule(p)) // ','.(expression ['as' star_target])+ && @@ -19647,7 +19647,7 @@ invalid_with_stmt_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? 
&& - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -19702,7 +19702,7 @@ invalid_with_stmt_indent_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (a = _PyPegen_expect_token(p, 521)) // token='with' + (a = _PyPegen_expect_token(p, 606)) // token='with' && (_gather_166_var = _gather_166_rule(p)) // ','.(expression ['as' star_target])+ && @@ -19745,7 +19745,7 @@ invalid_with_stmt_indent_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (a = _PyPegen_expect_token(p, 521)) // token='with' + (a = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -19802,7 +19802,7 @@ invalid_try_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 511)) // token='try' + (a = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -19834,7 +19834,7 @@ invalid_try_stmt_rule(Parser *p) Token * _literal; asdl_stmt_seq* block_var; if ( - (_keyword = _PyPegen_expect_token(p, 511)) // token='try' + (_keyword = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -19890,7 +19890,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty a; expr_ty expressions_var; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' && (a = expression_rule(p)) // expression && @@ -19928,7 +19928,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -19959,7 +19959,7 @@ invalid_except_stmt_rule(Parser *p) Token * a; Token * 
newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) @@ -20004,7 +20004,7 @@ invalid_finally_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 524)) // token='finally' + (a = _PyPegen_expect_token(p, 617)) // token='finally' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -20058,7 +20058,7 @@ invalid_except_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -20094,7 +20094,7 @@ invalid_except_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -20319,7 +20319,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (a = _PyPegen_expect_soft_keyword(p, "_")) // soft_keyword='"_"' ) @@ -20349,7 +20349,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) && @@ -20497,7 +20497,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20528,7 +20528,7 @@ 
invalid_if_stmt_rule(Parser *p) expr_ty a_1; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 510)) // token='if' + (a = _PyPegen_expect_token(p, 624)) // token='if' && (a_1 = named_expression_rule(p)) // named_expression && @@ -20581,7 +20581,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 517)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 626)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20612,7 +20612,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 517)) // token='elif' + (a = _PyPegen_expect_token(p, 626)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20663,7 +20663,7 @@ invalid_else_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 518)) // token='else' + (a = _PyPegen_expect_token(p, 627)) // token='else' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -20714,7 +20714,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 512)) // token='while' + (_keyword = _PyPegen_expect_token(p, 629)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20745,7 +20745,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 512)) // token='while' + (a = _PyPegen_expect_token(p, 629)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20803,11 +20803,11 @@ invalid_for_stmt_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? 
&& - (a = _PyPegen_expect_token(p, 519)) // token='for' + (a = _PyPegen_expect_token(p, 630)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword = _PyPegen_expect_token(p, 631)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -20870,7 +20870,7 @@ invalid_def_raw_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (a = _PyPegen_expect_token(p, 516)) // token='def' + (a = _PyPegen_expect_token(p, 632)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -20932,7 +20932,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 515)) // token='class' + (a = _PyPegen_expect_token(p, 633)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -21510,7 +21510,7 @@ _tmp_6_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_6[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'import'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword = _PyPegen_expect_token(p, 531)) // token='import' ) { D(fprintf(stderr, "%*c+ _tmp_6[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'import'")); @@ -21529,7 +21529,7 @@ _tmp_6_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_6[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'from'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' ) { D(fprintf(stderr, "%*c+ _tmp_6[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'from'")); @@ -21565,7 +21565,7 @@ _tmp_7_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_7[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'def'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 516)) // token='def' + (_keyword = _PyPegen_expect_token(p, 632)) // token='def' ) { D(fprintf(stderr, "%*c+ 
_tmp_7[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'def'")); @@ -21639,7 +21639,7 @@ _tmp_8_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_8[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'class'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 515)) // token='class' + (_keyword = _PyPegen_expect_token(p, 633)) // token='class' ) { D(fprintf(stderr, "%*c+ _tmp_8[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'class'")); @@ -21694,7 +21694,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'with'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'with'")); @@ -21749,7 +21749,7 @@ _tmp_10_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_10[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'for'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' ) { D(fprintf(stderr, "%*c+ _tmp_10[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'for'")); @@ -22140,7 +22140,7 @@ _tmp_17_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' && (z = expression_rule(p)) // expression ) @@ -22762,7 +22762,7 @@ _tmp_28_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -22920,7 +22920,7 @@ _tmp_31_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -24730,7 +24730,7 @@ 
_tmp_60_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -29791,7 +29791,7 @@ _tmp_144_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_144[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword = _PyPegen_expect_token(p, 627)) // token='else' ) { D(fprintf(stderr, "%*c+ _tmp_144[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else'")); @@ -29958,7 +29958,7 @@ _tmp_146_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -29977,7 +29977,7 @@ _tmp_146_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ -29996,7 +29996,7 @@ _tmp_146_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); @@ -31444,7 +31444,7 @@ _tmp_170_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // 
token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' ) { D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except'")); @@ -31463,7 +31463,7 @@ _tmp_170_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'finally'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 524)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 617)) // token='finally' ) { D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'finally'")); @@ -31500,7 +31500,7 @@ _tmp_171_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -31539,7 +31539,7 @@ _tmp_172_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -31578,7 +31578,7 @@ _tmp_173_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -32196,7 +32196,7 @@ _tmp_186_rule(Parser *p) Token * _keyword; expr_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 531)) // token='or' + (_keyword = _PyPegen_expect_token(p, 571)) // token='or' && (c = conjunction_rule(p)) // conjunction ) @@ -32240,7 +32240,7 @@ _tmp_187_rule(Parser *p) Token * _keyword; expr_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 532)) // token='and' + (_keyword = _PyPegen_expect_token(p, 572)) // token='and' && (c = inversion_rule(p)) // inversion ) @@ -32284,7 +32284,7 @@ _tmp_188_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + 
(_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -32328,7 +32328,7 @@ _tmp_189_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -32922,7 +32922,7 @@ _tmp_202_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -32961,7 +32961,7 @@ _tmp_203_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -33000,7 +33000,7 @@ _tmp_204_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -33039,7 +33039,7 @@ _tmp_205_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index a12fe787f427db..2910d6ccf1c694 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -10,10 +10,9 @@ import time import token import traceback - from typing import Tuple -from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator +from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer from pegen.validator import validate_grammar diff --git a/Tools/peg_generator/pegen/build.py 
b/Tools/peg_generator/pegen/build.py index 6f0a091ff47bd0..bf01078ff0b4a0 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -1,11 +1,10 @@ +import itertools import pathlib import shutil -import tokenize import sysconfig import tempfile -import itertools - -from typing import Optional, Tuple, List, IO, Set, Dict +import tokenize +from typing import IO, Dict, List, Optional, Set, Tuple from pegen.c_generator import CParserGenerator from pegen.grammar import Grammar @@ -45,9 +44,9 @@ def compile_c_extension( of distutils (this is useful in case you want to use a temporary directory). """ import distutils.log - from distutils.core import Distribution, Extension - from distutils.command.clean import clean # type: ignore from distutils.command.build_ext import build_ext # type: ignore + from distutils.command.clean import clean # type: ignore + from distutils.core import Distribution, Extension from distutils.tests.support import fixup_build_ext # type: ignore if verbose: diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index e928fd3de1704f..d15e91098dfe9c 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -1,8 +1,8 @@ import ast -from dataclasses import field, dataclass import re -from typing import Any, Dict, IO, Optional, List, Text, Tuple, Set +from dataclasses import dataclass, field from enum import Enum +from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple from pegen import grammar from pegen.grammar import ( @@ -27,7 +27,6 @@ ) from pegen.parser_generator import ParserGenerator - EXTENSION_PREFIX = """\ #include "pegen.h" @@ -120,23 +119,18 @@ def __init__( self.exact_tokens = exact_tokens self.non_exact_tokens = non_exact_tokens self.cache: Dict[Any, FunctionCall] = {} - self.keyword_cache: Dict[str, int] = {} - self.soft_keywords: Set[str] = set() def keyword_helper(self, keyword: str) -> FunctionCall: - if 
keyword not in self.keyword_cache: - self.keyword_cache[keyword] = self.gen.keyword_type() return FunctionCall( assigned_variable="_keyword", function="_PyPegen_expect_token", - arguments=["p", self.keyword_cache[keyword]], + arguments=["p", self.gen.keywords[keyword]], return_type="Token *", nodetype=NodeTypes.KEYWORD, comment=f"token='{keyword}'", ) def soft_keyword_helper(self, value: str) -> FunctionCall: - self.soft_keywords.add(value.replace('"', "")) return FunctionCall( assigned_variable="_keyword", function="_PyPegen_expect_soft_keyword", @@ -200,20 +194,12 @@ def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall: ) def visit_Rhs(self, node: Rhs) -> FunctionCall: - def can_we_inline(node: Rhs) -> int: - if len(node.alts) != 1 or len(node.alts[0].items) != 1: - return False - # If the alternative has an action we cannot inline - if getattr(node.alts[0], "action", None) is not None: - return False - return True - if node in self.cache: return self.cache[node] - if can_we_inline(node): + if node.can_be_inlined: self.cache[node] = self.generate_call(node.alts[0].items[0]) else: - name = self.gen.name_node(node) + name = self.gen.artifical_rule_from_rhs(node) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -306,7 +292,7 @@ def visit_Opt(self, node: Opt) -> FunctionCall: def visit_Repeat0(self, node: Repeat0) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, False) + name = self.gen.artificial_rule_from_repeat(node.node, False) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -319,7 +305,7 @@ def visit_Repeat0(self, node: Repeat0) -> FunctionCall: def visit_Repeat1(self, node: Repeat1) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, True) + name = self.gen.artificial_rule_from_repeat(node.node, True) self.cache[node] = FunctionCall( 
assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -332,7 +318,7 @@ def visit_Repeat1(self, node: Repeat1) -> FunctionCall: def visit_Gather(self, node: Gather) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_gather(node) + name = self.gen.artifical_rule_from_gather(node) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -429,7 +415,7 @@ def out_of_memory_goto(self, expr: str, goto_target: str) -> None: self.print(f"}}") def generate(self, filename: str) -> None: - self.collect_todo() + self.collect_rules() self.print(f"// @generated by pegen from {filename}") header = self.grammar.metas.get("header", EXTENSION_PREFIX) if header: @@ -439,11 +425,11 @@ def generate(self, filename: str) -> None: self.print(subheader) self._setup_keywords() self._setup_soft_keywords() - for i, (rulename, rule) in enumerate(self.todo.items(), 1000): + for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000): comment = " // Left-recursive" if rule.left_recursive else "" self.print(f"#define {rulename}_type {i}{comment}") self.print() - for rulename, rule in self.todo.items(): + for rulename, rule in self.all_rules.items(): if rule.is_loop() or rule.is_gather(): type = "asdl_seq *" elif rule.type: @@ -452,13 +438,11 @@ def generate(self, filename: str) -> None: type = "void *" self.print(f"static {type}{rulename}_rule(Parser *p);") self.print() - while self.todo: - for rulename, rule in list(self.todo.items()): - del self.todo[rulename] - self.print() - if rule.left_recursive: - self.print("// Left-recursive") - self.visit(rule) + for rulename, rule in list(self.all_rules.items()): + self.print() + if rule.left_recursive: + self.print("// Left-recursive") + self.visit(rule) if self.skip_actions: mode = 0 else: @@ -472,7 +456,7 @@ def generate(self, filename: str) -> None: def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]: groups: Dict[int, List[Tuple[str, int]]] = 
{} - for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items(): + for keyword_str, keyword_type in self.keywords.items(): length = len(keyword_str) if length in groups: groups[length].append((keyword_str, keyword_type)) @@ -481,9 +465,8 @@ def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]: return groups def _setup_keywords(self) -> None: - keyword_cache = self.callmakervisitor.keyword_cache n_keyword_lists = ( - len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0 + len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0 ) self.print(f"static const int n_keyword_lists = {n_keyword_lists};") groups = self._group_keywords_by_length() @@ -503,7 +486,7 @@ def _setup_keywords(self) -> None: self.print("};") def _setup_soft_keywords(self) -> None: - soft_keywords = sorted(self.callmakervisitor.soft_keywords) + soft_keywords = sorted(self.soft_keywords) self.print("static char *soft_keywords[] = {") with self.indent(): for keyword in soft_keywords: diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py index 50ced22c2a5cf2..611ef514d09bda 100755 --- a/Tools/peg_generator/pegen/first_sets.py +++ b/Tools/peg_generator/pegen/first_sets.py @@ -3,30 +3,27 @@ import argparse import pprint import sys -from typing import Set, Dict +from typing import Dict, Set from pegen.build import build_parser from pegen.grammar import ( Alt, Cut, Gather, - Grammar, GrammarVisitor, Group, - Leaf, Lookahead, NamedItem, NameLeaf, NegativeLookahead, Opt, - Repeat, Repeat0, Repeat1, Rhs, Rule, StringLeaf, - PositiveLookahead, ) +from pegen.parser_generator import compute_nullables argparser = argparse.ArgumentParser( prog="calculate_first_sets", @@ -38,8 +35,7 @@ class FirstSetCalculator(GrammarVisitor): def __init__(self, rules: Dict[str, Rule]) -> None: self.rules = rules - for rule in rules.values(): - rule.nullable_visit(rules) + self.nullables = 
compute_nullables(rules) self.first_sets: Dict[str, Set[str]] = dict() self.in_process: Set[str] = set() @@ -129,7 +125,7 @@ def visit_Rule(self, item: Rule) -> Set[str]: elif item.name not in self.first_sets: self.in_process.add(item.name) terminals = self.visit(item.rhs) - if item.nullable: + if item in self.nullables: terminals.add("") self.first_sets[item.name] = terminals self.in_process.remove(item.name) diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py index 66fd5b329a5136..fa47b98201c0fd 100644 --- a/Tools/peg_generator/pegen/grammar.py +++ b/Tools/peg_generator/pegen/grammar.py @@ -2,6 +2,7 @@ from abc import abstractmethod from typing import ( + TYPE_CHECKING, AbstractSet, Any, Dict, @@ -11,11 +12,9 @@ Optional, Set, Tuple, - TYPE_CHECKING, Union, ) - if TYPE_CHECKING: from pegen.parser_generator import ParserGenerator @@ -31,7 +30,7 @@ def visit(self, node: Any, *args: Any, **kwargs: Any) -> Any: visitor = getattr(self, method, self.generic_visit) return visitor(node, *args, **kwargs) - def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None: + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any: """Called if no explicit visitor function exists for a node.""" for value in node: if isinstance(value, list): @@ -73,8 +72,6 @@ def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[obje self.type = type self.rhs = rhs self.memo = bool(memo) - self.visited = False - self.nullable = False self.left_recursive = False self.leader = False @@ -101,17 +98,6 @@ def __repr__(self) -> str: def __iter__(self) -> Iterator[Rhs]: yield self.rhs - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - if self.visited: - # A left-recursive rule is considered non-nullable. 
- return False - self.visited = True - self.nullable = self.rhs.nullable_visit(rules) - return self.nullable - - def initial_names(self) -> AbstractSet[str]: - return self.rhs.initial_names() - def flatten(self) -> Rhs: # If it's a single parenthesized group, flatten it. rhs = self.rhs @@ -124,10 +110,6 @@ def flatten(self) -> Rhs: rhs = rhs.alts[0].items[0].item.rhs return rhs - def collect_todo(self, gen: ParserGenerator) -> None: - rhs = self.flatten() - rhs.collect_todo(gen) - class Leaf: def __init__(self, value: str): @@ -140,14 +122,6 @@ def __iter__(self) -> Iterable[str]: if False: yield - @abstractmethod - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - raise NotImplementedError - - @abstractmethod - def initial_names(self) -> AbstractSet[str]: - raise NotImplementedError - class NameLeaf(Leaf): """The value is the name.""" @@ -160,15 +134,6 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"NameLeaf({self.value!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - if self.value in rules: - return rules[self.value].nullable_visit(rules) - # Token or unknown; never empty. - return False - - def initial_names(self) -> AbstractSet[str]: - return {self.value} - class StringLeaf(Leaf): """The value is a string literal, including quotes.""" @@ -176,13 +141,6 @@ class StringLeaf(Leaf): def __repr__(self) -> str: return f"StringLeaf({self.value!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - # The string token '' is considered empty. 
- return not self.value - - def initial_names(self) -> AbstractSet[str]: - return set() - class Rhs: def __init__(self, alts: List[Alt]): @@ -198,21 +156,14 @@ def __repr__(self) -> str: def __iter__(self) -> Iterator[List[Alt]]: yield self.alts - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - for alt in self.alts: - if alt.nullable_visit(rules): - return True - return False - - def initial_names(self) -> AbstractSet[str]: - names: Set[str] = set() - for alt in self.alts: - names |= alt.initial_names() - return names - - def collect_todo(self, gen: ParserGenerator) -> None: - for alt in self.alts: - alt.collect_todo(gen) + @property + def can_be_inlined(self) -> bool: + if len(self.alts) != 1 or len(self.alts[0].items) != 1: + return False + # If the alternative has an action we cannot inline + if getattr(self.alts[0], "action", None) is not None: + return False + return True class Alt: @@ -239,31 +190,12 @@ def __repr__(self) -> str: def __iter__(self) -> Iterator[List[NamedItem]]: yield self.items - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - for item in self.items: - if not item.nullable_visit(rules): - return False - return True - - def initial_names(self) -> AbstractSet[str]: - names: Set[str] = set() - for item in self.items: - names |= item.initial_names() - if not item.nullable: - break - return names - - def collect_todo(self, gen: ParserGenerator) -> None: - for item in self.items: - item.collect_todo(gen) - class NamedItem: def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None): self.name = name self.item = item self.type = type - self.nullable = False def __str__(self) -> str: if not SIMPLE_STR and self.name: @@ -277,16 +209,6 @@ def __repr__(self) -> str: def __iter__(self) -> Iterator[Item]: yield self.item - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - self.nullable = self.item.nullable_visit(rules) - return self.nullable - - def initial_names(self) -> AbstractSet[str]: - return 
self.item.initial_names() - - def collect_todo(self, gen: ParserGenerator) -> None: - gen.callmakervisitor.visit(self.item) - class Forced: def __init__(self, node: Plain): @@ -298,12 +220,6 @@ def __str__(self) -> str: def __iter__(self) -> Iterator[Plain]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return set() - class Lookahead: def __init__(self, node: Plain, sign: str): @@ -316,12 +232,6 @@ def __str__(self) -> str: def __iter__(self) -> Iterator[Plain]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return set() - class PositiveLookahead(Lookahead): def __init__(self, node: Plain): @@ -357,12 +267,6 @@ def __repr__(self) -> str: def __iter__(self) -> Iterator[Item]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return self.node.initial_names() - class Repeat: """Shared base class for x* and x+.""" @@ -371,16 +275,9 @@ def __init__(self, node: Plain): self.node = node self.memo: Optional[Tuple[Optional[str], str]] = None - @abstractmethod - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - raise NotImplementedError - def __iter__(self) -> Iterator[Plain]: yield self.node - def initial_names(self) -> AbstractSet[str]: - return self.node.initial_names() - class Repeat0(Repeat): def __str__(self) -> str: @@ -394,9 +291,6 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"Repeat0({self.node!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - class Repeat1(Repeat): def __str__(self) -> str: @@ -410,9 +304,6 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"Repeat1({self.node!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return False - class Gather(Repeat): def __init__(self, separator: Plain, 
node: Plain): @@ -425,9 +316,6 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"Gather({self.separator!r}, {self.node!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return False - class Group: def __init__(self, rhs: Rhs): @@ -442,12 +330,6 @@ def __repr__(self) -> str: def __iter__(self) -> Iterator[Rhs]: yield self.rhs - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return self.rhs.nullable_visit(rules) - - def initial_names(self) -> AbstractSet[str]: - return self.rhs.initial_names() - class Cut: def __init__(self) -> None: @@ -468,9 +350,6 @@ def __eq__(self, other: object) -> bool: return NotImplemented return True - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - def initial_names(self) -> AbstractSet[str]: return set() diff --git a/Tools/peg_generator/pegen/grammar_visualizer.py b/Tools/peg_generator/pegen/grammar_visualizer.py index 7362ec5fa0f4de..ab5c6364f63908 100644 --- a/Tools/peg_generator/pegen/grammar_visualizer.py +++ b/Tools/peg_generator/pegen/grammar_visualizer.py @@ -1,7 +1,6 @@ import argparse import sys - -from typing import Any, Iterator, Callable +from typing import Any, Callable, Iterator from pegen.build import build_parser from pegen.grammar import Grammar, Rule diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py index 6a07f6e8b7bfe3..35a5e1a229cdec 100644 --- a/Tools/peg_generator/pegen/keywordgen.py +++ b/Tools/peg_generator/pegen/keywordgen.py @@ -59,11 +59,11 @@ def main() -> None: with args.tokens_file as tok_file: all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None) - gen.collect_todo() + gen.collect_rules() with args.keyword_file as thefile: - all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS) - all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords) + all_keywords = 
sorted(list(gen.keywords.keys()) + EXTRA_KEYWORDS) + all_soft_keywords = sorted(gen.soft_keywords) keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords)) soft_keywords = ( diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py index 33ecee1ed441f9..f2105d8faa273a 100644 --- a/Tools/peg_generator/pegen/parser_generator.py +++ b/Tools/peg_generator/pegen/parser_generator.py @@ -1,22 +1,76 @@ +import ast import contextlib +import re from abc import abstractmethod -from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple +from typing import ( + IO, + AbstractSet, + Any, + Dict, + Iterable, + Iterator, + List, + Optional, + Set, + Text, + Tuple, + Union, +) from pegen import sccutils from pegen.grammar import ( Alt, + Cut, + Forced, Gather, Grammar, GrammarError, GrammarVisitor, + Group, + Lookahead, NamedItem, NameLeaf, + Opt, Plain, + Repeat0, + Repeat1, Rhs, Rule, + StringLeaf, ) +class RuleCollectorVisitor(GrammarVisitor): + """Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes""" + + def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None: + self.rulses = rules + self.callmaker = callmakervisitor + + def visit_Rule(self, rule: Rule) -> None: + self.visit(rule.flatten()) + + def visit_NamedItem(self, item: NamedItem) -> None: + self.callmaker.visit(item) + + +class KeywordCollectorVisitor(GrammarVisitor): + """Visitor that collects all the keywods and soft keywords in the Grammar""" + + def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]): + self.generator = gen + self.keywords = keywords + self.soft_keywords = soft_keywords + + def visit_StringLeaf(self, node: StringLeaf) -> None: + val = ast.literal_eval(node.value) + if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword + if node.value.endswith("'") and node.value not in self.keywords: + self.keywords[val] 
= self.generator.keyword_type() + else: + return self.soft_keywords.add(node.value.replace('"', "")) + + class RuleCheckingVisitor(GrammarVisitor): def __init__(self, rules: Dict[str, Rule], tokens: Set[str]): self.rules = rules @@ -39,6 +93,8 @@ class ParserGenerator: def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]): self.grammar = grammar self.tokens = tokens + self.keywords: Dict[str, int] = {} + self.soft_keywords: Set[str] = set() self.rules = grammar.rules self.validate_rule_names() if "trailer" not in grammar.metas and "start" not in self.rules: @@ -48,12 +104,10 @@ def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]) checker.visit(rule) self.file = file self.level = 0 - compute_nullables(self.rules) self.first_graph, self.first_sccs = compute_left_recursives(self.rules) - self.todo = self.rules.copy() # Rules to generate self.counter = 0 # For name_rule()/name_loop() self.keyword_counter = 499 # For keyword_type() - self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules + self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules self._local_variable_stack: List[List[str]] = [] def validate_rule_names(self) -> None: @@ -94,39 +148,43 @@ def printblock(self, lines: str) -> None: for line in lines.splitlines(): self.print(line) - def collect_todo(self) -> None: + def collect_rules(self) -> None: + keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords) + for rule in self.all_rules.values(): + keyword_collector.visit(rule) + + rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor) done: Set[str] = set() while True: - alltodo = list(self.todo) - self.all_rules.update(self.todo) - todo = [i for i in alltodo if i not in done] + computed_rules = list(self.all_rules) + todo = [i for i in computed_rules if i not in done] if not todo: break + done = set(self.all_rules) for rulename in todo: - self.todo[rulename].collect_todo(self) - done 
= set(alltodo) + rule_collector.visit(self.all_rules[rulename]) def keyword_type(self) -> int: self.keyword_counter += 1 return self.keyword_counter - def name_node(self, rhs: Rhs) -> str: + def artifical_rule_from_rhs(self, rhs: Rhs) -> str: self.counter += 1 name = f"_tmp_{self.counter}" # TODO: Pick a nicer name. - self.todo[name] = Rule(name, None, rhs) + self.all_rules[name] = Rule(name, None, rhs) return name - def name_loop(self, node: Plain, is_repeat1: bool) -> str: + def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str: self.counter += 1 if is_repeat1: prefix = "_loop1_" else: prefix = "_loop0_" - name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name. - self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) + name = f"{prefix}{self.counter}" + self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) return name - def name_gather(self, node: Gather) -> str: + def artifical_rule_from_gather(self, node: Gather) -> str: self.counter += 1 name = f"_gather_{self.counter}" self.counter += 1 @@ -135,7 +193,7 @@ def name_gather(self, node: Gather) -> str: [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem", ) - self.todo[extra_function_name] = Rule( + self.all_rules[extra_function_name] = Rule( extra_function_name, None, Rhs([extra_function_alt]), @@ -143,7 +201,7 @@ def name_gather(self, node: Gather) -> str: alt = Alt( [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))], ) - self.todo[name] = Rule( + self.all_rules[name] = Rule( name, None, Rhs([alt]), @@ -160,13 +218,120 @@ def dedupe(self, name: str) -> str: return name -def compute_nullables(rules: Dict[str, Rule]) -> None: +class NullableVisitor(GrammarVisitor): + def __init__(self, rules: Dict[str, Rule]) -> None: + self.rules = rules + self.visited: Set[Any] = set() + self.nullables: Set[Union[Rule, NamedItem]] = set() + + def visit_Rule(self, rule: Rule) -> bool: + if rule 
in self.visited: + return False + self.visited.add(rule) + if self.visit(rule.rhs): + self.nullables.add(rule) + return rule in self.nullables + + def visit_Rhs(self, rhs: Rhs) -> bool: + for alt in rhs.alts: + if self.visit(alt): + return True + return False + + def visit_Alt(self, alt: Alt) -> bool: + for item in alt.items: + if not self.visit(item): + return False + return True + + def visit_Forced(self, force: Forced) -> bool: + return True + + def visit_LookAhead(self, lookahead: Lookahead) -> bool: + return True + + def visit_Opt(self, opt: Opt) -> bool: + return True + + def visit_Repeat0(self, repeat: Repeat0) -> bool: + return True + + def visit_Repeat1(self, repeat: Repeat1) -> bool: + return False + + def visit_Gather(self, gather: Gather) -> bool: + return False + + def visit_Cut(self, cut: Cut) -> bool: + return False + + def visit_Group(self, group: Group) -> bool: + return self.visit(group.rhs) + + def visit_NamedItem(self, item: NamedItem) -> bool: + if self.visit(item.item): + self.nullables.add(item) + return item in self.nullables + + def visit_NameLeaf(self, node: NameLeaf) -> bool: + if node.value in self.rules: + return self.visit(self.rules[node.value]) + # Token or unknown; never empty. + return False + + def visit_StringLeaf(self, node: StringLeaf) -> bool: + # The string token '' is considered empty. + return not node.value + + +def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]: """Compute which rules in a grammar are nullable. Thanks to TatSu (tatsu/leftrec.py) for inspiration. 
""" + nullable_visitor = NullableVisitor(rules) for rule in rules.values(): - rule.nullable_visit(rules) + nullable_visitor.visit(rule) + return nullable_visitor.nullables + + +class InitialNamesVisitor(GrammarVisitor): + def __init__(self, rules: Dict[str, Rule]) -> None: + self.rules = rules + self.nullables = compute_nullables(rules) + + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]: + names: Set[str] = set() + for value in node: + if isinstance(value, list): + for item in value: + names |= self.visit(item, *args, **kwargs) + else: + names |= self.visit(value, *args, **kwargs) + return names + + def visit_Alt(self, alt: Alt) -> Set[Any]: + names: Set[str] = set() + for item in alt.items: + names |= self.visit(item) + if item not in self.nullables: + break + return names + + def visit_Forced(self, force: Forced) -> Set[Any]: + return set() + + def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]: + return set() + + def visit_Cut(self, cut: Cut) -> Set[Any]: + return set() + + def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]: + return {node.value} + + def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]: + return set() def compute_left_recursives( @@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: Note that this requires the nullable flags to have been computed. 
""" + initial_name_visitor = InitialNamesVisitor(rules) graph = {} vertices: Set[str] = set() for rulename, rhs in rules.items(): - graph[rulename] = names = rhs.initial_names() + graph[rulename] = names = initial_name_visitor.visit(rhs) vertices |= names for vertex in vertices: graph.setdefault(vertex, set()) diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 201bf2baa805ff..7aa730ae1c9534 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -95,8 +95,6 @@ class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator self.cache: Dict[Any, Any] = {} - self.keywords: Set[str] = set() - self.soft_keywords: Set[str] = set() def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: name = node.value @@ -111,12 +109,6 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: return name, f"self.{name}()" def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: - val = ast.literal_eval(node.value) - if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword - if node.value.endswith("'"): - self.keywords.add(val) - else: - self.soft_keywords.add(val) return "literal", f"self.expect({node.value})" def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: @@ -125,7 +117,7 @@ def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: if len(node.alts) == 1 and len(node.alts[0].items) == 1: self.cache[node] = self.visit(node.alts[0].items[0]) else: - name = self.gen.name_node(node) + name = self.gen.artifical_rule_from_rhs(node) self.cache[node] = name, f"self.{name}()" return self.cache[node] @@ -163,21 +155,21 @@ def visit_Opt(self, node: Opt) -> Tuple[str, str]: def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, False) + name = 
self.gen.artificial_rule_from_repeat(node.node, False) self.cache[node] = name, f"self.{name}()," # Also a trailing comma! return self.cache[node] def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, True) + name = self.gen.artificial_rule_from_repeat(node.node, True) self.cache[node] = name, f"self.{name}()" # But no trailing comma here! return self.cache[node] def visit_Gather(self, node: Gather) -> Tuple[str, str]: if node in self.cache: return self.cache[node] - name = self.gen.name_gather(node) + name = self.gen.artifical_rule_from_gather(node) self.cache[node] = name, f"self.{name}()" # No trailing comma here either! return self.cache[node] @@ -219,6 +211,7 @@ def __init__( ) def generate(self, filename: str) -> None: + self.collect_rules() header = self.grammar.metas.get("header", MODULE_PREFIX) if header is not None: self.print(header.rstrip("\n").format(filename=filename)) @@ -228,17 +221,15 @@ def generate(self, filename: str) -> None: cls_name = self.grammar.metas.get("class", "GeneratedParser") self.print("# Keywords and soft keywords are listed at the end of the parser definition.") self.print(f"class {cls_name}(Parser):") - while self.todo: - for rulename, rule in list(self.todo.items()): - del self.todo[rulename] - self.print() - with self.indent(): - self.visit(rule) + for rule in self.all_rules.values(): + self.print() + with self.indent(): + self.visit(rule) self.print() with self.indent(): - self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}") - self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}") + self.print(f"KEYWORDS = {tuple(self.keywords)}") + self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}") trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name)) if trailer is not None: @@ -270,8 +261,6 @@ def visit_Rule(self, node: Rule) -> None: self.print(f"def {node.name}(self) -> 
Optional[{node_type}]:") with self.indent(): self.print(f"# {node.name}: {rhs}") - if node.nullable: - self.print(f"# nullable={node.nullable}") self.print("mark = self._mark()") if self.alts_uses_locations(node.rhs.alts): self.print("tok = self._tokenizer.peek()") diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py index e0928a4af701d8..8e5dbc5cdbb339 100644 --- a/Tools/peg_generator/pegen/testutil.py +++ b/Tools/peg_generator/pegen/testutil.py @@ -4,10 +4,9 @@ import pathlib import sys import textwrap -import tokenize import token - -from typing import Any, cast, Dict, IO, Type, Final +import tokenize +from typing import IO, Any, Dict, Final, Type, cast from pegen.build import compile_c_extension from pegen.c_generator import CParserGenerator diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py index e7d6980d8b2dda..c48a01eedf5d5c 100644 --- a/Tools/peg_generator/pegen/validator.py +++ b/Tools/peg_generator/pegen/validator.py @@ -1,12 +1,7 @@ from typing import Optional from pegen import grammar -from pegen.grammar import ( - Alt, - GrammarVisitor, - Rule, - Rhs, -) +from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule class ValidationError(Exception):