bpo-40939: Use the new grammar for the grammar specification documentation (pythonGH-19969)

(We censor the heck out of actions and some other stuff using a custom "highlighter".)

Co-authored-by: Guido van Rossum <guido@python.org>
shihai1991 · Aug 20, 2020 · 718d817 · 718d817
1 parent 3c3ae53
commit 718d817
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 210 deletions.
diff --git a/Doc/conf.py b/Doc/conf.py
@@ -15,7 +15,7 @@
 
 extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
               'pyspecific', 'c_annotations', 'escape4chm',
-              'asdl_highlight']
+              'asdl_highlight', 'peg_highlight']
 
 
 doctest_global_setup = '''

diff --git a/Doc/reference/grammar.rst b/Doc/reference/grammar.rst
@@ -1,7 +1,19 @@
 Full Grammar specification
 ==========================
 
-This is the full Python grammar, as it is read by the parser generator and used
-to parse Python source files:
+This is the full Python grammar, derived directly from the grammar
+used to generate the CPython parser (see :source:`Grammar/python.gram`).
+The version here omits details related to code generation and
+error recovery.
 
-.. literalinclude:: ../../Grammar/Grammar
+The notation is a mixture of `EBNF
+<https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_
+and `PEG <https://en.wikipedia.org/wiki/Parsing_expression_grammar>`_.
+In particular, ``&`` followed by a symbol, token or parenthesized
+group indicates a positive lookahead (i.e., is required to match but
+not consumed), while ``!`` indicates a negative lookahead (i.e., is
+required *not* to match).  We use the ``|`` separator to mean PEG's
+"ordered choice" (written as ``/`` in traditional PEG grammars).
+
+.. literalinclude:: ../../Grammar/python.gram
+  :language: peg
diff --git a/Doc/tools/extensions/peg_highlight.py b/Doc/tools/extensions/peg_highlight.py
@@ -0,0 +1,75 @@
+from pygments.lexer import RegexLexer, bygroups, include
+from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text
+
+from sphinx.highlighting import lexers
+
+
+class PEGLexer(RegexLexer):
+    """Pygments Lexer for PEG grammar (.gram) files
+
+    This lexer strips the following elements from the grammar:
+
+        - Meta-tags
+        - Variable assignments
+        - Actions
+        - Lookaheads
+        - Rule types
+        - Rule options
+        - Rules named `invalid_*` or `incorrect_*`
+    """
+
+    name = "PEG"
+    aliases = ["peg"]
+    filenames = ["*.gram"]
+    _name = r"([^\W\d]\w*)"
+    _text_ws = r"(\s*)"
+
+    tokens = {
+        "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),],
+        "lookaheads": [
+            (r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
+            (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
+            (r'(?<=\|\s)(&".+"\s?)', bygroups(None)),
+            (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)),
+        ],
+        "metas": [
+            (r"(@\w+ '''(.|\n)+?''')", bygroups(None)),
+            (r"^(@.*)$", bygroups(None)),
+        ],
+        "actions": [(r"{(.|\n)+?}", bygroups(None)),],
+        "strings": [
+            (r"'\w+?'", Keyword),
+            (r'"\w+?"', Keyword),
+            (r"'\W+?'", Text),
+            (r'"\W+?"', Text),
+        ],
+        "variables": [(_name + _text_ws + "(=)", bygroups(None, None, None),),],
+        "invalids": [
+            (r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)),
+            (r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)),
+            (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),),
+        ],
+        "root": [
+            include("invalids"),
+            include("ws"),
+            include("lookaheads"),
+            include("metas"),
+            include("actions"),
+            include("strings"),
+            include("variables"),
+            (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,),
+            (
+                r"^\s*" + _name + "\s*" + "(\[.*\])?" + "\s*" + "(\(.+\))?" + "\s*(:)",
+                bygroups(Name.Function, None, None, Punctuation),
+            ),
+            (_name, Name.Function),
+            (r"[\||\.|\+|\*|\?]", Operator),
+            (r"{|}|\(|\)|\[|\]", Punctuation),
+            (r".", Text),
+        ],
+    }
+
+
+def setup(app):
+    lexers["peg"] = PEGLexer()
+    return {"version": "1.0", "parallel_read_safe": True}
diff --git a/Grammar/Grammar b/Grammar/Grammar