Skip to content

Commit

Permalink
bpo-40939: Use the new grammar for the grammar specification document…
Browse files Browse the repository at this point in the history
…ation (pythonGH-19969)

(We censor the heck out of actions and some other stuff using a custom "highlighter".)

Co-authored-by: Guido van Rossum <guido@python.org>
  • Loading branch information
2 people authored and shihai1991 committed Aug 20, 2020
1 parent 3c3ae53 commit 718d817
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 210 deletions.
2 changes: 1 addition & 1 deletion Doc/conf.py
Expand Up @@ -15,7 +15,7 @@

extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
'pyspecific', 'c_annotations', 'escape4chm',
'asdl_highlight']
'asdl_highlight', 'peg_highlight']


doctest_global_setup = '''
Expand Down
18 changes: 15 additions & 3 deletions Doc/reference/grammar.rst
@@ -1,7 +1,19 @@
Full Grammar specification
==========================

This is the full Python grammar, as it is read by the parser generator and used
to parse Python source files:
This is the full Python grammar, derived directly from the grammar
used to generate the CPython parser (see :source:`Grammar/python.gram`).
The version here omits details related to code generation and
error recovery.

.. literalinclude:: ../../Grammar/Grammar
The notation is a mixture of `EBNF
<https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_
and `PEG <https://en.wikipedia.org/wiki/Parsing_expression_grammar>`_.
In particular, ``&`` followed by a symbol, token or parenthesized
group indicates a positive lookahead (i.e., is required to match but
not consumed), while ``!`` indicates a negative lookahead (i.e., is
required _not_ to match). We use the ``|`` separator to mean PEG's
"ordered choice" (written as ``/`` in traditional PEG grammars).

.. literalinclude:: ../../Grammar/python.gram
:language: peg
75 changes: 75 additions & 0 deletions Doc/tools/extensions/peg_highlight.py
@@ -0,0 +1,75 @@
from pygments.lexer import RegexLexer, bygroups, include
from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text

from sphinx.highlighting import lexers


class PEGLexer(RegexLexer):
"""Pygments Lexer for PEG grammar (.gram) files
This lexer strips the following elements from the grammar:
- Meta-tags
- Variable assignments
- Actions
- Lookaheads
- Rule types
- Rule options
- Rules named `invalid_*` or `incorrect_*`
"""

name = "PEG"
aliases = ["peg"]
filenames = ["*.gram"]
_name = r"([^\W\d]\w*)"
_text_ws = r"(\s*)"

tokens = {
"ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),],
"lookaheads": [
(r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
(r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
(r'(?<=\|\s)(&".+"\s?)', bygroups(None)),
(r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)),
],
"metas": [
(r"(@\w+ '''(.|\n)+?''')", bygroups(None)),
(r"^(@.*)$", bygroups(None)),
],
"actions": [(r"{(.|\n)+?}", bygroups(None)),],
"strings": [
(r"'\w+?'", Keyword),
(r'"\w+?"', Keyword),
(r"'\W+?'", Text),
(r'"\W+?"', Text),
],
"variables": [(_name + _text_ws + "(=)", bygroups(None, None, None),),],
"invalids": [
(r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)),
(r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)),
(r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),),
],
"root": [
include("invalids"),
include("ws"),
include("lookaheads"),
include("metas"),
include("actions"),
include("strings"),
include("variables"),
(r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,),
(
r"^\s*" + _name + "\s*" + "(\[.*\])?" + "\s*" + "(\(.+\))?" + "\s*(:)",
bygroups(Name.Function, None, None, Punctuation),
),
(_name, Name.Function),
(r"[\||\.|\+|\*|\?]", Operator),
(r"{|}|\(|\)|\[|\]", Punctuation),
(r".", Text),
],
}


def setup(app):
lexers["peg"] = PEGLexer()
return {"version": "1.0", "parallel_read_safe": True}
206 changes: 0 additions & 206 deletions Grammar/Grammar

This file was deleted.

0 comments on commit 718d817

Please sign in to comment.