From 0b9c9c79563cc32a9d77e36068aa5f68b35f5dac Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 May 2017 15:49:18 +0300 Subject: [PATCH 01/15] bpo-30455: Generate tokens related C code and docs from token.py. --- Doc/library/token-list.inc | 64 ++++++++ Doc/library/token.rst | 70 +-------- Include/token.h | 122 +++++++------- Lib/symbol.py | 53 ++++++- Lib/test/test_tokenize.py | 2 + Lib/token.py | 204 +++++++++--------------- Lib/tokenize.py | 61 +------ Makefile.pre.in | 24 ++- Misc/NEWS | 3 + PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 + Parser/token.c | 234 +++++++++++++++++++++++++++ Parser/tokenizer.c | 236 ---------------------------- Tools/scripts/generate_token_c.py | 97 ++++++++++++ Tools/scripts/generate_token_h.py | 55 +++++++ Tools/scripts/generate_token_rst.py | 26 +++ 16 files changed, 700 insertions(+), 555 deletions(-) create mode 100644 Doc/library/token-list.inc create mode 100644 Parser/token.c create mode 100644 Tools/scripts/generate_token_c.py create mode 100644 Tools/scripts/generate_token_h.py create mode 100644 Tools/scripts/generate_token_rst.py diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc new file mode 100644 index 00000000000000..c13d2e921b9b45 --- /dev/null +++ b/Doc/library/token-list.inc @@ -0,0 +1,64 @@ +.. Auto-generated by Tools/scripts/generate_token_rst.py +.. data:: + ENDMARKER + NAME + NUMBER + STRING + NEWLINE + INDENT + DEDENT + LPAR + RPAR + LSQB + RSQB + COLON + COMMA + SEMI + PLUS + MINUS + STAR + SLASH + VBAR + AMPER + LESS + GREATER + EQUAL + DOT + PERCENT + LBRACE + RBRACE + EQEQUAL + NOTEQUAL + LESSEQUAL + GREATEREQUAL + TILDE + CIRCUMFLEX + LEFTSHIFT + RIGHTSHIFT + DOUBLESTAR + PLUSEQUAL + MINEQUAL + STAREQUAL + SLASHEQUAL + PERCENTEQUAL + AMPEREQUAL + VBAREQUAL + CIRCUMFLEXEQUAL + LEFTSHIFTEQUAL + RIGHTSHIFTEQUAL + DOUBLESTAREQUAL + DOUBLESLASH + DOUBLESLASHEQUAL + AT + ATEQUAL + RARROW + ELLIPSIS + OP + AWAIT + ASYNC + ERRORTOKEN + COMMENT + NL + ENCODING + N_TOKENS + NT_OFFSET diff --git a/Doc/library/token.rst b/Doc/library/token.rst index effb7113230e74..ec49ed7b70bce7 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -44,67 +44,9 @@ functions. The functions mirror definitions in the Python C header files. The token constants are: -.. data:: ENDMARKER - NAME - NUMBER - STRING - NEWLINE - INDENT - DEDENT - LPAR - RPAR - LSQB - RSQB - COLON - COMMA - SEMI - PLUS - MINUS - STAR - SLASH - VBAR - AMPER - LESS - GREATER - EQUAL - DOT - PERCENT - LBRACE - RBRACE - EQEQUAL - NOTEQUAL - LESSEQUAL - GREATEREQUAL - TILDE - CIRCUMFLEX - LEFTSHIFT - RIGHTSHIFT - DOUBLESTAR - PLUSEQUAL - MINEQUAL - STAREQUAL - SLASHEQUAL - PERCENTEQUAL - AMPEREQUAL - VBAREQUAL - CIRCUMFLEXEQUAL - LEFTSHIFTEQUAL - RIGHTSHIFTEQUAL - DOUBLESTAREQUAL - DOUBLESLASH - DOUBLESLASHEQUAL - AT - ATEQUAL - RARROW - ELLIPSIS - OP - AWAIT - ASYNC - ERRORTOKEN - N_TOKENS - NT_OFFSET - - .. versionchanged:: 3.5 - Added :data:`AWAIT` and :data:`ASYNC` tokens. Starting with - Python 3.7, "async" and "await" will be tokenized as :data:`NAME` - tokens, and :data:`AWAIT` and :data:`ASYNC` will be removed. +.. include:: /library/token-list.inc + +.. versionchanged:: 3.5 + Added :data:`AWAIT` and :data:`ASYNC` tokens. Starting with + Python 3.7, "async" and "await" will be tokenized as :data:`NAME` + tokens, and :data:`AWAIT` and :data:`ASYNC` will be removed. 
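An illustrative check of the generated list (not part of the patch): the names documented above stay ordinary importable constants, alongside the tok_name mapping and the helper predicates.

    >>> import token
    >>> token.tok_name[token.NOTEQUAL]
    'NOTEQUAL'
    >>> token.ISTERMINAL(token.NAME), token.ISNONTERMINAL(token.NT_OFFSET), token.ISEOF(token.ENDMARKER)
    (True, True, True)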
diff --git a/Include/token.h b/Include/token.h index 595afa01580888..f5f68b4226a5f6 100644 --- a/Include/token.h +++ b/Include/token.h @@ -1,3 +1,4 @@ +/* Auto-generated by Tools/scripts/generate_token_h.py */ /* Token types */ #ifndef Py_LIMITED_API @@ -9,73 +10,74 @@ extern "C" { #undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ -#define ENDMARKER 0 -#define NAME 1 -#define NUMBER 2 -#define STRING 3 -#define NEWLINE 4 -#define INDENT 5 -#define DEDENT 6 -#define LPAR 7 -#define RPAR 8 -#define LSQB 9 -#define RSQB 10 -#define COLON 11 -#define COMMA 12 -#define SEMI 13 -#define PLUS 14 -#define MINUS 15 -#define STAR 16 -#define SLASH 17 -#define VBAR 18 -#define AMPER 19 -#define LESS 20 -#define GREATER 21 -#define EQUAL 22 -#define DOT 23 -#define PERCENT 24 -#define LBRACE 25 -#define RBRACE 26 -#define EQEQUAL 27 -#define NOTEQUAL 28 -#define LESSEQUAL 29 -#define GREATEREQUAL 30 -#define TILDE 31 -#define CIRCUMFLEX 32 -#define LEFTSHIFT 33 -#define RIGHTSHIFT 34 -#define DOUBLESTAR 35 -#define PLUSEQUAL 36 -#define MINEQUAL 37 -#define STAREQUAL 38 -#define SLASHEQUAL 39 -#define PERCENTEQUAL 40 -#define AMPEREQUAL 41 -#define VBAREQUAL 42 -#define CIRCUMFLEXEQUAL 43 -#define LEFTSHIFTEQUAL 44 -#define RIGHTSHIFTEQUAL 45 -#define DOUBLESTAREQUAL 46 -#define DOUBLESLASH 47 +#define ENDMARKER 0 +#define NAME 1 +#define NUMBER 2 +#define STRING 3 +#define NEWLINE 4 +#define INDENT 5 +#define DEDENT 6 +#define LPAR 7 +#define RPAR 8 +#define LSQB 9 +#define RSQB 10 +#define COLON 11 +#define COMMA 12 +#define SEMI 13 +#define PLUS 14 +#define MINUS 15 +#define STAR 16 +#define SLASH 17 +#define VBAR 18 +#define AMPER 19 +#define LESS 20 +#define GREATER 21 +#define EQUAL 22 +#define DOT 23 +#define PERCENT 24 +#define LBRACE 25 +#define RBRACE 26 +#define EQEQUAL 27 +#define NOTEQUAL 28 +#define LESSEQUAL 29 +#define GREATEREQUAL 30 +#define TILDE 31 +#define CIRCUMFLEX 32 +#define LEFTSHIFT 33 +#define RIGHTSHIFT 34 +#define DOUBLESTAR 35 +#define PLUSEQUAL 36 +#define MINEQUAL 37 +#define STAREQUAL 38 +#define SLASHEQUAL 39 +#define PERCENTEQUAL 40 +#define AMPEREQUAL 41 +#define VBAREQUAL 42 +#define CIRCUMFLEXEQUAL 43 +#define LEFTSHIFTEQUAL 44 +#define RIGHTSHIFTEQUAL 45 +#define DOUBLESTAREQUAL 46 +#define DOUBLESLASH 47 #define DOUBLESLASHEQUAL 48 #define AT 49 -#define ATEQUAL 50 +#define ATEQUAL 50 #define RARROW 51 #define ELLIPSIS 52 -/* Don't forget to update the table _PyParser_TokenNames in tokenizer.c! 
*/ -#define OP 53 -#define AWAIT 54 -#define ASYNC 55 -#define ERRORTOKEN 56 -#define N_TOKENS 57 +#define OP 53 +#define AWAIT 54 +#define ASYNC 55 +#define ERRORTOKEN 56 +#define COMMENT 57 +#define NL 58 +#define ENCODING 59 +#define N_TOKENS 60 +#define NT_OFFSET 256 /* Special definitions for cooperation with parser */ -#define NT_OFFSET 256 - -#define ISTERMINAL(x) ((x) < NT_OFFSET) -#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) -#define ISEOF(x) ((x) == ENDMARKER) +#define ISTERMINAL(x) ((x) < NT_OFFSET) +#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) +#define ISEOF(x) ((x) == ENDMARKER) PyAPI_DATA(const char *) _PyParser_TokenNames[]; /* Token names */ diff --git a/Lib/symbol.py b/Lib/symbol.py index d9f01e081a7595..f5ce0bdc545b24 100755 --- a/Lib/symbol.py +++ b/Lib/symbol.py @@ -104,12 +104,53 @@ sym_name[_value] = _name -def _main(): +def _main(inFileName="Include/graminit.h", outFileName="Lib/symbol.py"): + import re import sys - import token - if len(sys.argv) == 1: - sys.argv = sys.argv + ["Include/graminit.h", "Lib/symbol.py"] - token._main() + try: + fp = open(inFileName) + except OSError as err: + sys.stdout.write("I/O error: %s\n" % str(err)) + sys.exit(1) + with fp: + lines = fp.read().split("\n") + prog = re.compile( + "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)", + re.IGNORECASE) + tokens = {} + for line in lines: + match = prog.match(line) + if match: + name, val = match.group(1, 2) + val = int(val) + tokens[val] = name # reverse so we can sort them... + keys = sorted(tokens.keys()) + # load the output skeleton from the target: + try: + fp = open(outFileName) + except OSError as err: + sys.stderr.write("I/O error: %s\n" % str(err)) + sys.exit(2) + with fp: + format = fp.read().split("\n") + try: + start = format.index("#--start constants--") + 1 + end = format.index("#--end constants--") + except ValueError: + sys.stderr.write("target does not contain format markers") + sys.exit(3) + lines = [] + for val in keys: + lines.append("%s = %d" % (tokens[val], val)) + format[start:end] = lines + try: + fp = open(outFileName, 'w') + except OSError as err: + sys.stderr.write("I/O error: %s\n" % str(err)) + sys.exit(4) + with fp: + fp.write("\n".join(format)) if __name__ == "__main__": - _main() + import sys + _main(*sys.argv[1:]) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index dcaf58f5272bac..0617965cf43709 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1582,6 +1582,8 @@ def test_random_files(self): testfiles = random.sample(testfiles, 10) for testfile in testfiles: + if support.verbose >= 2: + print('tokenize', testfile) with open(testfile, 'rb') as f: with self.subTest(file=testfile): self.check_roundtrip(f) diff --git a/Lib/token.py b/Lib/token.py index 5fdb2221333e2c..a7a31a68095025 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -1,80 +1,85 @@ -"""Token constants (from "token.h").""" +"""Token constants.""" __all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] -# This file is automatically generated; please don't muck it up! 
-# -# To update the symbols in this file, 'cd' to the top directory of -# the python source tree after building the interpreter and run: -# -# ./python Lib/token.py +tokens = [ + 'ENDMARKER', + 'NAME', + 'NUMBER', + 'STRING', + 'NEWLINE', + 'INDENT', + 'DEDENT', -#--start constants-- -ENDMARKER = 0 -NAME = 1 -NUMBER = 2 -STRING = 3 -NEWLINE = 4 -INDENT = 5 -DEDENT = 6 -LPAR = 7 -RPAR = 8 -LSQB = 9 -RSQB = 10 -COLON = 11 -COMMA = 12 -SEMI = 13 -PLUS = 14 -MINUS = 15 -STAR = 16 -SLASH = 17 -VBAR = 18 -AMPER = 19 -LESS = 20 -GREATER = 21 -EQUAL = 22 -DOT = 23 -PERCENT = 24 -LBRACE = 25 -RBRACE = 26 -EQEQUAL = 27 -NOTEQUAL = 28 -LESSEQUAL = 29 -GREATEREQUAL = 30 -TILDE = 31 -CIRCUMFLEX = 32 -LEFTSHIFT = 33 -RIGHTSHIFT = 34 -DOUBLESTAR = 35 -PLUSEQUAL = 36 -MINEQUAL = 37 -STAREQUAL = 38 -SLASHEQUAL = 39 -PERCENTEQUAL = 40 -AMPEREQUAL = 41 -VBAREQUAL = 42 -CIRCUMFLEXEQUAL = 43 -LEFTSHIFTEQUAL = 44 -RIGHTSHIFTEQUAL = 45 -DOUBLESTAREQUAL = 46 -DOUBLESLASH = 47 -DOUBLESLASHEQUAL = 48 -AT = 49 -ATEQUAL = 50 -RARROW = 51 -ELLIPSIS = 52 -OP = 53 -AWAIT = 54 -ASYNC = 55 -ERRORTOKEN = 56 -N_TOKENS = 57 -NT_OFFSET = 256 -#--end constants-- + ('LPAR', '('), + ('RPAR', ')'), + ('LSQB', '['), + ('RSQB', ']'), + ('COLON', ':'), + ('COMMA', ','), + ('SEMI', ';'), + ('PLUS', '+'), + ('MINUS', '-'), + ('STAR', '*'), + ('SLASH', '/'), + ('VBAR', '|'), + ('AMPER', '&'), + ('LESS', '<'), + ('GREATER', '>'), + ('EQUAL', '='), + ('DOT', '.'), + ('PERCENT', '%'), + ('LBRACE', '{'), + ('RBRACE', '}'), + ('EQEQUAL', '=='), + ('NOTEQUAL', '!='), + ('LESSEQUAL', '<='), + ('GREATEREQUAL', '>='), + ('TILDE', '~'), + ('CIRCUMFLEX', '^'), + ('LEFTSHIFT', '<<'), + ('RIGHTSHIFT', '>>'), + ('DOUBLESTAR', '**'), + ('PLUSEQUAL', '+='), + ('MINEQUAL', '-='), + ('STAREQUAL', '*='), + ('SLASHEQUAL', '/='), + ('PERCENTEQUAL', '%='), + ('AMPEREQUAL', '&='), + ('VBAREQUAL', '|='), + ('CIRCUMFLEXEQUAL', '^='), + ('LEFTSHIFTEQUAL', '<<='), + ('RIGHTSHIFTEQUAL', '>>='), + ('DOUBLESTAREQUAL', '**='), + ('DOUBLESLASH', '//'), + ('DOUBLESLASHEQUAL', '//='), + ('AT', '@'), + ('ATEQUAL', '@='), + ('RARROW', '->'), + ('ELLIPSIS', '...'), + + 'OP', + 'AWAIT', + 'ASYNC', + 'ERRORTOKEN', + 'COMMENT', + 'NL', + 'ENCODING', + + 'N_TOKENS', +] + +tok_name = {i: v[0] if isinstance(v, tuple) else v + for i, v in enumerate(tokens)} +EXACT_TOKEN_TYPES = {v[1]: i + for i, v in enumerate(tokens) + if isinstance(v, tuple)} +del tokens + +tok_name[256] = 'NT_OFFSET' -tok_name = {value: name - for name, value in globals().items() - if isinstance(value, int) and not name.startswith('_')} __all__.extend(tok_name.values()) +globals().update({name: value for value, name in tok_name.items()}) def ISTERMINAL(x): return x < NT_OFFSET @@ -84,60 +89,3 @@ def ISNONTERMINAL(x): def ISEOF(x): return x == ENDMARKER - - -def _main(): - import re - import sys - args = sys.argv[1:] - inFileName = args and args[0] or "Include/token.h" - outFileName = "Lib/token.py" - if len(args) > 1: - outFileName = args[1] - try: - fp = open(inFileName) - except OSError as err: - sys.stdout.write("I/O error: %s\n" % str(err)) - sys.exit(1) - with fp: - lines = fp.read().split("\n") - prog = re.compile( - "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)", - re.IGNORECASE) - tokens = {} - for line in lines: - match = prog.match(line) - if match: - name, val = match.group(1, 2) - val = int(val) - tokens[val] = name # reverse so we can sort them... 
- keys = sorted(tokens.keys()) - # load the output skeleton from the target: - try: - fp = open(outFileName) - except OSError as err: - sys.stderr.write("I/O error: %s\n" % str(err)) - sys.exit(2) - with fp: - format = fp.read().split("\n") - try: - start = format.index("#--start constants--") + 1 - end = format.index("#--end constants--") - except ValueError: - sys.stderr.write("target does not contain format markers") - sys.exit(3) - lines = [] - for val in keys: - lines.append("%s = %d" % (tokens[val], val)) - format[start:end] = lines - try: - fp = open(outFileName, 'w') - except OSError as err: - sys.stderr.write("I/O error: %s\n" % str(err)) - sys.exit(4) - with fp: - fp.write("\n".join(format)) - - -if __name__ == "__main__": - _main() diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 9017bb13e78dcd..4f3bf51f4a80d3 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -33,71 +33,16 @@ import re import sys from token import * +from token import EXACT_TOKEN_TYPES cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) import token -__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding", - "NL", "untokenize", "ENCODING", "TokenInfo"] +__all__ = token.__all__ + ["tokenize", "detect_encoding", "untokenize", + "TokenInfo"] del token -COMMENT = N_TOKENS -tok_name[COMMENT] = 'COMMENT' -NL = N_TOKENS + 1 -tok_name[NL] = 'NL' -ENCODING = N_TOKENS + 2 -tok_name[ENCODING] = 'ENCODING' -N_TOKENS += 3 -EXACT_TOKEN_TYPES = { - '(': LPAR, - ')': RPAR, - '[': LSQB, - ']': RSQB, - ':': COLON, - ',': COMMA, - ';': SEMI, - '+': PLUS, - '-': MINUS, - '*': STAR, - '/': SLASH, - '|': VBAR, - '&': AMPER, - '<': LESS, - '>': GREATER, - '=': EQUAL, - '.': DOT, - '%': PERCENT, - '{': LBRACE, - '}': RBRACE, - '==': EQEQUAL, - '!=': NOTEQUAL, - '<=': LESSEQUAL, - '>=': GREATEREQUAL, - '~': TILDE, - '^': CIRCUMFLEX, - '<<': LEFTSHIFT, - '>>': RIGHTSHIFT, - '**': DOUBLESTAR, - '+=': PLUSEQUAL, - '-=': MINEQUAL, - '*=': STAREQUAL, - '/=': SLASHEQUAL, - '%=': PERCENTEQUAL, - '&=': AMPEREQUAL, - '|=': VBAREQUAL, - '^=': CIRCUMFLEXEQUAL, - '<<=': LEFTSHIFTEQUAL, - '>>=': RIGHTSHIFTEQUAL, - '**=': DOUBLESTAREQUAL, - '//': DOUBLESLASH, - '//=': DOUBLESLASHEQUAL, - '...': ELLIPSIS, - '->': RARROW, - '@': AT, - '@=': ATEQUAL, -} - class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): def __repr__(self): annotated_type = '%d (%s)' % (self.type, tok_name[self.type]) diff --git a/Makefile.pre.in b/Makefile.pre.in index ee1cd4a0280ed4..4572a1010e7722 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -293,7 +293,7 @@ POBJS= \ Parser/grammar.o \ Parser/pgen.o -PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o +PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/token.o Parser/tokenizer.o PGOBJS= \ Objects/obmalloc.o \ @@ -516,7 +516,7 @@ coverage-lcov: @echo # Force regeneration of parser and importlib -coverage-report: regen-grammar regen-importlib +coverage-report: regen-grammar regen-token regen-importlib @ # build with coverage info $(MAKE) coverage @ # run tests, ignore failures @@ -705,7 +705,7 @@ regen-importlib: Programs/_freeze_importlib ############################################################################ # Regenerate all generated files -regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar regen-ast regen-importlib +regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar regen-token regen-ast 
regen-importlib ############################################################################ # Special rules for object files @@ -808,6 +808,24 @@ regen-opcode: $(srcdir)/Lib/opcode.py \ $(srcdir)/Include/opcode.h +.PHONY: regen-token +regen-token: + # Regenerate Include/token.h from Lib/token.py + # using Tools/scripts/generate_token_h.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_h.py \ + $(srcdir)/Lib/token.py \ + $(srcdir)/Include/token.h + # Regenerate Parser/token.c from Lib/token.py + # using Tools/scripts/generate_token_c.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_c.py \ + $(srcdir)/Lib/token.py \ + $(srcdir)/Parser/token_names.h + # Regenerate Doc/library/token.rst from Lib/token.py + # using Tools/scripts/generate_token_rst.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_rst.py \ + $(srcdir)/Lib/token.py \ + $(srcdir)/Doc/library/token.rst + Python/compile.o Python/symtable.o Python/ast.o: $(srcdir)/Include/graminit.h $(srcdir)/Include/Python-ast.h Python/getplatform.o: $(srcdir)/Python/getplatform.c diff --git a/Misc/NEWS b/Misc/NEWS index 6f90175bf7713f..24a0506b5bc0dd 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -1125,6 +1125,9 @@ Documentation Build ----- +- bpo-30455: C code and documentation related to tokens now are generated + from token.py. + - bpo-20210: Support the *disabled* marker in Setup files. Extension modules listed after this marker are not built at all, neither by the Makefile nor by setup.py. diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 8ebb22e0e2bfca..de2d648bb8815b 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -343,6 +343,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index cbe1a3943ff19c..5e6bd6ff28a891 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -818,6 +818,9 @@ Parser + + Parser + PC diff --git a/Parser/token.c b/Parser/token.c new file mode 100644 index 00000000000000..dbbe2f37d30846 --- /dev/null +++ b/Parser/token.c @@ -0,0 +1,234 @@ +/* Auto-generated by Tools/scripts/generate_token_c.py */ + +#include "Python.h" +#include "token.h" + +/* Token names */ + +const char *_PyParser_TokenNames[] = { + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + "LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + "ATEQUAL", + "RARROW", + "ELLIPSIS", + "OP", + "AWAIT", + "ASYNC", + "", + "", + "", + "", + "", +}; + +/* Return the token corresponding to a single character */ + +int +PyToken_OneChar(int c1) +{ + switch (c1) { + case '%': return PERCENT; + case '&': return AMPER; + case '(': return LPAR; + case ')': return RPAR; + case '*': return STAR; + case '+': return PLUS; + case ',': return COMMA; + case '-': return MINUS; + case '.': return DOT; + case '/': return SLASH; + case ':': return COLON; + case ';': return SEMI; + case '<': return LESS; + case '=': return EQUAL; + case '>': return GREATER; + 
case '@': return AT; + case '[': return LSQB; + case ']': return RSQB; + case '^': return CIRCUMFLEX; + case '{': return LBRACE; + case '|': return VBAR; + case '}': return RBRACE; + case '~': return TILDE; + } + return OP; +} + +int +PyToken_TwoChars(int c1, int c2) +{ + switch (c1) { + case '!': + switch (c2) { + case '=': return NOTEQUAL; + } + break; + case '%': + switch (c2) { + case '=': return PERCENTEQUAL; + } + break; + case '&': + switch (c2) { + case '=': return AMPEREQUAL; + } + break; + case '*': + switch (c2) { + case '*': return DOUBLESTAR; + case '=': return STAREQUAL; + } + break; + case '+': + switch (c2) { + case '=': return PLUSEQUAL; + } + break; + case '-': + switch (c2) { + case '=': return MINEQUAL; + case '>': return RARROW; + } + break; + case '/': + switch (c2) { + case '/': return DOUBLESLASH; + case '=': return SLASHEQUAL; + } + break; + case '<': + switch (c2) { + case '<': return LEFTSHIFT; + case '=': return LESSEQUAL; + } + break; + case '=': + switch (c2) { + case '=': return EQEQUAL; + } + break; + case '>': + switch (c2) { + case '=': return GREATEREQUAL; + case '>': return RIGHTSHIFT; + } + break; + case '@': + switch (c2) { + case '=': return ATEQUAL; + } + break; + case '^': + switch (c2) { + case '=': return CIRCUMFLEXEQUAL; + } + break; + case '|': + switch (c2) { + case '=': return VBAREQUAL; + } + break; + } + return OP; +} + +int +PyToken_ThreeChars(int c1, int c2, int c3) +{ + switch (c1) { + case '*': + switch (c2) { + case '*': + switch (c3) { + case '=': return DOUBLESTAREQUAL; + } + break; + } + break; + case '.': + switch (c2) { + case '.': + switch (c3) { + case '.': return ELLIPSIS; + } + break; + } + break; + case '/': + switch (c2) { + case '/': + switch (c3) { + case '=': return DOUBLESLASHEQUAL; + } + break; + } + break; + case '<': + switch (c2) { + case '<': + switch (c3) { + case '=': return LEFTSHIFTEQUAL; + } + break; + } + break; + case '>': + switch (c2) { + case '>': + switch (c3) { + case '=': return RIGHTSHIFTEQUAL; + } + break; + } + break; + } + return OP; +} diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5cc9533c3cd7ba..0d469b15ebb1c6 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -45,71 +45,6 @@ static int tok_nextc(struct tok_state *tok); static void tok_backup(struct tok_state *tok, int c); -/* Token names */ - -const char *_PyParser_TokenNames[] = { - "ENDMARKER", - "NAME", - "NUMBER", - "STRING", - "NEWLINE", - "INDENT", - "DEDENT", - "LPAR", - "RPAR", - "LSQB", - "RSQB", - "COLON", - "COMMA", - "SEMI", - "PLUS", - "MINUS", - "STAR", - "SLASH", - "VBAR", - "AMPER", - "LESS", - "GREATER", - "EQUAL", - "DOT", - "PERCENT", - "LBRACE", - "RBRACE", - "EQEQUAL", - "NOTEQUAL", - "LESSEQUAL", - "GREATEREQUAL", - "TILDE", - "CIRCUMFLEX", - "LEFTSHIFT", - "RIGHTSHIFT", - "DOUBLESTAR", - "PLUSEQUAL", - "MINEQUAL", - "STAREQUAL", - "SLASHEQUAL", - "PERCENTEQUAL", - "AMPEREQUAL", - "VBAREQUAL", - "CIRCUMFLEXEQUAL", - "LEFTSHIFTEQUAL", - "RIGHTSHIFTEQUAL", - "DOUBLESTAREQUAL", - "DOUBLESLASH", - "DOUBLESLASHEQUAL", - "AT", - "ATEQUAL", - "RARROW", - "ELLIPSIS", - /* This table must match the #defines in token.h! 
*/ - "OP", - "AWAIT", - "ASYNC", - "", - "" -}; - - /* Create and initialize a new tok_state structure */ static struct tok_state * @@ -1112,177 +1047,6 @@ tok_backup(struct tok_state *tok, int c) } -/* Return the token corresponding to a single character */ - -int -PyToken_OneChar(int c) -{ - switch (c) { - case '(': return LPAR; - case ')': return RPAR; - case '[': return LSQB; - case ']': return RSQB; - case ':': return COLON; - case ',': return COMMA; - case ';': return SEMI; - case '+': return PLUS; - case '-': return MINUS; - case '*': return STAR; - case '/': return SLASH; - case '|': return VBAR; - case '&': return AMPER; - case '<': return LESS; - case '>': return GREATER; - case '=': return EQUAL; - case '.': return DOT; - case '%': return PERCENT; - case '{': return LBRACE; - case '}': return RBRACE; - case '^': return CIRCUMFLEX; - case '~': return TILDE; - case '@': return AT; - default: return OP; - } -} - - -int -PyToken_TwoChars(int c1, int c2) -{ - switch (c1) { - case '=': - switch (c2) { - case '=': return EQEQUAL; - } - break; - case '!': - switch (c2) { - case '=': return NOTEQUAL; - } - break; - case '<': - switch (c2) { - case '>': return NOTEQUAL; - case '=': return LESSEQUAL; - case '<': return LEFTSHIFT; - } - break; - case '>': - switch (c2) { - case '=': return GREATEREQUAL; - case '>': return RIGHTSHIFT; - } - break; - case '+': - switch (c2) { - case '=': return PLUSEQUAL; - } - break; - case '-': - switch (c2) { - case '=': return MINEQUAL; - case '>': return RARROW; - } - break; - case '*': - switch (c2) { - case '*': return DOUBLESTAR; - case '=': return STAREQUAL; - } - break; - case '/': - switch (c2) { - case '/': return DOUBLESLASH; - case '=': return SLASHEQUAL; - } - break; - case '|': - switch (c2) { - case '=': return VBAREQUAL; - } - break; - case '%': - switch (c2) { - case '=': return PERCENTEQUAL; - } - break; - case '&': - switch (c2) { - case '=': return AMPEREQUAL; - } - break; - case '^': - switch (c2) { - case '=': return CIRCUMFLEXEQUAL; - } - break; - case '@': - switch (c2) { - case '=': return ATEQUAL; - } - break; - } - return OP; -} - -int -PyToken_ThreeChars(int c1, int c2, int c3) -{ - switch (c1) { - case '<': - switch (c2) { - case '<': - switch (c3) { - case '=': - return LEFTSHIFTEQUAL; - } - break; - } - break; - case '>': - switch (c2) { - case '>': - switch (c3) { - case '=': - return RIGHTSHIFTEQUAL; - } - break; - } - break; - case '*': - switch (c2) { - case '*': - switch (c3) { - case '=': - return DOUBLESTAREQUAL; - } - break; - } - break; - case '/': - switch (c2) { - case '/': - switch (c3) { - case '=': - return DOUBLESLASHEQUAL; - } - break; - } - break; - case '.': - switch (c2) { - case '.': - switch (c3) { - case '.': - return ELLIPSIS; - } - break; - } - break; - } - return OP; -} - static int indenterror(struct tok_state *tok) { diff --git a/Tools/scripts/generate_token_c.py b/Tools/scripts/generate_token_c.py new file mode 100644 index 00000000000000..2ced04f7f6c662 --- /dev/null +++ b/Tools/scripts/generate_token_c.py @@ -0,0 +1,97 @@ +# This script generates the opcode.h header file. 
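A note on the data structure built below (illustrative values only): main() groups the strings from token.EXACT_TOKEN_TYPES by length into nested dicts, so the entries for '*', '*=', '**' and '**=' end up as

    chars_to_token[1]['*'] == 'STAR'
    chars_to_token[2]['*'] == {'*': 'DOUBLESTAR', '=': 'STAREQUAL'}
    chars_to_token[3]['*'] == {'*': {'=': 'DOUBLESTAREQUAL'}}

and generate_chars_to_token() then unrolls each level into the nested switch statements seen in Parser/token.c above.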
+ +template = """\ +/* Auto-generated by Tools/scripts/generate_token_c.py */ + +#include "Python.h" +#include "token.h" + +/* Token names */ + +const char *_PyParser_TokenNames[] = { +%s\ +}; + +/* Return the token corresponding to a single character */ + +int +PyToken_OneChar(int c1) +{ +%s\ + return OP; +} + +int +PyToken_TwoChars(int c1, int c2) +{ +%s\ + return OP; +} + +int +PyToken_ThreeChars(int c1, int c2, int c3) +{ +%s\ + return OP; +} +""" + +def generate_chars_to_token(mapping, n=1): + result = [] + write = result.append + indent = ' ' * n + write(indent) + write('switch (c%d) {\n' % (n,)) + for c in sorted(mapping): + write(indent) + value = mapping[c] + if isinstance(value, dict): + write("case '%s':\n" % (c,)) + write(generate_chars_to_token(value, n + 1)) + write(indent) + write(' break;\n') + else: + write("case '%s': return %s;\n" % (c, value)) + write(indent) + write('}\n') + return ''.join(result) + +def main(token_py='Lib/token.py', outfile='Parser/token.c'): + token = {} + with open(token_py) as fp: + code = fp.read() + exec(code, token) + tok_name = token['tok_name'] + ERRORTOKEN = token['ERRORTOKEN'] + N_TOKENS = token['N_TOKENS'] + EXACT_TOKEN_TYPES = token['EXACT_TOKEN_TYPES'] + chars_to_token = {} + for string, value in EXACT_TOKEN_TYPES.items(): + assert 1 <= len(string) <= 3 + name = tok_name[value] + m = chars_to_token.setdefault(len(string), {}) + for c in string[:-1]: + m = m.setdefault(c, {}) + m[string[-1]] = name + + names = [] + for value in range(N_TOKENS + 1): + name = tok_name[value] + if value >= ERRORTOKEN: + name = '<%s>' % name + names.append(' "%s",\n' % name) + + with open(outfile, 'w') as fobj: + fobj.write(template % ( + ''.join(names), + generate_chars_to_token(chars_to_token[1]), + generate_chars_to_token(chars_to_token[2]), + generate_chars_to_token(chars_to_token[3]) + )) + + print("%s regenerated from %s" % (outfile, token_py)) + + +if __name__ == '__main__': + import sys + main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_token_h.py b/Tools/scripts/generate_token_h.py new file mode 100644 index 00000000000000..828c29ab5dc273 --- /dev/null +++ b/Tools/scripts/generate_token_h.py @@ -0,0 +1,55 @@ +# This script generates the token.h header file. + +header = """/* Auto-generated by Tools/scripts/generate_token_h.py */ + +/* Token types */ +#ifndef Py_LIMITED_API +#ifndef Py_TOKEN_H +#define Py_TOKEN_H +#ifdef __cplusplus +extern "C" { +#endif + +#undef TILDE /* Prevent clash of our definition with system macro. 
Ex AIX, ioctl.h */ + +""" + +footer = """ +/* Special definitions for cooperation with parser */ + +#define ISTERMINAL(x) ((x) < NT_OFFSET) +#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) +#define ISEOF(x) ((x) == ENDMARKER) + + +PyAPI_DATA(const char *) _PyParser_TokenNames[]; /* Token names */ +PyAPI_FUNC(int) PyToken_OneChar(int); +PyAPI_FUNC(int) PyToken_TwoChars(int, int); +PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TOKEN_H */ +#endif /* Py_LIMITED_API */ +""" + + +def main(token_py='Lib/token.py', outfile='Include/token.h'): + token = {} + with open(token_py) as fp: + code = fp.read() + exec(code, token) + tok_name = token['tok_name'] + with open(outfile, 'w') as fobj: + fobj.write(header) + for value in sorted(tok_name): + name = tok_name[value] + fobj.write("#define %-15s %d\n" % (name, value)) + fobj.write(footer) + print("%s regenerated from %s" % (outfile, token_py)) + + +if __name__ == '__main__': + import sys + main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_token_rst.py b/Tools/scripts/generate_token_rst.py new file mode 100644 index 00000000000000..e5f07de0c194bf --- /dev/null +++ b/Tools/scripts/generate_token_rst.py @@ -0,0 +1,26 @@ +# This script generates the token-list.inc documentation file. + +header = """\ +.. Auto-generated by Tools/scripts/generate_token_rst.py +.. data:: +""" + + +def main(token_py='Lib/token.py', outfile='Doc/library/token-list.inc'): + token = {} + with open(token_py) as fp: + code = fp.read() + exec(code, token) + tok_name = token['tok_name'] + with open(outfile, 'w') as fobj: + fobj.write(header) + for value in sorted(tok_name): + name = tok_name[value] + fobj.write(" %s\n" % (name,)) + + print("%s regenerated from %s" % (outfile, token_py)) + + +if __name__ == '__main__': + import sys + main(*sys.argv[1:]) From 3f789556db26e212d7b4d4a7a66510a9ee44099f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 2 Jun 2017 01:01:34 +0300 Subject: [PATCH 02/15] Generate regexpes from EXACT_TOKEN_TYPES. --- Lib/token.py | 5 +++-- Lib/tokenize.py | 16 +++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/Lib/token.py b/Lib/token.py index 3abcbd1fd05ea9..f8f86cff7fea3e 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -71,9 +71,10 @@ tok_name = {i: v[0] if isinstance(v, tuple) else v for i, v in enumerate(tokens)} -EXACT_TOKEN_TYPES = {v[1]: i +EXACT_TOKEN_TYPES = {x: i for i, v in enumerate(tokens) - if isinstance(v, tuple)} + if isinstance(v, tuple) + for x in v[1:]} del tokens # Special definitions for cooperation with parser diff --git a/Lib/tokenize.py b/Lib/tokenize.py index e93282ae4421a4..9c97aeebf11d86 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -116,17 +116,11 @@ def _compile(expr): String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') -# Because of leftmost-then-longest match semantics, be sure to put the -# longest operators first (e.g., if = came before ==, == would get -# recognized as two instances of =). -Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=", - r"//=?", r"->", - r"[+\-*/%&@|^=<>]=?", - r"~") - -Bracket = '[][(){}]' -Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]') -Funny = group(Operator, Bracket, Special) +# Sorting in reverse order puts the long operators before their prefixes. +# Otherwise if = came before ==, == would get recognized as two instances +# of =. 
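For instance (an illustrative check, not part of the diff), the operator strings from EXACT_TOKEN_TYPES sort so that every operator comes before its own prefixes:

    >>> sorted(['>', '>=', '>>', '>>='], reverse=True)
    ['>>=', '>>', '>=', '>']

so the generated pattern tries '>>=' before '>>' and '>'.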
+Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True))) +Funny = group(r'\r?\n', Special) PlainToken = group(Number, Funny, String, Name) Token = Ignore + PlainToken From 3c10bf6def1d514a7dfee3030d6d7fcbe85295ce Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 3 Jul 2017 10:28:12 +0300 Subject: [PATCH 03/15] Fix generating the documentation. --- Doc/library/token-list.inc | 3 -- Doc/library/token.rst | 61 +---------------------------- Makefile.pre.in | 4 +- Tools/scripts/generate_token_rst.py | 2 + 4 files changed, 5 insertions(+), 65 deletions(-) diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc index c13d2e921b9b45..50f4bdf5848811 100644 --- a/Doc/library/token-list.inc +++ b/Doc/library/token-list.inc @@ -57,8 +57,5 @@ AWAIT ASYNC ERRORTOKEN - COMMENT - NL - ENCODING N_TOKENS NT_OFFSET diff --git a/Doc/library/token.rst b/Doc/library/token.rst index b7ca9dbca722f8..d709d535961bea 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -44,66 +44,7 @@ functions. The functions mirror definitions in the Python C header files. The token constants are: -.. data:: ENDMARKER - NAME - NUMBER - STRING - NEWLINE - INDENT - DEDENT - LPAR - RPAR - LSQB - RSQB - COLON - COMMA - SEMI - PLUS - MINUS - STAR - SLASH - VBAR - AMPER - LESS - GREATER - EQUAL - DOT - PERCENT - LBRACE - RBRACE - EQEQUAL - NOTEQUAL - LESSEQUAL - GREATEREQUAL - TILDE - CIRCUMFLEX - LEFTSHIFT - RIGHTSHIFT - DOUBLESTAR - PLUSEQUAL - MINEQUAL - STAREQUAL - SLASHEQUAL - PERCENTEQUAL - AMPEREQUAL - VBAREQUAL - CIRCUMFLEXEQUAL - LEFTSHIFTEQUAL - RIGHTSHIFTEQUAL - DOUBLESTAREQUAL - DOUBLESLASH - DOUBLESLASHEQUAL - AT - ATEQUAL - RARROW - ELLIPSIS - OP - AWAIT - ASYNC - ERRORTOKEN - N_TOKENS - NT_OFFSET - +.. include:: token-list.inc The following token type values aren't used by the C tokenizer but are needed for the :mod:`tokenize` module. diff --git a/Makefile.pre.in b/Makefile.pre.in index 3363eedf40124c..5517117024391f 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -820,11 +820,11 @@ regen-token: $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_c.py \ $(srcdir)/Lib/token.py \ $(srcdir)/Parser/token_names.h - # Regenerate Doc/library/token.rst from Lib/token.py + # Regenerate Doc/library/token-list.inc from Lib/token.py # using Tools/scripts/generate_token_rst.py $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_rst.py \ $(srcdir)/Lib/token.py \ - $(srcdir)/Doc/library/token.rst + $(srcdir)/Doc/library/token-list.inc Python/compile.o Python/symtable.o Python/ast.o: $(srcdir)/Include/graminit.h $(srcdir)/Include/Python-ast.h diff --git a/Tools/scripts/generate_token_rst.py b/Tools/scripts/generate_token_rst.py index e5f07de0c194bf..0b3369ebec2ed2 100644 --- a/Tools/scripts/generate_token_rst.py +++ b/Tools/scripts/generate_token_rst.py @@ -15,6 +15,8 @@ def main(token_py='Lib/token.py', outfile='Doc/library/token-list.inc'): with open(outfile, 'w') as fobj: fobj.write(header) for value in sorted(tok_name): + if token['ERRORTOKEN'] < value < token['N_TOKENS']: + continue name = tok_name[value] fobj.write(" %s\n" % (name,)) From f116722191038617caad398c3a8d36a6c1076e69 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 3 Jul 2017 10:39:17 +0300 Subject: [PATCH 04/15] Add shebangs and executable bits. 
--- Tools/scripts/generate_token_c.py | 1 + Tools/scripts/generate_token_h.py | 1 + Tools/scripts/generate_token_rst.py | 1 + 3 files changed, 3 insertions(+) mode change 100644 => 100755 Tools/scripts/generate_token_c.py mode change 100644 => 100755 Tools/scripts/generate_token_h.py mode change 100644 => 100755 Tools/scripts/generate_token_rst.py diff --git a/Tools/scripts/generate_token_c.py b/Tools/scripts/generate_token_c.py old mode 100644 new mode 100755 index 2ced04f7f6c662..9e6b98a32983a2 --- a/Tools/scripts/generate_token_c.py +++ b/Tools/scripts/generate_token_c.py @@ -1,3 +1,4 @@ +#! /usr/bin/env python3 # This script generates the opcode.h header file. template = """\ diff --git a/Tools/scripts/generate_token_h.py b/Tools/scripts/generate_token_h.py old mode 100644 new mode 100755 index 828c29ab5dc273..9e12c64a94048e --- a/Tools/scripts/generate_token_h.py +++ b/Tools/scripts/generate_token_h.py @@ -1,3 +1,4 @@ +#! /usr/bin/env python3 # This script generates the token.h header file. header = """/* Auto-generated by Tools/scripts/generate_token_h.py */ diff --git a/Tools/scripts/generate_token_rst.py b/Tools/scripts/generate_token_rst.py old mode 100644 new mode 100755 index 0b3369ebec2ed2..5e602b8d30bd25 --- a/Tools/scripts/generate_token_rst.py +++ b/Tools/scripts/generate_token_rst.py @@ -1,3 +1,4 @@ +#! /usr/bin/env python3 # This script generates the token-list.inc documentation file. header = """\ From 812fc1fc09d1ffcdd1814dcb9385bcc627ad8895 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 3 Jul 2017 10:40:06 +0300 Subject: [PATCH 05/15] Add generated file Parser/token_names.h. --- Parser/token_names.h | 234 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 Parser/token_names.h diff --git a/Parser/token_names.h b/Parser/token_names.h new file mode 100644 index 00000000000000..dbbe2f37d30846 --- /dev/null +++ b/Parser/token_names.h @@ -0,0 +1,234 @@ +/* Auto-generated by Tools/scripts/generate_token_c.py */ + +#include "Python.h" +#include "token.h" + +/* Token names */ + +const char *_PyParser_TokenNames[] = { + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + "LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + "ATEQUAL", + "RARROW", + "ELLIPSIS", + "OP", + "AWAIT", + "ASYNC", + "", + "", + "", + "", + "", +}; + +/* Return the token corresponding to a single character */ + +int +PyToken_OneChar(int c1) +{ + switch (c1) { + case '%': return PERCENT; + case '&': return AMPER; + case '(': return LPAR; + case ')': return RPAR; + case '*': return STAR; + case '+': return PLUS; + case ',': return COMMA; + case '-': return MINUS; + case '.': return DOT; + case '/': return SLASH; + case ':': return COLON; + case ';': return SEMI; + case '<': return LESS; + case '=': return EQUAL; + case '>': return GREATER; + case '@': return AT; + case '[': return LSQB; + case ']': return RSQB; + case '^': return CIRCUMFLEX; + case '{': return LBRACE; + case '|': return VBAR; 
+ case '}': return RBRACE; + case '~': return TILDE; + } + return OP; +} + +int +PyToken_TwoChars(int c1, int c2) +{ + switch (c1) { + case '!': + switch (c2) { + case '=': return NOTEQUAL; + } + break; + case '%': + switch (c2) { + case '=': return PERCENTEQUAL; + } + break; + case '&': + switch (c2) { + case '=': return AMPEREQUAL; + } + break; + case '*': + switch (c2) { + case '*': return DOUBLESTAR; + case '=': return STAREQUAL; + } + break; + case '+': + switch (c2) { + case '=': return PLUSEQUAL; + } + break; + case '-': + switch (c2) { + case '=': return MINEQUAL; + case '>': return RARROW; + } + break; + case '/': + switch (c2) { + case '/': return DOUBLESLASH; + case '=': return SLASHEQUAL; + } + break; + case '<': + switch (c2) { + case '<': return LEFTSHIFT; + case '=': return LESSEQUAL; + } + break; + case '=': + switch (c2) { + case '=': return EQEQUAL; + } + break; + case '>': + switch (c2) { + case '=': return GREATEREQUAL; + case '>': return RIGHTSHIFT; + } + break; + case '@': + switch (c2) { + case '=': return ATEQUAL; + } + break; + case '^': + switch (c2) { + case '=': return CIRCUMFLEXEQUAL; + } + break; + case '|': + switch (c2) { + case '=': return VBAREQUAL; + } + break; + } + return OP; +} + +int +PyToken_ThreeChars(int c1, int c2, int c3) +{ + switch (c1) { + case '*': + switch (c2) { + case '*': + switch (c3) { + case '=': return DOUBLESTAREQUAL; + } + break; + } + break; + case '.': + switch (c2) { + case '.': + switch (c3) { + case '.': return ELLIPSIS; + } + break; + } + break; + case '/': + switch (c2) { + case '/': + switch (c3) { + case '=': return DOUBLESLASHEQUAL; + } + break; + } + break; + case '<': + switch (c2) { + case '<': + switch (c3) { + case '=': return LEFTSHIFTEQUAL; + } + break; + } + break; + case '>': + switch (c2) { + case '>': + switch (c3) { + case '=': return RIGHTSHIFTEQUAL; + } + break; + } + break; + } + return OP; +} From 8258f498ba1fc00f94a62c9a7fd770fa26c89b6c Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 3 Jul 2017 11:36:57 +0300 Subject: [PATCH 06/15] Misc other fixes and enhancements. 
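Among other fixes, this commit factors the token.py loading in the three generator scripts into one load_module() helper that execs the file into a throwaway namespace object, so its contents can be read as attributes (token.tok_name instead of token['tok_name']), and the values still come from the file named on the command line rather than from an imported token module. A rough sketch of the helper:

    def load_module(path):
        module = type('Namespace', (), {})()
        with open(path, 'rb') as fp:
            code = fp.read()
        exec(code, module.__dict__)
        return module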
--- Include/token.h | 3 - Lib/token.py | 2 + Makefile.pre.in | 12 +- Misc/NEWS | 4 +- Parser/token_names.h | 234 ---------------------------- Tools/scripts/generate_token_c.py | 22 +-- Tools/scripts/generate_token_h.py | 16 +- Tools/scripts/generate_token_rst.py | 16 +- 8 files changed, 48 insertions(+), 261 deletions(-) delete mode 100644 Parser/token_names.h diff --git a/Include/token.h b/Include/token.h index f5f68b4226a5f6..5202e45ca158d3 100644 --- a/Include/token.h +++ b/Include/token.h @@ -67,9 +67,6 @@ extern "C" { #define AWAIT 54 #define ASYNC 55 #define ERRORTOKEN 56 -#define COMMENT 57 -#define NL 58 -#define ENCODING 59 #define N_TOKENS 60 #define NT_OFFSET 256 diff --git a/Lib/token.py b/Lib/token.py index f8f86cff7fea3e..bbcc16bce512ca 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -62,6 +62,8 @@ 'AWAIT', 'ASYNC', 'ERRORTOKEN', + + # These aren't used by the C tokenizer but are needed for tokenize.py 'COMMENT', 'NL', 'ENCODING', diff --git a/Makefile.pre.in b/Makefile.pre.in index 5517117024391f..38440f3a7ad5e4 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -705,7 +705,7 @@ regen-importlib: Programs/_freeze_importlib ############################################################################ # Regenerate all generated files -regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar regen-token regen-ast regen-importlib +regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar regen-token regen-symbol regen-ast regen-importlib ############################################################################ # Special rules for object files @@ -819,13 +819,21 @@ regen-token: # using Tools/scripts/generate_token_c.py $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_c.py \ $(srcdir)/Lib/token.py \ - $(srcdir)/Parser/token_names.h + $(srcdir)/Parser/token.c # Regenerate Doc/library/token-list.inc from Lib/token.py # using Tools/scripts/generate_token_rst.py $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_rst.py \ $(srcdir)/Lib/token.py \ $(srcdir)/Doc/library/token-list.inc +.PHONY: regen-symbol +regen-symbol: $(srcdir)/Include/graminit.h + # Regenerate Lib/symbol.py from Include/graminit.h + # using Lib/symbol.py + $(PYTHON_FOR_REGEN) $(srcdir)/Lib/symbol.py \ + $(srcdir)/Include/graminit.h \ + $(srcdir)/Lib/symbol.py + Python/compile.o Python/symtable.o Python/ast.o: $(srcdir)/Include/graminit.h $(srcdir)/Include/Python-ast.h Python/getplatform.o: $(srcdir)/Python/getplatform.c diff --git a/Misc/NEWS b/Misc/NEWS index 7b9946c906a308..2da802b183d404 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -1333,8 +1333,8 @@ Documentation Build ----- -- bpo-30455: C code and documentation related to tokens now are generated - from token.py. +- bpo-30455: The C code and the documentation related to tokens now are + generated from token.py. 
- bpo-30687: Locate msbuild.exe on Windows when building rather than vcvarsall.bat diff --git a/Parser/token_names.h b/Parser/token_names.h deleted file mode 100644 index dbbe2f37d30846..00000000000000 --- a/Parser/token_names.h +++ /dev/null @@ -1,234 +0,0 @@ -/* Auto-generated by Tools/scripts/generate_token_c.py */ - -#include "Python.h" -#include "token.h" - -/* Token names */ - -const char *_PyParser_TokenNames[] = { - "ENDMARKER", - "NAME", - "NUMBER", - "STRING", - "NEWLINE", - "INDENT", - "DEDENT", - "LPAR", - "RPAR", - "LSQB", - "RSQB", - "COLON", - "COMMA", - "SEMI", - "PLUS", - "MINUS", - "STAR", - "SLASH", - "VBAR", - "AMPER", - "LESS", - "GREATER", - "EQUAL", - "DOT", - "PERCENT", - "LBRACE", - "RBRACE", - "EQEQUAL", - "NOTEQUAL", - "LESSEQUAL", - "GREATEREQUAL", - "TILDE", - "CIRCUMFLEX", - "LEFTSHIFT", - "RIGHTSHIFT", - "DOUBLESTAR", - "PLUSEQUAL", - "MINEQUAL", - "STAREQUAL", - "SLASHEQUAL", - "PERCENTEQUAL", - "AMPEREQUAL", - "VBAREQUAL", - "CIRCUMFLEXEQUAL", - "LEFTSHIFTEQUAL", - "RIGHTSHIFTEQUAL", - "DOUBLESTAREQUAL", - "DOUBLESLASH", - "DOUBLESLASHEQUAL", - "AT", - "ATEQUAL", - "RARROW", - "ELLIPSIS", - "OP", - "AWAIT", - "ASYNC", - "", - "", - "", - "", - "", -}; - -/* Return the token corresponding to a single character */ - -int -PyToken_OneChar(int c1) -{ - switch (c1) { - case '%': return PERCENT; - case '&': return AMPER; - case '(': return LPAR; - case ')': return RPAR; - case '*': return STAR; - case '+': return PLUS; - case ',': return COMMA; - case '-': return MINUS; - case '.': return DOT; - case '/': return SLASH; - case ':': return COLON; - case ';': return SEMI; - case '<': return LESS; - case '=': return EQUAL; - case '>': return GREATER; - case '@': return AT; - case '[': return LSQB; - case ']': return RSQB; - case '^': return CIRCUMFLEX; - case '{': return LBRACE; - case '|': return VBAR; - case '}': return RBRACE; - case '~': return TILDE; - } - return OP; -} - -int -PyToken_TwoChars(int c1, int c2) -{ - switch (c1) { - case '!': - switch (c2) { - case '=': return NOTEQUAL; - } - break; - case '%': - switch (c2) { - case '=': return PERCENTEQUAL; - } - break; - case '&': - switch (c2) { - case '=': return AMPEREQUAL; - } - break; - case '*': - switch (c2) { - case '*': return DOUBLESTAR; - case '=': return STAREQUAL; - } - break; - case '+': - switch (c2) { - case '=': return PLUSEQUAL; - } - break; - case '-': - switch (c2) { - case '=': return MINEQUAL; - case '>': return RARROW; - } - break; - case '/': - switch (c2) { - case '/': return DOUBLESLASH; - case '=': return SLASHEQUAL; - } - break; - case '<': - switch (c2) { - case '<': return LEFTSHIFT; - case '=': return LESSEQUAL; - } - break; - case '=': - switch (c2) { - case '=': return EQEQUAL; - } - break; - case '>': - switch (c2) { - case '=': return GREATEREQUAL; - case '>': return RIGHTSHIFT; - } - break; - case '@': - switch (c2) { - case '=': return ATEQUAL; - } - break; - case '^': - switch (c2) { - case '=': return CIRCUMFLEXEQUAL; - } - break; - case '|': - switch (c2) { - case '=': return VBAREQUAL; - } - break; - } - return OP; -} - -int -PyToken_ThreeChars(int c1, int c2, int c3) -{ - switch (c1) { - case '*': - switch (c2) { - case '*': - switch (c3) { - case '=': return DOUBLESTAREQUAL; - } - break; - } - break; - case '.': - switch (c2) { - case '.': - switch (c3) { - case '.': return ELLIPSIS; - } - break; - } - break; - case '/': - switch (c2) { - case '/': - switch (c3) { - case '=': return DOUBLESLASHEQUAL; - } - break; - } - break; - case '<': - switch (c2) { - case '<': - 
switch (c3) { - case '=': return LEFTSHIFTEQUAL; - } - break; - } - break; - case '>': - switch (c2) { - case '>': - switch (c3) { - case '=': return RIGHTSHIFTEQUAL; - } - break; - } - break; - } - return OP; -} diff --git a/Tools/scripts/generate_token_c.py b/Tools/scripts/generate_token_c.py index 9e6b98a32983a2..51cc4a1b651b43 100755 --- a/Tools/scripts/generate_token_c.py +++ b/Tools/scripts/generate_token_c.py @@ -1,5 +1,5 @@ #! /usr/bin/env python3 -# This script generates the opcode.h header file. +# This script generates the token.c source file. template = """\ /* Auto-generated by Tools/scripts/generate_token_c.py */ @@ -37,6 +37,13 @@ } """ +def load_module(path): + module = type('Namespace', (), {})() + with open(path, 'rb') as fp: + code = fp.read() + exec(code, module.__dict__) + return module + def generate_chars_to_token(mapping, n=1): result = [] write = result.append @@ -58,14 +65,11 @@ def generate_chars_to_token(mapping, n=1): return ''.join(result) def main(token_py='Lib/token.py', outfile='Parser/token.c'): - token = {} - with open(token_py) as fp: - code = fp.read() - exec(code, token) - tok_name = token['tok_name'] - ERRORTOKEN = token['ERRORTOKEN'] - N_TOKENS = token['N_TOKENS'] - EXACT_TOKEN_TYPES = token['EXACT_TOKEN_TYPES'] + token = load_module(token_py) + tok_name = token.tok_name + ERRORTOKEN = token.ERRORTOKEN + N_TOKENS = token.N_TOKENS + EXACT_TOKEN_TYPES = token.EXACT_TOKEN_TYPES chars_to_token = {} for string, value in EXACT_TOKEN_TYPES.items(): assert 1 <= len(string) <= 3 diff --git a/Tools/scripts/generate_token_h.py b/Tools/scripts/generate_token_h.py index 9e12c64a94048e..61c06ea24d567a 100755 --- a/Tools/scripts/generate_token_h.py +++ b/Tools/scripts/generate_token_h.py @@ -36,15 +36,21 @@ """ -def main(token_py='Lib/token.py', outfile='Include/token.h'): - token = {} - with open(token_py) as fp: +def load_module(path): + module = type('Namespace', (), {})() + with open(path, 'rb') as fp: code = fp.read() - exec(code, token) - tok_name = token['tok_name'] + exec(code, module.__dict__) + return module + +def main(token_py='Lib/token.py', outfile='Include/token.h'): + token = load_module(token_py) + tok_name = token.tok_name with open(outfile, 'w') as fobj: fobj.write(header) for value in sorted(tok_name): + if token.ERRORTOKEN < value < token.N_TOKENS: + continue name = tok_name[value] fobj.write("#define %-15s %d\n" % (name, value)) fobj.write(footer) diff --git a/Tools/scripts/generate_token_rst.py b/Tools/scripts/generate_token_rst.py index 5e602b8d30bd25..57a1e5291fab11 100755 --- a/Tools/scripts/generate_token_rst.py +++ b/Tools/scripts/generate_token_rst.py @@ -7,16 +7,20 @@ """ -def main(token_py='Lib/token.py', outfile='Doc/library/token-list.inc'): - token = {} - with open(token_py) as fp: +def load_module(path): + module = type('Namespace', (), {})() + with open(path, 'rb') as fp: code = fp.read() - exec(code, token) - tok_name = token['tok_name'] + exec(code, module.__dict__) + return module + +def main(token_py='Lib/token.py', outfile='Doc/library/token-list.inc'): + token = load_module(token_py) + tok_name = token.tok_name with open(outfile, 'w') as fobj: fobj.write(header) for value in sorted(tok_name): - if token['ERRORTOKEN'] < value < token['N_TOKENS']: + if token.ERRORTOKEN < value < token.N_TOKENS: continue name = tok_name[value] fobj.write(" %s\n" % (name,)) From e26b9c8c77fc0691a97b4895c0bea2c93ab4a47f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 14 Apr 2018 11:29:25 +0300 Subject: [PATCH 07/15] Move symbol.py 
generating code into a separate file. --- Lib/symbol.py | 60 +---------------------------- Makefile.pre.in | 4 +- Tools/scripts/generate_symbol_py.py | 52 +++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 61 deletions(-) mode change 100755 => 100644 Lib/symbol.py create mode 100755 Tools/scripts/generate_symbol_py.py diff --git a/Lib/symbol.py b/Lib/symbol.py old mode 100755 new mode 100644 index 54b704b91ab9f9..664a2a4932da5c --- a/Lib/symbol.py +++ b/Lib/symbol.py @@ -1,13 +1,6 @@ -#! /usr/bin/env python3 - """Non-terminal symbols of Python grammar (from "graminit.h").""" # This file is automatically generated; please don't muck it up! -# -# To update the symbols in this file, 'cd' to the top directory of -# the python source tree after building the interpreter and run: -# -# ./python Lib/symbol.py #--start constants-- single_input = 256 @@ -103,55 +96,4 @@ for _name, _value in list(globals().items()): if type(_value) is type(0): sym_name[_value] = _name - - -def _main(inFileName="Include/graminit.h", outFileName="Lib/symbol.py"): - import re - import sys - try: - fp = open(inFileName) - except OSError as err: - sys.stdout.write("I/O error: %s\n" % str(err)) - sys.exit(1) - with fp: - lines = fp.read().split("\n") - prog = re.compile( - "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)", - re.IGNORECASE) - tokens = {} - for line in lines: - match = prog.match(line) - if match: - name, val = match.group(1, 2) - val = int(val) - tokens[val] = name # reverse so we can sort them... - keys = sorted(tokens.keys()) - # load the output skeleton from the target: - try: - fp = open(outFileName) - except OSError as err: - sys.stderr.write("I/O error: %s\n" % str(err)) - sys.exit(2) - with fp: - format = fp.read().split("\n") - try: - start = format.index("#--start constants--") + 1 - end = format.index("#--end constants--") - except ValueError: - sys.stderr.write("target does not contain format markers") - sys.exit(3) - lines = [] - for val in keys: - lines.append("%s = %d" % (tokens[val], val)) - format[start:end] = lines - try: - fp = open(outFileName, 'w') - except OSError as err: - sys.stderr.write("I/O error: %s\n" % str(err)) - sys.exit(4) - with fp: - fp.write("\n".join(format)) - -if __name__ == "__main__": - import sys - _main(*sys.argv[1:]) +del _name, _value diff --git a/Makefile.pre.in b/Makefile.pre.in index 584503543e205a..b98dc6aaad2e48 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -862,8 +862,8 @@ regen-token: .PHONY: regen-symbol regen-symbol: $(srcdir)/Include/graminit.h # Regenerate Lib/symbol.py from Include/graminit.h - # using Lib/symbol.py - $(PYTHON_FOR_REGEN) $(srcdir)/Lib/symbol.py \ + # using Tools/scripts/generate_symbol_py.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_symbol_py.py \ $(srcdir)/Include/graminit.h \ $(srcdir)/Lib/symbol.py diff --git a/Tools/scripts/generate_symbol_py.py b/Tools/scripts/generate_symbol_py.py new file mode 100755 index 00000000000000..c64757b1597c85 --- /dev/null +++ b/Tools/scripts/generate_symbol_py.py @@ -0,0 +1,52 @@ +#! /usr/bin/env python3 +# This script generates the symbol.py source file. 
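For reference (illustrative, matching both the regen-symbol rule above and the defaults of main() below):

    python3 Tools/scripts/generate_symbol_py.py Include/graminit.h Lib/symbol.py

The script only rewrites the region between the '#--start constants--' and '#--end constants--' markers of Lib/symbol.py; everything else in the file is preserved.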
+ +import sys +import re + +def main(inFileName="Include/graminit.h", outFileName="Lib/symbol.py"): + try: + fp = open(inFileName) + except OSError as err: + sys.stdout.write("I/O error: %s\n" % str(err)) + sys.exit(1) + with fp: + lines = fp.read().split("\n") + prog = re.compile( + "#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)", + re.IGNORECASE) + tokens = {} + for line in lines: + match = prog.match(line) + if match: + name, val = match.group(1, 2) + val = int(val) + tokens[val] = name # reverse so we can sort them... + keys = sorted(tokens.keys()) + # load the output skeleton from the target: + try: + fp = open(outFileName) + except OSError as err: + sys.stderr.write("I/O error: %s\n" % str(err)) + sys.exit(2) + with fp: + format = fp.read().split("\n") + try: + start = format.index("#--start constants--") + 1 + end = format.index("#--end constants--") + except ValueError: + sys.stderr.write("target does not contain format markers") + sys.exit(3) + lines = [] + for val in keys: + lines.append("%s = %d" % (tokens[val], val)) + format[start:end] = lines + try: + fp = open(outFileName, 'w') + except OSError as err: + sys.stderr.write("I/O error: %s\n" % str(err)) + sys.exit(4) + with fp: + fp.write("\n".join(format)) + +main(*sys.argv[1:]) From 33e37245ead494db51181065b213a82e550d07ce Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 14 Apr 2018 11:58:36 +0300 Subject: [PATCH 08/15] Fix dependencies for pgen. --- Makefile.pre.in | 3 ++- Python/graminit.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index b98dc6aaad2e48..bad2524f9756de 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -295,9 +295,10 @@ POBJS= \ Parser/metagrammar.o \ Parser/firstsets.o \ Parser/grammar.o \ + Parser/token.o \ Parser/pgen.o -PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/token.o Parser/tokenizer.o +PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o PGOBJS= \ Objects/obmalloc.o \ diff --git a/Python/graminit.c b/Python/graminit.c index 8e89ccea3bab6a..095204bd440187 100644 --- a/Python/graminit.c +++ b/Python/graminit.c @@ -2219,7 +2219,7 @@ static label labels[177] = { {27, 0}, {30, 0}, {29, 0}, - {28, 0}, + {3, "'<>'"}, {28, 0}, {1, "is"}, {316, 0}, From c9966b2311fbf93e99f69c201d751df0d4707761 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 14 Apr 2018 11:59:53 +0300 Subject: [PATCH 09/15] Add a hack for '<>'. 
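The grammar still lists the old '<>' spelling of '!=' (the one enabled by the barry_as_FLUFL future import), but '<>' does not appear in EXACT_TOKEN_TYPES in Lib/token.py, so the generator has to add it by hand before building the two-character switch. A minimal sketch of that step, mirroring the change to generate_token_c.py below:

    EXACT_TOKEN_TYPES = dict(token.EXACT_TOKEN_TYPES)
    EXACT_TOKEN_TYPES['<>'] = EXACT_TOKEN_TYPES['!=']   # both map to NOTEQUAL

With that in place, PyToken_TwoChars('<', '>') returns NOTEQUAL again.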
--- Parser/token.c | 1 + Python/graminit.c | 2 +- Tools/scripts/generate_token_c.py | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Parser/token.c b/Parser/token.c index dbbe2f37d30846..c0694c987ff157 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -148,6 +148,7 @@ PyToken_TwoChars(int c1, int c2) switch (c2) { case '<': return LEFTSHIFT; case '=': return LESSEQUAL; + case '>': return NOTEQUAL; } break; case '=': diff --git a/Python/graminit.c b/Python/graminit.c index 095204bd440187..8e89ccea3bab6a 100644 --- a/Python/graminit.c +++ b/Python/graminit.c @@ -2219,7 +2219,7 @@ static label labels[177] = { {27, 0}, {30, 0}, {29, 0}, - {3, "'<>'"}, + {28, 0}, {28, 0}, {1, "is"}, {316, 0}, diff --git a/Tools/scripts/generate_token_c.py b/Tools/scripts/generate_token_c.py index 51cc4a1b651b43..9aa332eba8c253 100755 --- a/Tools/scripts/generate_token_c.py +++ b/Tools/scripts/generate_token_c.py @@ -69,7 +69,8 @@ def main(token_py='Lib/token.py', outfile='Parser/token.c'): tok_name = token.tok_name ERRORTOKEN = token.ERRORTOKEN N_TOKENS = token.N_TOKENS - EXACT_TOKEN_TYPES = token.EXACT_TOKEN_TYPES + EXACT_TOKEN_TYPES = dict(token.EXACT_TOKEN_TYPES) + EXACT_TOKEN_TYPES['<>'] = EXACT_TOKEN_TYPES['!='] chars_to_token = {} for string, value in EXACT_TOKEN_TYPES.items(): assert 1 <= len(string) <= 3 From 72bd747c1aee735822b17eb07515b76edfa5ff29 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 14 Apr 2018 14:45:16 +0300 Subject: [PATCH 10/15] Make _PyParser_TokenNames a const array. --- Include/token.h | 2 +- Parser/token.c | 2 +- Tools/scripts/generate_token_c.py | 2 +- Tools/scripts/generate_token_h.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/token.h b/Include/token.h index 5202e45ca158d3..b24d4290982fbf 100644 --- a/Include/token.h +++ b/Include/token.h @@ -77,7 +77,7 @@ extern "C" { #define ISEOF(x) ((x) == ENDMARKER) -PyAPI_DATA(const char *) _PyParser_TokenNames[]; /* Token names */ +PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ PyAPI_FUNC(int) PyToken_OneChar(int); PyAPI_FUNC(int) PyToken_TwoChars(int, int); PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); diff --git a/Parser/token.c b/Parser/token.c index c0694c987ff157..18426f600579f2 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -5,7 +5,7 @@ /* Token names */ -const char *_PyParser_TokenNames[] = { +const char * const _PyParser_TokenNames[] = { "ENDMARKER", "NAME", "NUMBER", diff --git a/Tools/scripts/generate_token_c.py b/Tools/scripts/generate_token_c.py index 9aa332eba8c253..8bd4343773b837 100755 --- a/Tools/scripts/generate_token_c.py +++ b/Tools/scripts/generate_token_c.py @@ -9,7 +9,7 @@ /* Token names */ -const char *_PyParser_TokenNames[] = { +const char * const _PyParser_TokenNames[] = { %s\ }; diff --git a/Tools/scripts/generate_token_h.py b/Tools/scripts/generate_token_h.py index 61c06ea24d567a..c1fb19c03e2910 100755 --- a/Tools/scripts/generate_token_h.py +++ b/Tools/scripts/generate_token_h.py @@ -23,7 +23,7 @@ #define ISEOF(x) ((x) == ENDMARKER) -PyAPI_DATA(const char *) _PyParser_TokenNames[]; /* Token names */ +PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ PyAPI_FUNC(int) PyToken_OneChar(int); PyAPI_FUNC(int) PyToken_TwoChars(int, int); PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); From 85f37dbb4d4ca96fc5c4d312a1ed391f133a71e2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 14 Apr 2018 15:10:51 +0300 Subject: [PATCH 11/15] Fix tests. 
--- Lib/test/test_symbol.py | 5 ++++- Tools/scripts/generate_symbol_py.py | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_symbol.py b/Lib/test/test_symbol.py index c1306f54327f4e..ed86aec36b873c 100644 --- a/Lib/test/test_symbol.py +++ b/Lib/test/test_symbol.py @@ -6,6 +6,9 @@ SYMBOL_FILE = support.findfile('symbol.py') +GEN_SYMBOL_FILE = os.path.join(os.path.dirname(__file__), + '..', '..', 'Tools', 'scripts', + 'generate_symbol_py.py') GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), '..', '..', 'Include', 'graminit.h') TEST_PY_FILE = 'symbol_test.py' @@ -22,7 +25,7 @@ def _copy_file_without_generated_symbols(self, source_file, dest_file): def _generate_symbols(self, grammar_file, target_symbol_py_file): proc = subprocess.Popen([sys.executable, - SYMBOL_FILE, + GEN_SYMBOL_FILE, grammar_file, target_symbol_py_file], stderr=subprocess.PIPE) stderr = proc.communicate()[1] diff --git a/Tools/scripts/generate_symbol_py.py b/Tools/scripts/generate_symbol_py.py index c64757b1597c85..9219b096e4d67d 100755 --- a/Tools/scripts/generate_symbol_py.py +++ b/Tools/scripts/generate_symbol_py.py @@ -8,7 +8,7 @@ def main(inFileName="Include/graminit.h", outFileName="Lib/symbol.py"): try: fp = open(inFileName) except OSError as err: - sys.stdout.write("I/O error: %s\n" % str(err)) + sys.stderr.write("I/O error: %s\n" % str(err)) sys.exit(1) with fp: lines = fp.read().split("\n") @@ -49,4 +49,5 @@ def main(inFileName="Include/graminit.h", outFileName="Lib/symbol.py"): with fp: fp.write("\n".join(format)) -main(*sys.argv[1:]) +if __name__ == '__main__': + main(*sys.argv[1:]) From 7a38bf0abd72cd9f1c6515c7490a74ac30b60987 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 7 Oct 2018 09:08:19 +0300 Subject: [PATCH 12/15] Remove ASYNC and AWAIT. --- Doc/library/token-list.inc | 2 -- Include/token.h | 6 ++---- Lib/token.py | 2 -- Parser/token.c | 2 -- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc index 50f4bdf5848811..624b5170d90130 100644 --- a/Doc/library/token-list.inc +++ b/Doc/library/token-list.inc @@ -54,8 +54,6 @@ RARROW ELLIPSIS OP - AWAIT - ASYNC ERRORTOKEN N_TOKENS NT_OFFSET diff --git a/Include/token.h b/Include/token.h index b24d4290982fbf..4ea367de597b19 100644 --- a/Include/token.h +++ b/Include/token.h @@ -64,10 +64,8 @@ extern "C" { #define RARROW 51 #define ELLIPSIS 52 #define OP 53 -#define AWAIT 54 -#define ASYNC 55 -#define ERRORTOKEN 56 -#define N_TOKENS 60 +#define ERRORTOKEN 54 +#define N_TOKENS 58 #define NT_OFFSET 256 /* Special definitions for cooperation with parser */ diff --git a/Lib/token.py b/Lib/token.py index bbcc16bce512ca..63ebd36df7c28a 100644 --- a/Lib/token.py +++ b/Lib/token.py @@ -59,8 +59,6 @@ ('ELLIPSIS', '...'), 'OP', - 'AWAIT', - 'ASYNC', 'ERRORTOKEN', # These aren't used by the C tokenizer but are needed for tokenize.py diff --git a/Parser/token.c b/Parser/token.c index 18426f600579f2..5bf189c36ebf04 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -60,8 +60,6 @@ const char * const _PyParser_TokenNames[] = { "RARROW", "ELLIPSIS", "OP", - "AWAIT", - "ASYNC", "", "", "", From d5f6f0ba15e3214b7b7d9c0dc0cd58ac1aa5776e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 18 Nov 2018 18:47:00 +0200 Subject: [PATCH 13/15] Use a single generating script. 
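
The three single-purpose generators (generate_token_h.py, generate_token_c.py
and generate_token_rst.py) are folded into a single script,
Tools/scripts/generate_token.py, which dispatches on its first argument and
rewrites an output file only when the generated text actually changed.  The
regen-token rule then boils down to three invocations of the same script
(sketch only; the Makefile prefixes every path with $(srcdir)):

    # Illustrative sketch of what "make regen-token" runs.
    import subprocess, sys
    for op, outfile in [('rst', 'Doc/library/token-list.inc'),
                        ('h', 'Include/token.h'),
                        ('c', 'Parser/token.c')]:
        subprocess.run([sys.executable, 'Tools/scripts/generate_token.py',
                        op, 'Lib/token.py', outfile], check=True)
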
--- Doc/library/token-list.inc | 2 +- Include/token.h | 2 +- Makefile.pre.in | 18 +-- Parser/token.c | 2 +- Tools/scripts/generate_token.py | 200 ++++++++++++++++++++++++++++ Tools/scripts/generate_token_c.py | 103 -------------- Tools/scripts/generate_token_h.py | 62 --------- Tools/scripts/generate_token_rst.py | 33 ----- 8 files changed, 212 insertions(+), 210 deletions(-) create mode 100644 Tools/scripts/generate_token.py delete mode 100755 Tools/scripts/generate_token_c.py delete mode 100755 Tools/scripts/generate_token_h.py delete mode 100755 Tools/scripts/generate_token_rst.py diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc index 624b5170d90130..62b3db3d2a4cdc 100644 --- a/Doc/library/token-list.inc +++ b/Doc/library/token-list.inc @@ -1,4 +1,4 @@ -.. Auto-generated by Tools/scripts/generate_token_rst.py +.. Auto-generated by Tools/scripts/generate_token.py .. data:: ENDMARKER NAME diff --git a/Include/token.h b/Include/token.h index 4ea367de597b19..2d491e6927d1a9 100644 --- a/Include/token.h +++ b/Include/token.h @@ -1,4 +1,4 @@ -/* Auto-generated by Tools/scripts/generate_token_h.py */ +/* Auto-generated by Tools/scripts/generate_token.py */ /* Token types */ #ifndef Py_LIMITED_API diff --git a/Makefile.pre.in b/Makefile.pre.in index d7a3f904fc224f..834e4122feef09 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -845,21 +845,21 @@ regen-opcode: .PHONY: regen-token regen-token: + # Regenerate Doc/library/token-list.inc from Lib/token.py + # using Tools/scripts/generate_token.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py rst \ + $(srcdir)/Lib/token.py \ + $(srcdir)/Doc/library/token-list.inc # Regenerate Include/token.h from Lib/token.py - # using Tools/scripts/generate_token_h.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_h.py \ + # using Tools/scripts/generate_token.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py h \ $(srcdir)/Lib/token.py \ $(srcdir)/Include/token.h # Regenerate Parser/token.c from Lib/token.py - # using Tools/scripts/generate_token_c.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_c.py \ + # using Tools/scripts/generate_token.py + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py c \ $(srcdir)/Lib/token.py \ $(srcdir)/Parser/token.c - # Regenerate Doc/library/token-list.inc from Lib/token.py - # using Tools/scripts/generate_token_rst.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token_rst.py \ - $(srcdir)/Lib/token.py \ - $(srcdir)/Doc/library/token-list.inc .PHONY: regen-symbol regen-symbol: $(srcdir)/Include/graminit.h diff --git a/Parser/token.c b/Parser/token.c index 5bf189c36ebf04..35519aa4b61161 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -1,4 +1,4 @@ -/* Auto-generated by Tools/scripts/generate_token_c.py */ +/* Auto-generated by Tools/scripts/generate_token.py */ #include "Python.h" #include "token.h" diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py new file mode 100644 index 00000000000000..ea905d2d74af1d --- /dev/null +++ b/Tools/scripts/generate_token.py @@ -0,0 +1,200 @@ +#! 
/usr/bin/env python3
+# This script generates token related files from Grammar/Tokens:
+#
+#   Doc/library/token-list.inc
+#   Include/token.h
+#   Parser/token.c
+#   Lib/token.py
+
+
+def load_module(path):
+    module = type('Namespace', (), {})()
+    with open(path, 'rb') as fp:
+        code = fp.read()
+    exec(code, module.__dict__)
+    return module
+
+def load_tokens(path):
+    global NT_OFFSET
+    token = load_module(path)
+    tok_names = [token.tok_name[i] for i in range(token.N_TOKENS)]
+    NT_OFFSET = token.NT_OFFSET
+    ERRORTOKEN = token.ERRORTOKEN
+    string_to_tok = dict(token.EXACT_TOKEN_TYPES)
+    return tok_names, ERRORTOKEN, string_to_tok
+
+
+def update_file(file, content):
+    try:
+        with open(file, 'r') as fobj:
+            if fobj.read() == content:
+                return False
+    except FileNotFoundError:
+        pass
+    with open(file, 'w') as fobj:
+        fobj.write(content)
+    return True
+
+
+token_h_template = """\
+/* Auto-generated by Tools/scripts/generate_token.py */
+
+/* Token types */
+#ifndef Py_LIMITED_API
+#ifndef Py_TOKEN_H
+#define Py_TOKEN_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
+
+%s\
+#define N_TOKENS        %d
+#define NT_OFFSET       %d
+
+/* Special definitions for cooperation with parser */
+
+#define ISTERMINAL(x)           ((x) < NT_OFFSET)
+#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
+#define ISEOF(x)                ((x) == ENDMARKER)
+
+
+PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
+PyAPI_FUNC(int) PyToken_OneChar(int);
+PyAPI_FUNC(int) PyToken_TwoChars(int, int);
+PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_TOKEN_H */
+#endif /* Py_LIMITED_API */
+"""
+
+def make_h(infile, outfile='Include/token.h'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+
+    defines = []
+    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
+        defines.append("#define %-15s %d\n" % (name, value))
+
+    if update_file(outfile, token_h_template % (
+            ''.join(defines),
+            len(tok_names),
+            NT_OFFSET
+        )):
+        print("%s regenerated from %s" % (outfile, infile), file=sys.stderr)
+
+
+token_c_template = """\
+/* Auto-generated by Tools/scripts/generate_token.py */
+
+#include "Python.h"
+#include "token.h"
+
+/* Token names */
+
+const char * const _PyParser_TokenNames[] = {
+%s\
+};
+
+/* Return the token corresponding to a single character */
+
+int
+PyToken_OneChar(int c1)
+{
+%s\
+    return OP;
+}
+
+int
+PyToken_TwoChars(int c1, int c2)
+{
+%s\
+    return OP;
+}
+
+int
+PyToken_ThreeChars(int c1, int c2, int c3)
+{
+%s\
+    return OP;
+}
+"""
+
+def generate_chars_to_token(mapping, n=1):
+    result = []
+    write = result.append
+    indent = '    ' * n
+    write(indent)
+    write('switch (c%d) {\n' % (n,))
+    for c in sorted(mapping):
+        write(indent)
+        value = mapping[c]
+        if isinstance(value, dict):
+            write("case '%s':\n" % (c,))
+            write(generate_chars_to_token(value, n + 1))
+            write(indent)
+            write('    break;\n')
+        else:
+            write("case '%s': return %s;\n" % (c, value))
+    write(indent)
+    write('}\n')
+    return ''.join(result)
+
+def make_c(infile, outfile='Parser/token.c'):
+    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
+    string_to_tok['<>'] = string_to_tok['!=']
+    chars_to_token = {}
+    for string, value in string_to_tok.items():
+        assert 1 <= len(string) <= 3
+        name = tok_names[value]
+        m = chars_to_token.setdefault(len(string), {})
+        for c in string[:-1]:
+            m = m.setdefault(c, {})
+        m[string[-1]] = name
+
+    names = []
+    for value, name in enumerate(tok_names):
+        if value >= ERRORTOKEN:
+            name
= '<%s>' % name + names.append(' "%s",\n' % name) + names.append(' "",\n') + + if update_file(outfile, token_c_template % ( + ''.join(names), + generate_chars_to_token(chars_to_token[1]), + generate_chars_to_token(chars_to_token[2]), + generate_chars_to_token(chars_to_token[3]) + )): + print("%s regenerated from %s" % (outfile, infile)) + + +token_inc_template = """\ +.. Auto-generated by Tools/scripts/generate_token.py +.. data:: +%s\ + N_TOKENS + NT_OFFSET +""" + +def make_rst(infile, outfile='Doc/library/token-list.inc'): + tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) + + names = [] + for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): + names.append(" %s\n" % (name,)) + + if update_file(outfile, token_inc_template % ''.join(names)): + print("%s regenerated from %s" % (outfile, infile)) + + +def main(op, infile='Lib/token.py', *args): + make = globals()['make_' + op] + make(infile, *args) + + +if __name__ == '__main__': + import sys + main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_token_c.py b/Tools/scripts/generate_token_c.py deleted file mode 100755 index 8bd4343773b837..00000000000000 --- a/Tools/scripts/generate_token_c.py +++ /dev/null @@ -1,103 +0,0 @@ -#! /usr/bin/env python3 -# This script generates the token.c source file. - -template = """\ -/* Auto-generated by Tools/scripts/generate_token_c.py */ - -#include "Python.h" -#include "token.h" - -/* Token names */ - -const char * const _PyParser_TokenNames[] = { -%s\ -}; - -/* Return the token corresponding to a single character */ - -int -PyToken_OneChar(int c1) -{ -%s\ - return OP; -} - -int -PyToken_TwoChars(int c1, int c2) -{ -%s\ - return OP; -} - -int -PyToken_ThreeChars(int c1, int c2, int c3) -{ -%s\ - return OP; -} -""" - -def load_module(path): - module = type('Namespace', (), {})() - with open(path, 'rb') as fp: - code = fp.read() - exec(code, module.__dict__) - return module - -def generate_chars_to_token(mapping, n=1): - result = [] - write = result.append - indent = ' ' * n - write(indent) - write('switch (c%d) {\n' % (n,)) - for c in sorted(mapping): - write(indent) - value = mapping[c] - if isinstance(value, dict): - write("case '%s':\n" % (c,)) - write(generate_chars_to_token(value, n + 1)) - write(indent) - write(' break;\n') - else: - write("case '%s': return %s;\n" % (c, value)) - write(indent) - write('}\n') - return ''.join(result) - -def main(token_py='Lib/token.py', outfile='Parser/token.c'): - token = load_module(token_py) - tok_name = token.tok_name - ERRORTOKEN = token.ERRORTOKEN - N_TOKENS = token.N_TOKENS - EXACT_TOKEN_TYPES = dict(token.EXACT_TOKEN_TYPES) - EXACT_TOKEN_TYPES['<>'] = EXACT_TOKEN_TYPES['!='] - chars_to_token = {} - for string, value in EXACT_TOKEN_TYPES.items(): - assert 1 <= len(string) <= 3 - name = tok_name[value] - m = chars_to_token.setdefault(len(string), {}) - for c in string[:-1]: - m = m.setdefault(c, {}) - m[string[-1]] = name - - names = [] - for value in range(N_TOKENS + 1): - name = tok_name[value] - if value >= ERRORTOKEN: - name = '<%s>' % name - names.append(' "%s",\n' % name) - - with open(outfile, 'w') as fobj: - fobj.write(template % ( - ''.join(names), - generate_chars_to_token(chars_to_token[1]), - generate_chars_to_token(chars_to_token[2]), - generate_chars_to_token(chars_to_token[3]) - )) - - print("%s regenerated from %s" % (outfile, token_py)) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_token_h.py b/Tools/scripts/generate_token_h.py deleted file mode 100755 index 
c1fb19c03e2910..00000000000000 --- a/Tools/scripts/generate_token_h.py +++ /dev/null @@ -1,62 +0,0 @@ -#! /usr/bin/env python3 -# This script generates the token.h header file. - -header = """/* Auto-generated by Tools/scripts/generate_token_h.py */ - -/* Token types */ -#ifndef Py_LIMITED_API -#ifndef Py_TOKEN_H -#define Py_TOKEN_H -#ifdef __cplusplus -extern "C" { -#endif - -#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ - -""" - -footer = """ -/* Special definitions for cooperation with parser */ - -#define ISTERMINAL(x) ((x) < NT_OFFSET) -#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) -#define ISEOF(x) ((x) == ENDMARKER) - - -PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ -PyAPI_FUNC(int) PyToken_OneChar(int); -PyAPI_FUNC(int) PyToken_TwoChars(int, int); -PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_TOKEN_H */ -#endif /* Py_LIMITED_API */ -""" - - -def load_module(path): - module = type('Namespace', (), {})() - with open(path, 'rb') as fp: - code = fp.read() - exec(code, module.__dict__) - return module - -def main(token_py='Lib/token.py', outfile='Include/token.h'): - token = load_module(token_py) - tok_name = token.tok_name - with open(outfile, 'w') as fobj: - fobj.write(header) - for value in sorted(tok_name): - if token.ERRORTOKEN < value < token.N_TOKENS: - continue - name = tok_name[value] - fobj.write("#define %-15s %d\n" % (name, value)) - fobj.write(footer) - print("%s regenerated from %s" % (outfile, token_py)) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) diff --git a/Tools/scripts/generate_token_rst.py b/Tools/scripts/generate_token_rst.py deleted file mode 100755 index 57a1e5291fab11..00000000000000 --- a/Tools/scripts/generate_token_rst.py +++ /dev/null @@ -1,33 +0,0 @@ -#! /usr/bin/env python3 -# This script generates the token-list.inc documentation file. - -header = """\ -.. Auto-generated by Tools/scripts/generate_token_rst.py -.. data:: -""" - - -def load_module(path): - module = type('Namespace', (), {})() - with open(path, 'rb') as fp: - code = fp.read() - exec(code, module.__dict__) - return module - -def main(token_py='Lib/token.py', outfile='Doc/library/token-list.inc'): - token = load_module(token_py) - tok_name = token.tok_name - with open(outfile, 'w') as fobj: - fobj.write(header) - for value in sorted(tok_name): - if token.ERRORTOKEN < value < token.N_TOKENS: - continue - name = tok_name[value] - fobj.write(" %s\n" % (name,)) - - print("%s regenerated from %s" % (outfile, token_py)) - - -if __name__ == '__main__': - import sys - main(*sys.argv[1:]) From 5b71cf40e3e6294070389dfd25ed4abe41cb0bdd Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 18 Nov 2018 18:51:18 +0200 Subject: [PATCH 14/15] Generate descriptions for punctuation and operators. --- Doc/library/token-list.inc | 263 +++++++++++++++++++++++++------- Tools/scripts/generate_token.py | 17 ++- 2 files changed, 216 insertions(+), 64 deletions(-) diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc index 62b3db3d2a4cdc..cd6e0f26968eea 100644 --- a/Doc/library/token-list.inc +++ b/Doc/library/token-list.inc @@ -1,59 +1,206 @@ .. Auto-generated by Tools/scripts/generate_token.py -.. 
data:: - ENDMARKER - NAME - NUMBER - STRING - NEWLINE - INDENT - DEDENT - LPAR - RPAR - LSQB - RSQB - COLON - COMMA - SEMI - PLUS - MINUS - STAR - SLASH - VBAR - AMPER - LESS - GREATER - EQUAL - DOT - PERCENT - LBRACE - RBRACE - EQEQUAL - NOTEQUAL - LESSEQUAL - GREATEREQUAL - TILDE - CIRCUMFLEX - LEFTSHIFT - RIGHTSHIFT - DOUBLESTAR - PLUSEQUAL - MINEQUAL - STAREQUAL - SLASHEQUAL - PERCENTEQUAL - AMPEREQUAL - VBAREQUAL - CIRCUMFLEXEQUAL - LEFTSHIFTEQUAL - RIGHTSHIFTEQUAL - DOUBLESTAREQUAL - DOUBLESLASH - DOUBLESLASHEQUAL - AT - ATEQUAL - RARROW - ELLIPSIS - OP - ERRORTOKEN - N_TOKENS - NT_OFFSET +.. data:: ENDMARKER + +.. data:: NAME + +.. data:: NUMBER + +.. data:: STRING + +.. data:: NEWLINE + +.. data:: INDENT + +.. data:: DEDENT + +.. data:: LPAR + + Token value for ``"("``. + +.. data:: RPAR + + Token value for ``")"``. + +.. data:: LSQB + + Token value for ``"["``. + +.. data:: RSQB + + Token value for ``"]"``. + +.. data:: COLON + + Token value for ``":"``. + +.. data:: COMMA + + Token value for ``","``. + +.. data:: SEMI + + Token value for ``";"``. + +.. data:: PLUS + + Token value for ``"+"``. + +.. data:: MINUS + + Token value for ``"-"``. + +.. data:: STAR + + Token value for ``"*"``. + +.. data:: SLASH + + Token value for ``"/"``. + +.. data:: VBAR + + Token value for ``"|"``. + +.. data:: AMPER + + Token value for ``"&"``. + +.. data:: LESS + + Token value for ``"<"``. + +.. data:: GREATER + + Token value for ``">"``. + +.. data:: EQUAL + + Token value for ``"="``. + +.. data:: DOT + + Token value for ``"."``. + +.. data:: PERCENT + + Token value for ``"%"``. + +.. data:: LBRACE + + Token value for ``"{"``. + +.. data:: RBRACE + + Token value for ``"}"``. + +.. data:: EQEQUAL + + Token value for ``"=="``. + +.. data:: NOTEQUAL + + Token value for ``"!="``. + +.. data:: LESSEQUAL + + Token value for ``"<="``. + +.. data:: GREATEREQUAL + + Token value for ``">="``. + +.. data:: TILDE + + Token value for ``"~"``. + +.. data:: CIRCUMFLEX + + Token value for ``"^"``. + +.. data:: LEFTSHIFT + + Token value for ``"<<"``. + +.. data:: RIGHTSHIFT + + Token value for ``">>"``. + +.. data:: DOUBLESTAR + + Token value for ``"**"``. + +.. data:: PLUSEQUAL + + Token value for ``"+="``. + +.. data:: MINEQUAL + + Token value for ``"-="``. + +.. data:: STAREQUAL + + Token value for ``"*="``. + +.. data:: SLASHEQUAL + + Token value for ``"/="``. + +.. data:: PERCENTEQUAL + + Token value for ``"%="``. + +.. data:: AMPEREQUAL + + Token value for ``"&="``. + +.. data:: VBAREQUAL + + Token value for ``"|="``. + +.. data:: CIRCUMFLEXEQUAL + + Token value for ``"^="``. + +.. data:: LEFTSHIFTEQUAL + + Token value for ``"<<="``. + +.. data:: RIGHTSHIFTEQUAL + + Token value for ``">>="``. + +.. data:: DOUBLESTAREQUAL + + Token value for ``"**="``. + +.. data:: DOUBLESLASH + + Token value for ``"//"``. + +.. data:: DOUBLESLASHEQUAL + + Token value for ``"//="``. + +.. data:: AT + + Token value for ``"@"``. + +.. data:: ATEQUAL + + Token value for ``"@="``. + +.. data:: RARROW + + Token value for ``"->"``. + +.. data:: ELLIPSIS + + Token value for ``"..."``. + +.. data:: OP + +.. data:: ERRORTOKEN + +.. data:: N_TOKENS + +.. data:: NT_OFFSET diff --git a/Tools/scripts/generate_token.py b/Tools/scripts/generate_token.py index ea905d2d74af1d..2ac931771f112a 100644 --- a/Tools/scripts/generate_token.py +++ b/Tools/scripts/generate_token.py @@ -173,20 +173,25 @@ def make_c(infile, outfile='Parser/token.c'): token_inc_template = """\ .. Auto-generated by Tools/scripts/generate_token.py -.. 
data:: -%s\ - N_TOKENS - NT_OFFSET +%s +.. data:: N_TOKENS + +.. data:: NT_OFFSET """ def make_rst(infile, outfile='Doc/library/token-list.inc'): tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) + tok_to_string = {value: s for s, value in string_to_tok.items()} names = [] for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): - names.append(" %s\n" % (name,)) + names.append('.. data:: %s' % (name,)) + if value in tok_to_string: + names.append('') + names.append(' Token value for ``"%s"``.' % tok_to_string[value]) + names.append('') - if update_file(outfile, token_inc_template % ''.join(names)): + if update_file(outfile, token_inc_template % '\n'.join(names)): print("%s regenerated from %s" % (outfile, infile)) From 7746abea28beb89abec3425d041ebc222132f0b1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 18 Nov 2018 18:59:24 +0200 Subject: [PATCH 15/15] Add generated files to .gitattributes. --- .gitattributes | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitattributes b/.gitattributes index 4a487c3c2a14e5..274c2da3ab85b5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -54,3 +54,6 @@ Include/opcode.h linguist-generated=true Python/opcode_targets.h linguist-generated=true Objects/typeslots.inc linguist-generated=true Modules/unicodedata_db.h linguist-generated=true +Doc/library/token-list.inc linguist-generated=true +Include/token.h linguist-generated=true +Parser/token.c linguist-generated=true