diff --git a/Include/internal/pycore_token.h b/Include/internal/pycore_token.h
index 571cd6249f2812..8e1f8aef59ad3b 100644
--- a/Include/internal/pycore_token.h
+++ b/Include/internal/pycore_token.h
@@ -93,6 +93,15 @@ extern "C" {
 #define ISSTRINGLIT(x)          ((x) == STRING           || \
                                  (x) == FSTRING_MIDDLE)
 
+/* Pack two or three characters into one int so that a single flat switch
+   can replace nested per-character switches.  Each argument must fit in
+   8 bits; if any does not (for example EOF), the result is -1, which
+   never equals the packed code of a real operator.  The arguments are
+   evaluated more than once, so they must be free of side effects. */
+#define GENERATE_2CHAR_CODE(x, y) \
+    ((((x) | (y)) & ~0xFF) ? -1 : (((x) << 8) | (y)))
+#define GENERATE_3CHAR_CODE(x, y, z) \
+    ((((x) | (y) | (z)) & ~0xFF) ? -1 : (((x) << 16) | ((y) << 8) | (z)))
 
 // Export these 4 symbols for 'test_peg_generator'
 PyAPI_DATA(const char * const) _PyParser_TokenNames[];  /* Token names */
diff --git a/Parser/token.c b/Parser/token.c
index 4f163f21609a0a..609d484840cb82 100644
--- a/Parser/token.c
+++ b/Parser/token.c
@@ -112,83 +112,27 @@ _PyToken_OneChar(int c1)
 int
 _PyToken_TwoChars(int c1, int c2)
 {
-    switch (c1) {
-    case '!':
-        switch (c2) {
-        case '=': return NOTEQUAL;
-        }
-        break;
-    case '%':
-        switch (c2) {
-        case '=': return PERCENTEQUAL;
-        }
-        break;
-    case '&':
-        switch (c2) {
-        case '=': return AMPEREQUAL;
-        }
-        break;
-    case '*':
-        switch (c2) {
-        case '*': return DOUBLESTAR;
-        case '=': return STAREQUAL;
-        }
-        break;
-    case '+':
-        switch (c2) {
-        case '=': return PLUSEQUAL;
-        }
-        break;
-    case '-':
-        switch (c2) {
-        case '=': return MINEQUAL;
-        case '>': return RARROW;
-        }
-        break;
-    case '/':
-        switch (c2) {
-        case '/': return DOUBLESLASH;
-        case '=': return SLASHEQUAL;
-        }
-        break;
-    case ':':
-        switch (c2) {
-        case '=': return COLONEQUAL;
-        }
-        break;
-    case '<':
-        switch (c2) {
-        case '<': return LEFTSHIFT;
-        case '=': return LESSEQUAL;
-        case '>': return NOTEQUAL;
-        }
-        break;
-    case '=':
-        switch (c2) {
-        case '=': return EQEQUAL;
-        }
-        break;
-    case '>':
-        switch (c2) {
-        case '=': return GREATEREQUAL;
-        case '>': return RIGHTSHIFT;
-        }
-        break;
-    case '@':
-        switch (c2) {
-        case '=': return ATEQUAL;
-        }
-        break;
-    case '^':
-        switch (c2) {
-        case '=': return CIRCUMFLEXEQUAL;
-        }
-        break;
-    case '|':
-        switch (c2) {
-        case '=': return VBAREQUAL;
-        }
-        break;
+    switch (GENERATE_2CHAR_CODE(c1, c2)) {
+    case GENERATE_2CHAR_CODE('!', '='): return NOTEQUAL;
+    case GENERATE_2CHAR_CODE('%', '='): return PERCENTEQUAL;
+    case GENERATE_2CHAR_CODE('&', '='): return AMPEREQUAL;
+    case GENERATE_2CHAR_CODE('*', '*'): return DOUBLESTAR;
+    case GENERATE_2CHAR_CODE('*', '='): return STAREQUAL;
+    case GENERATE_2CHAR_CODE('+', '='): return PLUSEQUAL;
+    case GENERATE_2CHAR_CODE('-', '='): return MINEQUAL;
+    case GENERATE_2CHAR_CODE('-', '>'): return RARROW;
+    case GENERATE_2CHAR_CODE('/', '/'): return DOUBLESLASH;
+    case GENERATE_2CHAR_CODE('/', '='): return SLASHEQUAL;
+    case GENERATE_2CHAR_CODE(':', '='): return COLONEQUAL;
+    case GENERATE_2CHAR_CODE('<', '<'): return LEFTSHIFT;
+    case GENERATE_2CHAR_CODE('<', '='): return LESSEQUAL;
+    case GENERATE_2CHAR_CODE('<', '>'): return NOTEQUAL;
+    case GENERATE_2CHAR_CODE('=', '='): return EQEQUAL;
+    case GENERATE_2CHAR_CODE('>', '='): return GREATEREQUAL;
+    case GENERATE_2CHAR_CODE('>', '>'): return RIGHTSHIFT;
+    case GENERATE_2CHAR_CODE('@', '='): return ATEQUAL;
+    case GENERATE_2CHAR_CODE('^', '='): return CIRCUMFLEXEQUAL;
+    case GENERATE_2CHAR_CODE('|', '='): return VBAREQUAL;
     }
     return OP;
 }
@@ -196,52 +140,12 @@ _PyToken_TwoChars(int c1, int c2)
 int
 _PyToken_ThreeChars(int c1, int c2, int c3)
 {
-    switch (c1) {
-    case '*':
-        switch (c2) {
-        case '*':
-            switch (c3) {
-            case '=': return DOUBLESTAREQUAL;
-            }
-            break;
-        }
-        break;
-    case '.':
-        switch (c2) {
-        case '.':
-            switch (c3) {
-            case '.': return ELLIPSIS;
-            }
-            break;
-        }
-        break;
-    case '/':
-        switch (c2) {
-        case '/':
-            switch (c3) {
-            case '=': return DOUBLESLASHEQUAL;
-            }
-            break;
-        }
-        break;
-    case '<':
-        switch (c2) {
-        case '<':
-            switch (c3) {
-            case '=': return LEFTSHIFTEQUAL;
-            }
-            break;
-        }
-        break;
-    case '>':
-        switch (c2) {
-        case '>':
-            switch (c3) {
-            case '=': return RIGHTSHIFTEQUAL;
-            }
-            break;
-        }
-        break;
+    switch (GENERATE_3CHAR_CODE(c1, c2, c3)) {
+    case GENERATE_3CHAR_CODE('*', '*', '='): return DOUBLESTAREQUAL;
+    case GENERATE_3CHAR_CODE('.', '.', '.'): return ELLIPSIS;
+    case GENERATE_3CHAR_CODE('/', '/', '='): return DOUBLESLASHEQUAL;
+    case GENERATE_3CHAR_CODE('<', '<', '='): return LEFTSHIFTEQUAL;
+    case GENERATE_3CHAR_CODE('>', '>', '='): return RIGHTSHIFTEQUAL;
     }
     return OP;
 }
diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py
index 16c38841e44a4d..5b8d1c331fe277 100755
--- a/Tools/build/generate_token.py
+++ b/Tools/build/generate_token.py
@@ -83,6 +83,15 @@ def update_file(file, content):
 #define ISSTRINGLIT(x)          ((x) == STRING           || \\
                                  (x) == FSTRING_MIDDLE)
 
+/* Pack two or three characters into one int so that a single flat switch
+   can replace nested per-character switches.  Each argument must fit in
+   8 bits; if any does not (for example EOF), the result is -1, which
+   never equals the packed code of a real operator.  The arguments are
+   evaluated more than once, so they must be free of side effects. */
+#define GENERATE_2CHAR_CODE(x, y) \\
+    ((((x) | (y)) & ~0xFF) ? -1 : (((x) << 8) | (y)))
+#define GENERATE_3CHAR_CODE(x, y, z) \\
+    ((((x) | (y) | (z)) & ~0xFF) ? -1 : (((x) << 16) | ((y) << 8) | (z)))
 
 // Export these 4 symbols for 'test_peg_generator'
 PyAPI_DATA(const char * const) _PyParser_TokenNames[];  /* Token names */
@@ -149,6 +158,51 @@ def make_h(infile, outfile='Include/internal/pycore_token.h'):
 }
 """
 
+def _c_char_literal(c):
+    """Return the character *c* as a C character literal.
+
+    A single quote or a backslash is escaped so that the generated
+    code stays valid C if such an operator character is ever added.
+    """
+    if c in ("'", "\\"):
+        c = "\\" + c
+    return "'%s'" % c
+
+
+def _generate_switch(selector, label_format, tokens):
+    """Return C code for a flat switch that maps operators to token names.
+
+    *selector* is the C expression being switched on.  *label_format*
+    is a %-format with one '%s' per operator character; it builds each
+    case label from the operator's C character literals.  *tokens* maps
+    operator strings to token names; cases are emitted in sorted
+    operator order.
+    """
+    lines = ['    switch (%s) {\n' % selector]
+    for string, name in sorted(tokens.items()):
+        label = label_format % tuple(map(_c_char_literal, string))
+        lines.append('    case %s: return %s;\n' % (label, name))
+    lines.append('    }\n')
+    return ''.join(lines)
+
+
+def generate_one_char_tokens(tokens):
+    """Return the switch statement for _PyToken_OneChar()."""
+    return _generate_switch('c1', '%s', tokens)
+
+
+def generate_two_char_tokens(tokens):
+    """Return the switch statement for _PyToken_TwoChars()."""
+    return _generate_switch('GENERATE_2CHAR_CODE(c1, c2)',
+                            'GENERATE_2CHAR_CODE(%s, %s)', tokens)
+
+
+def generate_three_char_tokens(tokens):
+    """Return the switch statement for _PyToken_ThreeChars()."""
+    return _generate_switch('GENERATE_3CHAR_CODE(c1, c2, c3)',
+                            'GENERATE_3CHAR_CODE(%s, %s, %s)', tokens)
+
+
 def generate_chars_to_token(mapping, n=1):
     result = []
     write = result.append
@@ -172,14 +226,12 @@ def generate_chars_to_token(mapping, n=1):
 def make_c(infile, outfile='Parser/token.c'):
     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
     string_to_tok['<>'] = string_to_tok['!=']
-    chars_to_token = {}
+    # Group the operator strings by length; one flat switch per length.
+    chars_to_token = {1: {}, 2: {}, 3: {}}
     for string, value in string_to_tok.items():
         assert 1 <= len(string) <= 3
         name = tok_names[value]
-        m = chars_to_token.setdefault(len(string), {})
-        for c in string[:-1]:
-            m = m.setdefault(c, {})
-        m[string[-1]] = name
+        chars_to_token[len(string)][string] = name
 
     names = []
     for value, name in enumerate(tok_names):
@@ -190,8 +242,8 @@ def make_c(infile, outfile='Parser/token.c'):
 
     if update_file(outfile, token_c_template % (
             ''.join(names),
-            generate_chars_to_token(chars_to_token[1]),
-            generate_chars_to_token(chars_to_token[2]),
-            generate_chars_to_token(chars_to_token[3])
+            generate_one_char_tokens(chars_to_token[1]),
+            generate_two_char_tokens(chars_to_token[2]),
+            generate_three_char_tokens(chars_to_token[3])
         )):
         print("%s regenerated from %s" % (outfile, infile))