From 9e152a52b942daa5d6d940ad06c545d328b0e89f Mon Sep 17 00:00:00 2001 From: DefinitelyNotAnOrca Date: Wed, 23 Oct 2024 18:29:25 -0500 Subject: [PATCH 1/7] Updated the token.c file to use predefined values instead of nested switch structure for improved performance --- Parser/token.c | 176 +++++++++++++++---------------------------------- 1 file changed, 53 insertions(+), 123 deletions(-) diff --git a/Parser/token.c b/Parser/token.c index 4f163f21609a0a..00f9d0fe6d2799 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -109,139 +109,69 @@ _PyToken_OneChar(int c1) return OP; } +#define NOTEQUAL_CODE (int)('!' << 8 | '=') +#define PERCENTEQUAL_CODE (int)('%' << 8 | '=') +#define AMPEREQUAL_CODE (int)('&' << 8 | '=') +#define DOUBLESTAR_CODE (int)('*' << 8 | '*') +#define STAREQUAL_CODE (int)('*' << 8 | '=') +#define PLUSEQUAL_CODE (int)('+' << 8 | '=') +#define MINEQUAL_CODE (int)('-' << 8 | '=') +#define RARROW_CODE (int)('-' << 8 | '>') +#define DOUBLESLASH_CODE (int)('/' << 8 | '/') +#define SLASHEQUAL_CODE (int)('/' << 8 | '=') +#define COLONEQUAL_CODE (int)(':' << 8 | '=') +#define LEFTSHIFT_CODE (int)('<' << 8 | '<') +#define LESSEQUAL_CODE (int)('<' << 8 | '=') +#define EQEQUAL_CODE (int)('=' << 8 | '=') +#define GREATEREQUAL_CODE (int)('>' << 8 | '=') +#define RIGHTSHIFT_CODE (int)('>' << 8 | '>') +#define ATEQUAL_CODE (int)('@' << 8 | '=') +#define CIRCUMFLEXEQUAL_CODE (int)('^' << 8 | '=') +#define VBAREQUAL_CODE (int)('|' << 8 | '=') + int _PyToken_TwoChars(int c1, int c2) { - switch (c1) { - case '!': - switch (c2) { - case '=': return NOTEQUAL; - } - break; - case '%': - switch (c2) { - case '=': return PERCENTEQUAL; - } - break; - case '&': - switch (c2) { - case '=': return AMPEREQUAL; - } - break; - case '*': - switch (c2) { - case '*': return DOUBLESTAR; - case '=': return STAREQUAL; - } - break; - case '+': - switch (c2) { - case '=': return PLUSEQUAL; - } - break; - case '-': - switch (c2) { - case '=': return MINEQUAL; - case '>': return RARROW; - } - break; - case '/': - switch (c2) { - case '/': return DOUBLESLASH; - case '=': return SLASHEQUAL; - } - break; - case ':': - switch (c2) { - case '=': return COLONEQUAL; - } - break; - case '<': - switch (c2) { - case '<': return LEFTSHIFT; - case '=': return LESSEQUAL; - case '>': return NOTEQUAL; - } - break; - case '=': - switch (c2) { - case '=': return EQEQUAL; - } - break; - case '>': - switch (c2) { - case '=': return GREATEREQUAL; - case '>': return RIGHTSHIFT; - } - break; - case '@': - switch (c2) { - case '=': return ATEQUAL; - } - break; - case '^': - switch (c2) { - case '=': return CIRCUMFLEXEQUAL; - } - break; - case '|': - switch (c2) { - case '=': return VBAREQUAL; - } - break; + switch (c1 << 8 | c2) { + case NOTEQUAL_CODE: return NOTEQUAL; + case PERCENTEQUAL_CODE: return PERCENTEQUAL; + case AMPEREQUAL_CODE: return AMPEREQUAL; + case DOUBLESTAR_CODE: return DOUBLESTAR; + case STAREQUAL_CODE: return STAREQUAL; + case PLUSEQUAL_CODE: return PLUSEQUAL; + case MINEQUAL_CODE: return MINEQUAL; + case RARROW_CODE: return RARROW; + case DOUBLESLASH_CODE: return DOUBLESLASH; + case SLASHEQUAL_CODE: return SLASHEQUAL; + case COLONEQUAL_CODE: return COLONEQUAL; + case LEFTSHIFT_CODE: return LEFTSHIFT; + case LESSEQUAL_CODE: return LESSEQUAL; + case EQEQUAL_CODE: return EQEQUAL; + case GREATEREQUAL_CODE: return GREATEREQUAL; + case RIGHTSHIFT_CODE: return RIGHTSHIFT; + case ATEQUAL_CODE: return ATEQUAL; + case CIRCUMFLEXEQUAL_CODE: return CIRCUMFLEXEQUAL; + case VBAREQUAL_CODE: return VBAREQUAL; } return OP; } +#define DOUBLESTAREQUAL_CODE (int)('*' << 16 | '*' << 8 | '=') +#define ELLIPSIS_CODE (int)('.' << 16 | '.' << 8 | '.') +#define DOUBLESLASHEQUAL_CODE (int)('/' << 16 | '/' << 8 | '=') +#define LEFTSHIFTEQUAL_CODE (int)('<' << 16 | '<' << 8 | '=') +#define RIGHTSHIFTEQUAL_CODE (int)('>' << 16 | '>' << 8 | '=') + + int _PyToken_ThreeChars(int c1, int c2, int c3) { - switch (c1) { - case '*': - switch (c2) { - case '*': - switch (c3) { - case '=': return DOUBLESTAREQUAL; - } - break; - } - break; - case '.': - switch (c2) { - case '.': - switch (c3) { - case '.': return ELLIPSIS; - } - break; - } - break; - case '/': - switch (c2) { - case '/': - switch (c3) { - case '=': return DOUBLESLASHEQUAL; - } - break; - } - break; - case '<': - switch (c2) { - case '<': - switch (c3) { - case '=': return LEFTSHIFTEQUAL; - } - break; - } - break; - case '>': - switch (c2) { - case '>': - switch (c3) { - case '=': return RIGHTSHIFTEQUAL; - } - break; - } - break; + switch (c1 << 16 | c2 << 8 | c3) { + case DOUBLESTAREQUAL_CODE: return DOUBLESTAREQUAL; + case ELLIPSIS_CODE: return ELLIPSIS; + case DOUBLESLASHEQUAL_CODE: return DOUBLESLASHEQUAL; + case LEFTSHIFTEQUAL_CODE: return LEFTSHIFTEQUAL; + case RIGHTSHIFTEQUAL_CODE: return RIGHTSHIFTEQUAL; } return OP; } From 20a6c8946c14d2ee51adaa2586b03149db6ae05b Mon Sep 17 00:00:00 2001 From: DefinitelyNotAnOrca Date: Wed, 23 Oct 2024 19:06:59 -0500 Subject: [PATCH 2/7] Added Comments and Checks to see if characters are out of range --- Parser/token.c | 64 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/Parser/token.c b/Parser/token.c index 00f9d0fe6d2799..9980d3301a8f6d 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -109,30 +109,36 @@ _PyToken_OneChar(int c1) return OP; } -#define NOTEQUAL_CODE (int)('!' << 8 | '=') -#define PERCENTEQUAL_CODE (int)('%' << 8 | '=') -#define AMPEREQUAL_CODE (int)('&' << 8 | '=') -#define DOUBLESTAR_CODE (int)('*' << 8 | '*') -#define STAREQUAL_CODE (int)('*' << 8 | '=') -#define PLUSEQUAL_CODE (int)('+' << 8 | '=') -#define MINEQUAL_CODE (int)('-' << 8 | '=') -#define RARROW_CODE (int)('-' << 8 | '>') -#define DOUBLESLASH_CODE (int)('/' << 8 | '/') -#define SLASHEQUAL_CODE (int)('/' << 8 | '=') -#define COLONEQUAL_CODE (int)(':' << 8 | '=') -#define LEFTSHIFT_CODE (int)('<' << 8 | '<') -#define LESSEQUAL_CODE (int)('<' << 8 | '=') -#define EQEQUAL_CODE (int)('=' << 8 | '=') -#define GREATEREQUAL_CODE (int)('>' << 8 | '=') -#define RIGHTSHIFT_CODE (int)('>' << 8 | '>') -#define ATEQUAL_CODE (int)('@' << 8 | '=') -#define CIRCUMFLEXEQUAL_CODE (int)('^' << 8 | '=') -#define VBAREQUAL_CODE (int)('|' << 8 | '=') +// Return the token corresponding to two tokens +// The code is a 16-bit integer with the first character in the high byte and the second character in the low byte. +#define NOTEQUAL_CODE (int)('!' << 8 | '=') // != +#define PERCENTEQUAL_CODE (int)('%' << 8 | '=') // %= +#define AMPEREQUAL_CODE (int)('&' << 8 | '=') // &= +#define DOUBLESTAR_CODE (int)('*' << 8 | '*') // ** +#define STAREQUAL_CODE (int)('*' << 8 | '=') // *= +#define PLUSEQUAL_CODE (int)('+' << 8 | '=') // += +#define MINEQUAL_CODE (int)('-' << 8 | '=') // -= +#define RARROW_CODE (int)('-' << 8 | '>') // -> +#define DOUBLESLASH_CODE (int)('/' << 8 | '/') // // +#define SLASHEQUAL_CODE (int)('/' << 8 | '=') // /= +#define COLONEQUAL_CODE (int)(':' << 8 | '=') // := +#define LEFTSHIFT_CODE (int)('<' << 8 | '<') // << +#define LESSEQUAL_CODE (int)('<' << 8 | '=') // <= +#define EQEQUAL_CODE (int)('=' << 8 | '=') // == +#define GREATEREQUAL_CODE (int)('>' << 8 | '=') // >= +#define RIGHTSHIFT_CODE (int)('>' << 8 | '>') // >> +#define ATEQUAL_CODE (int)('@' << 8 | '=') // @= +#define CIRCUMFLEXEQUAL_CODE (int)('^' << 8 | '=') // ^= +#define VBAREQUAL_CODE (int)('|' << 8 | '=') // |= int _PyToken_TwoChars(int c1, int c2) { - switch (c1 << 8 | c2) { + if(c1 > 255 || c2 > 255) { // handle to see if tokens are out of range + return OP; + } + + switch (c1 << 8 | c2) { // Combine the two tokens into a 16-bit integer case NOTEQUAL_CODE: return NOTEQUAL; case PERCENTEQUAL_CODE: return PERCENTEQUAL; case AMPEREQUAL_CODE: return AMPEREQUAL; @@ -156,17 +162,23 @@ _PyToken_TwoChars(int c1, int c2) return OP; } -#define DOUBLESTAREQUAL_CODE (int)('*' << 16 | '*' << 8 | '=') -#define ELLIPSIS_CODE (int)('.' << 16 | '.' << 8 | '.') -#define DOUBLESLASHEQUAL_CODE (int)('/' << 16 | '/' << 8 | '=') -#define LEFTSHIFTEQUAL_CODE (int)('<' << 16 | '<' << 8 | '=') -#define RIGHTSHIFTEQUAL_CODE (int)('>' << 16 | '>' << 8 | '=') +// Return the token corresponding to three tokens +// The code is a 24-bit integer with the first character in the high byte, the second character in the middle byte, and the third character in the low byte. +#define DOUBLESTAREQUAL_CODE (int)('*' << 16 | '*' << 8 | '=') // **= +#define ELLIPSIS_CODE (int)('.' << 16 | '.' << 8 | '.') // ... +#define DOUBLESLASHEQUAL_CODE (int)('/' << 16 | '/' << 8 | '=') // //= +#define LEFTSHIFTEQUAL_CODE (int)('<' << 16 | '<' << 8 | '=') // <<= +#define RIGHTSHIFTEQUAL_CODE (int)('>' << 16 | '>' << 8 | '=') // >>= int _PyToken_ThreeChars(int c1, int c2, int c3) { - switch (c1 << 16 | c2 << 8 | c3) { + if(c1 > 255 || c2 > 255 || c3 > 255) { // handle to see if tokens are out of range + return OP; + } + + switch (c1 << 16 | c2 << 8 | c3) { // Combine the three tokens into a 24-bit integer case DOUBLESTAREQUAL_CODE: return DOUBLESTAREQUAL; case ELLIPSIS_CODE: return ELLIPSIS; case DOUBLESLASHEQUAL_CODE: return DOUBLESLASHEQUAL; From b4164cf2b4424486f522a258672468df30381ed5 Mon Sep 17 00:00:00 2001 From: DefinitelyNotAnOrca Date: Wed, 23 Oct 2024 21:31:42 -0500 Subject: [PATCH 3/7] Fixed Issue with Missing not equal operator and removed redundant checks --- Parser/token.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/Parser/token.c b/Parser/token.c index 9980d3301a8f6d..a49717e1d70e9a 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -124,6 +124,7 @@ _PyToken_OneChar(int c1) #define COLONEQUAL_CODE (int)(':' << 8 | '=') // := #define LEFTSHIFT_CODE (int)('<' << 8 | '<') // << #define LESSEQUAL_CODE (int)('<' << 8 | '=') // <= +#define NOTEQUAL_OTHER_CODE (int)('<' << 8 | '>') // <> #define EQEQUAL_CODE (int)('=' << 8 | '=') // == #define GREATEREQUAL_CODE (int)('>' << 8 | '=') // >= #define RIGHTSHIFT_CODE (int)('>' << 8 | '>') // >> @@ -134,30 +135,27 @@ _PyToken_OneChar(int c1) int _PyToken_TwoChars(int c1, int c2) { - if(c1 > 255 || c2 > 255) { // handle to see if tokens are out of range - return OP; - } - switch (c1 << 8 | c2) { // Combine the two tokens into a 16-bit integer - case NOTEQUAL_CODE: return NOTEQUAL; - case PERCENTEQUAL_CODE: return PERCENTEQUAL; - case AMPEREQUAL_CODE: return AMPEREQUAL; - case DOUBLESTAR_CODE: return DOUBLESTAR; - case STAREQUAL_CODE: return STAREQUAL; - case PLUSEQUAL_CODE: return PLUSEQUAL; - case MINEQUAL_CODE: return MINEQUAL; - case RARROW_CODE: return RARROW; - case DOUBLESLASH_CODE: return DOUBLESLASH; - case SLASHEQUAL_CODE: return SLASHEQUAL; - case COLONEQUAL_CODE: return COLONEQUAL; - case LEFTSHIFT_CODE: return LEFTSHIFT; - case LESSEQUAL_CODE: return LESSEQUAL; - case EQEQUAL_CODE: return EQEQUAL; - case GREATEREQUAL_CODE: return GREATEREQUAL; - case RIGHTSHIFT_CODE: return RIGHTSHIFT; - case ATEQUAL_CODE: return ATEQUAL; - case CIRCUMFLEXEQUAL_CODE: return CIRCUMFLEXEQUAL; - case VBAREQUAL_CODE: return VBAREQUAL; + case NOTEQUAL_CODE: + case NOTEQUAL_OTHER_CODE: return NOTEQUAL; + case PERCENTEQUAL_CODE: return PERCENTEQUAL; + case AMPEREQUAL_CODE: return AMPEREQUAL; + case DOUBLESTAR_CODE: return DOUBLESTAR; + case STAREQUAL_CODE: return STAREQUAL; + case PLUSEQUAL_CODE: return PLUSEQUAL; + case MINEQUAL_CODE: return MINEQUAL; + case RARROW_CODE: return RARROW; + case DOUBLESLASH_CODE: return DOUBLESLASH; + case SLASHEQUAL_CODE: return SLASHEQUAL; + case COLONEQUAL_CODE: return COLONEQUAL; + case LEFTSHIFT_CODE: return LEFTSHIFT; + case LESSEQUAL_CODE: return LESSEQUAL; + case EQEQUAL_CODE: return EQEQUAL; + case GREATEREQUAL_CODE: return GREATEREQUAL; + case RIGHTSHIFT_CODE: return RIGHTSHIFT; + case ATEQUAL_CODE: return ATEQUAL; + case CIRCUMFLEXEQUAL_CODE: return CIRCUMFLEXEQUAL; + case VBAREQUAL_CODE: return VBAREQUAL; } return OP; } @@ -174,10 +172,6 @@ _PyToken_TwoChars(int c1, int c2) int _PyToken_ThreeChars(int c1, int c2, int c3) { - if(c1 > 255 || c2 > 255 || c3 > 255) { // handle to see if tokens are out of range - return OP; - } - switch (c1 << 16 | c2 << 8 | c3) { // Combine the three tokens into a 24-bit integer case DOUBLESTAREQUAL_CODE: return DOUBLESTAREQUAL; case ELLIPSIS_CODE: return ELLIPSIS; From 7f719c916c59e4cbf6ba7c13416d65fde2f58776 Mon Sep 17 00:00:00 2001 From: DefinitelyNotAnOrca Date: Fri, 25 Oct 2024 07:01:39 -0500 Subject: [PATCH 4/7] Cleaned up Code For Defenitions --- Include/internal/pycore_token.h | 32 +++++++++++++ Parser/token.c | 84 +++++++++++---------------------- 2 files changed, 60 insertions(+), 56 deletions(-) diff --git a/Include/internal/pycore_token.h b/Include/internal/pycore_token.h index 571cd6249f2812..282a6e85220ac4 100644 --- a/Include/internal/pycore_token.h +++ b/Include/internal/pycore_token.h @@ -93,6 +93,38 @@ extern "C" { #define ISSTRINGLIT(x) ((x) == STRING || \ (x) == FSTRING_MIDDLE) +#define GENERATE_2CHAR_CODE(x, y) ((int)((x) << 8 | (y))) +#define GENERATE_3CHAR_CODE(x, y, z) ((int)((x) << 16 | (y) << 8 | (z))) + + +// The code is a 16-bit integer with the first character in the high byte and the second character in the low byte. +#define NOTEQUAL_CODE GENERATE_2CHAR_CODE('!', '=') +#define PERCENTEQUAL_CODE GENERATE_2CHAR_CODE('%', '=') +#define AMPEREQUAL_CODE GENERATE_2CHAR_CODE('&', '=') +#define DOUBLESTAR_CODE GENERATE_2CHAR_CODE('*', '*') +#define STAREQUAL_CODE GENERATE_2CHAR_CODE('*', '=') +#define PLUSEQUAL_CODE GENERATE_2CHAR_CODE('+', '=') +#define MINEQUAL_CODE GENERATE_2CHAR_CODE('-', '=') +#define RARROW_CODE GENERATE_2CHAR_CODE('-', '>') +#define DOUBLESLASH_CODE GENERATE_2CHAR_CODE('/', '/') +#define SLASHEQUAL_CODE GENERATE_2CHAR_CODE('/', '=') +#define COLONEQUAL_CODE GENERATE_2CHAR_CODE(':', '=') +#define LEFTSHIFT_CODE GENERATE_2CHAR_CODE('<', '<') +#define LESSEQUAL_CODE GENERATE_2CHAR_CODE('<', '=') +#define NOTEQUAL_2_CODE GENERATE_2CHAR_CODE('<', '>') +#define EQEQUAL_CODE GENERATE_2CHAR_CODE('=', '=') +#define GREATEREQUAL_CODE GENERATE_2CHAR_CODE('>', '=') +#define RIGHTSHIFT_CODE GENERATE_2CHAR_CODE('>', '>') +#define ATEQUAL_CODE GENERATE_2CHAR_CODE('@', '=') +#define CIRCUMFLEXEQUAL_CODE GENERATE_2CHAR_CODE('^', '=') +#define VBAREQUAL_CODE GENERATE_2CHAR_CODE('|', '=') + +// The code is a 24-bit integer with the first character in the high byte, the second character in the middle byte, and the third character in the low byte. +#define DOUBLESTAREQUAL_CODE GENERATE_3CHAR_CODE('*', '*', '=') +#define ELLIPSIS_CODE GENERATE_3CHAR_CODE('.', '.', '.') +#define DOUBLESLASHEQUAL_CODE GENERATE_3CHAR_CODE('/', '/', '=') +#define LEFTSHIFTEQUAL_CODE GENERATE_3CHAR_CODE('<', '<', '=') +#define RIGHTSHIFTEQUAL_CODE GENERATE_3CHAR_CODE('>', '>', '=') // Export these 4 symbols for 'test_peg_generator' PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ diff --git a/Parser/token.c b/Parser/token.c index a49717e1d70e9a..a87b5cd9be90b8 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -109,75 +109,47 @@ _PyToken_OneChar(int c1) return OP; } -// Return the token corresponding to two tokens -// The code is a 16-bit integer with the first character in the high byte and the second character in the low byte. -#define NOTEQUAL_CODE (int)('!' << 8 | '=') // != -#define PERCENTEQUAL_CODE (int)('%' << 8 | '=') // %= -#define AMPEREQUAL_CODE (int)('&' << 8 | '=') // &= -#define DOUBLESTAR_CODE (int)('*' << 8 | '*') // ** -#define STAREQUAL_CODE (int)('*' << 8 | '=') // *= -#define PLUSEQUAL_CODE (int)('+' << 8 | '=') // += -#define MINEQUAL_CODE (int)('-' << 8 | '=') // -= -#define RARROW_CODE (int)('-' << 8 | '>') // -> -#define DOUBLESLASH_CODE (int)('/' << 8 | '/') // // -#define SLASHEQUAL_CODE (int)('/' << 8 | '=') // /= -#define COLONEQUAL_CODE (int)(':' << 8 | '=') // := -#define LEFTSHIFT_CODE (int)('<' << 8 | '<') // << -#define LESSEQUAL_CODE (int)('<' << 8 | '=') // <= -#define NOTEQUAL_OTHER_CODE (int)('<' << 8 | '>') // <> -#define EQEQUAL_CODE (int)('=' << 8 | '=') // == -#define GREATEREQUAL_CODE (int)('>' << 8 | '=') // >= -#define RIGHTSHIFT_CODE (int)('>' << 8 | '>') // >> -#define ATEQUAL_CODE (int)('@' << 8 | '=') // @= -#define CIRCUMFLEXEQUAL_CODE (int)('^' << 8 | '=') // ^= -#define VBAREQUAL_CODE (int)('|' << 8 | '=') // |= int _PyToken_TwoChars(int c1, int c2) { - switch (c1 << 8 | c2) { // Combine the two tokens into a 16-bit integer - case NOTEQUAL_CODE: - case NOTEQUAL_OTHER_CODE: return NOTEQUAL; - case PERCENTEQUAL_CODE: return PERCENTEQUAL; - case AMPEREQUAL_CODE: return AMPEREQUAL; - case DOUBLESTAR_CODE: return DOUBLESTAR; - case STAREQUAL_CODE: return STAREQUAL; - case PLUSEQUAL_CODE: return PLUSEQUAL; - case MINEQUAL_CODE: return MINEQUAL; - case RARROW_CODE: return RARROW; - case DOUBLESLASH_CODE: return DOUBLESLASH; - case SLASHEQUAL_CODE: return SLASHEQUAL; - case COLONEQUAL_CODE: return COLONEQUAL; - case LEFTSHIFT_CODE: return LEFTSHIFT; - case LESSEQUAL_CODE: return LESSEQUAL; - case EQEQUAL_CODE: return EQEQUAL; - case GREATEREQUAL_CODE: return GREATEREQUAL; - case RIGHTSHIFT_CODE: return RIGHTSHIFT; - case ATEQUAL_CODE: return ATEQUAL; - case CIRCUMFLEXEQUAL_CODE: return CIRCUMFLEXEQUAL; - case VBAREQUAL_CODE: return VBAREQUAL; + switch (GENERATE_2CHAR_CODE(c1, c2)) { // Combine the two tokens into a 16-bit integer + case GENERATE_2CHAR_CODE('!', '='): + case GENERATE_2CHAR_CODE('<', '>'): return NOTEQUAL; + case GENERATE_2CHAR_CODE('%', '='): return PERCENTEQUAL; + case GENERATE_2CHAR_CODE('&', '='): return AMPEREQUAL; + case GENERATE_2CHAR_CODE('*', '*'): return DOUBLESTAR; + case GENERATE_2CHAR_CODE('*', '='): return STAREQUAL; + case GENERATE_2CHAR_CODE('+', '='): return PLUSEQUAL; + case GENERATE_2CHAR_CODE('-', '='): return MINEQUAL; + case GENERATE_2CHAR_CODE('-', '>'): return RARROW; + case GENERATE_2CHAR_CODE('/', '/'): return DOUBLESLASH; + case GENERATE_2CHAR_CODE('/', '='): return SLASHEQUAL; + case GENERATE_2CHAR_CODE(':', '='): return COLONEQUAL; + case GENERATE_2CHAR_CODE('<', '<'): return LEFTSHIFT; + case GENERATE_2CHAR_CODE('<', '='): return LESSEQUAL; + case GENERATE_2CHAR_CODE('=', '='): return EQEQUAL; + case GENERATE_2CHAR_CODE('>', '='): return GREATEREQUAL; + case GENERATE_2CHAR_CODE('>', '>'): return RIGHTSHIFT; + case GENERATE_2CHAR_CODE('@', '='): return ATEQUAL; + case GENERATE_2CHAR_CODE('^', '='): return CIRCUMFLEXEQUAL; + case GENERATE_2CHAR_CODE('|', '='): return VBAREQUAL; } return OP; } -// Return the token corresponding to three tokens -// The code is a 24-bit integer with the first character in the high byte, the second character in the middle byte, and the third character in the low byte. -#define DOUBLESTAREQUAL_CODE (int)('*' << 16 | '*' << 8 | '=') // **= -#define ELLIPSIS_CODE (int)('.' << 16 | '.' << 8 | '.') // ... -#define DOUBLESLASHEQUAL_CODE (int)('/' << 16 | '/' << 8 | '=') // //= -#define LEFTSHIFTEQUAL_CODE (int)('<' << 16 | '<' << 8 | '=') // <<= -#define RIGHTSHIFTEQUAL_CODE (int)('>' << 16 | '>' << 8 | '=') // >>= + int _PyToken_ThreeChars(int c1, int c2, int c3) { - switch (c1 << 16 | c2 << 8 | c3) { // Combine the three tokens into a 24-bit integer - case DOUBLESTAREQUAL_CODE: return DOUBLESTAREQUAL; - case ELLIPSIS_CODE: return ELLIPSIS; - case DOUBLESLASHEQUAL_CODE: return DOUBLESLASHEQUAL; - case LEFTSHIFTEQUAL_CODE: return LEFTSHIFTEQUAL; - case RIGHTSHIFTEQUAL_CODE: return RIGHTSHIFTEQUAL; + switch (GENERATE_3CHAR_CODE(c1, c2, c3)) { // Combine the three tokens into a 24-bit integer + case GENERATE_3CHAR_CODE('*', '*', '='): return DOUBLESTAREQUAL; + case GENERATE_3CHAR_CODE('.', '.', '.'): return ELLIPSIS; + case GENERATE_3CHAR_CODE('/', '/', '='): return DOUBLESLASHEQUAL; + case GENERATE_3CHAR_CODE('<', '<', '='): return LEFTSHIFTEQUAL; + case GENERATE_3CHAR_CODE('>', '>', '='): return RIGHTSHIFTEQUAL; } return OP; } From 219eeba8381dd10c527a65e79b89b794475567cb Mon Sep 17 00:00:00 2001 From: DefinitelyNotAnOrca Date: Fri, 25 Oct 2024 11:50:55 -0500 Subject: [PATCH 5/7] Added Missing Header Change --- Include/internal/pycore_token.h | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/Include/internal/pycore_token.h b/Include/internal/pycore_token.h index 282a6e85220ac4..07bd52ccbd8bf7 100644 --- a/Include/internal/pycore_token.h +++ b/Include/internal/pycore_token.h @@ -96,36 +96,6 @@ extern "C" { #define GENERATE_2CHAR_CODE(x, y) ((int)((x) << 8 | (y))) #define GENERATE_3CHAR_CODE(x, y, z) ((int)((x) << 16 | (y) << 8 | (z))) - -// The code is a 16-bit integer with the first character in the high byte and the second character in the low byte. -#define NOTEQUAL_CODE GENERATE_2CHAR_CODE('!', '=') -#define PERCENTEQUAL_CODE GENERATE_2CHAR_CODE('%', '=') -#define AMPEREQUAL_CODE GENERATE_2CHAR_CODE('&', '=') -#define DOUBLESTAR_CODE GENERATE_2CHAR_CODE('*', '*') -#define STAREQUAL_CODE GENERATE_2CHAR_CODE('*', '=') -#define PLUSEQUAL_CODE GENERATE_2CHAR_CODE('+', '=') -#define MINEQUAL_CODE GENERATE_2CHAR_CODE('-', '=') -#define RARROW_CODE GENERATE_2CHAR_CODE('-', '>') -#define DOUBLESLASH_CODE GENERATE_2CHAR_CODE('/', '/') -#define SLASHEQUAL_CODE GENERATE_2CHAR_CODE('/', '=') -#define COLONEQUAL_CODE GENERATE_2CHAR_CODE(':', '=') -#define LEFTSHIFT_CODE GENERATE_2CHAR_CODE('<', '<') -#define LESSEQUAL_CODE GENERATE_2CHAR_CODE('<', '=') -#define NOTEQUAL_2_CODE GENERATE_2CHAR_CODE('<', '>') -#define EQEQUAL_CODE GENERATE_2CHAR_CODE('=', '=') -#define GREATEREQUAL_CODE GENERATE_2CHAR_CODE('>', '=') -#define RIGHTSHIFT_CODE GENERATE_2CHAR_CODE('>', '>') -#define ATEQUAL_CODE GENERATE_2CHAR_CODE('@', '=') -#define CIRCUMFLEXEQUAL_CODE GENERATE_2CHAR_CODE('^', '=') -#define VBAREQUAL_CODE GENERATE_2CHAR_CODE('|', '=') - -// The code is a 24-bit integer with the first character in the high byte, the second character in the middle byte, and the third character in the low byte. -#define DOUBLESTAREQUAL_CODE GENERATE_3CHAR_CODE('*', '*', '=') -#define ELLIPSIS_CODE GENERATE_3CHAR_CODE('.', '.', '.') -#define DOUBLESLASHEQUAL_CODE GENERATE_3CHAR_CODE('/', '/', '=') -#define LEFTSHIFTEQUAL_CODE GENERATE_3CHAR_CODE('<', '<', '=') -#define RIGHTSHIFTEQUAL_CODE GENERATE_3CHAR_CODE('>', '>', '=') - // Export these 4 symbols for 'test_peg_generator' PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ PyAPI_FUNC(int) _PyToken_OneChar(int); From 10ed2f79698651e7c79d92dd7b648205c3e3d55f Mon Sep 17 00:00:00 2001 From: DefinitelyNotAnOrca Date: Fri, 25 Oct 2024 12:16:17 -0500 Subject: [PATCH 6/7] Updated Formatting of token.c and adding comments in header file --- Include/internal/pycore_token.h | 4 +-- Parser/token.c | 48 ++++++++++++++++----------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Include/internal/pycore_token.h b/Include/internal/pycore_token.h index 07bd52ccbd8bf7..8e1f8aef59ad3b 100644 --- a/Include/internal/pycore_token.h +++ b/Include/internal/pycore_token.h @@ -93,8 +93,8 @@ extern "C" { #define ISSTRINGLIT(x) ((x) == STRING || \ (x) == FSTRING_MIDDLE) -#define GENERATE_2CHAR_CODE(x, y) ((int)((x) << 8 | (y))) -#define GENERATE_3CHAR_CODE(x, y, z) ((int)((x) << 16 | (y) << 8 | (z))) +#define GENERATE_2CHAR_CODE(x, y) ((int)((x) << 8 | (y))) // Generate a 16-bit integer from 2 8-bit characters +#define GENERATE_3CHAR_CODE(x, y, z) ((int)((x) << 16 | (y) << 8 | (z))) // Generate a 24-bit integer from 3 8-bit characters // Export these 4 symbols for 'test_peg_generator' PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ diff --git a/Parser/token.c b/Parser/token.c index a87b5cd9be90b8..449675a0d4e12c 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -81,30 +81,30 @@ int _PyToken_OneChar(int c1) { switch (c1) { - case '!': return EXCLAMATION; - case '%': return PERCENT; - case '&': return AMPER; - case '(': return LPAR; - case ')': return RPAR; - case '*': return STAR; - case '+': return PLUS; - case ',': return COMMA; - case '-': return MINUS; - case '.': return DOT; - case '/': return SLASH; - case ':': return COLON; - case ';': return SEMI; - case '<': return LESS; - case '=': return EQUAL; - case '>': return GREATER; - case '@': return AT; - case '[': return LSQB; - case ']': return RSQB; - case '^': return CIRCUMFLEX; - case '{': return LBRACE; - case '|': return VBAR; - case '}': return RBRACE; - case '~': return TILDE; + case '!': return EXCLAMATION; + case '%': return PERCENT; + case '&': return AMPER; + case '(': return LPAR; + case ')': return RPAR; + case '*': return STAR; + case '+': return PLUS; + case ',': return COMMA; + case '-': return MINUS; + case '.': return DOT; + case '/': return SLASH; + case ':': return COLON; + case ';': return SEMI; + case '<': return LESS; + case '=': return EQUAL; + case '>': return GREATER; + case '@': return AT; + case '[': return LSQB; + case ']': return RSQB; + case '^': return CIRCUMFLEX; + case '{': return LBRACE; + case '|': return VBAR; + case '}': return RBRACE; + case '~': return TILDE; } return OP; } From ff781c88ea15b17c9466871f575eac6ce59dd027 Mon Sep 17 00:00:00 2001 From: DefinitelyNotAnOrca Date: Fri, 25 Oct 2024 14:51:27 -0500 Subject: [PATCH 7/7] Updated generate_token.py to generate the correct token.c --- Parser/token.c | 12 ++++------ Tools/build/generate_token.py | 44 ++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/Parser/token.c b/Parser/token.c index 449675a0d4e12c..609d484840cb82 100644 --- a/Parser/token.c +++ b/Parser/token.c @@ -109,13 +109,11 @@ _PyToken_OneChar(int c1) return OP; } - int _PyToken_TwoChars(int c1, int c2) { - switch (GENERATE_2CHAR_CODE(c1, c2)) { // Combine the two tokens into a 16-bit integer - case GENERATE_2CHAR_CODE('!', '='): - case GENERATE_2CHAR_CODE('<', '>'): return NOTEQUAL; + switch (GENERATE_2CHAR_CODE(c1, c2)) { + case GENERATE_2CHAR_CODE('!', '='): return NOTEQUAL; case GENERATE_2CHAR_CODE('%', '='): return PERCENTEQUAL; case GENERATE_2CHAR_CODE('&', '='): return AMPEREQUAL; case GENERATE_2CHAR_CODE('*', '*'): return DOUBLESTAR; @@ -128,6 +126,7 @@ _PyToken_TwoChars(int c1, int c2) case GENERATE_2CHAR_CODE(':', '='): return COLONEQUAL; case GENERATE_2CHAR_CODE('<', '<'): return LEFTSHIFT; case GENERATE_2CHAR_CODE('<', '='): return LESSEQUAL; + case GENERATE_2CHAR_CODE('<', '>'): return NOTEQUAL; case GENERATE_2CHAR_CODE('=', '='): return EQEQUAL; case GENERATE_2CHAR_CODE('>', '='): return GREATEREQUAL; case GENERATE_2CHAR_CODE('>', '>'): return RIGHTSHIFT; @@ -138,13 +137,10 @@ _PyToken_TwoChars(int c1, int c2) return OP; } - - - int _PyToken_ThreeChars(int c1, int c2, int c3) { - switch (GENERATE_3CHAR_CODE(c1, c2, c3)) { // Combine the three tokens into a 24-bit integer + switch (GENERATE_3CHAR_CODE(c1, c2, c3)) { case GENERATE_3CHAR_CODE('*', '*', '='): return DOUBLESTAREQUAL; case GENERATE_3CHAR_CODE('.', '.', '.'): return ELLIPSIS; case GENERATE_3CHAR_CODE('/', '/', '='): return DOUBLESLASHEQUAL; diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py index 16c38841e44a4d..5b8d1c331fe277 100755 --- a/Tools/build/generate_token.py +++ b/Tools/build/generate_token.py @@ -83,6 +83,8 @@ def update_file(file, content): #define ISSTRINGLIT(x) ((x) == STRING || \\ (x) == FSTRING_MIDDLE) +#define GENERATE_2CHAR_CODE(x, y) ((int)((x) << 8 | (y))) // Generate a 16-bit integer from 2 8-bit characters +#define GENERATE_3CHAR_CODE(x, y, z) ((int)((x) << 16 | (y) << 8 | (z))) // Generate a 24-bit integer from 3 8-bit characters // Export these 4 symbols for 'test_peg_generator' PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ @@ -149,6 +151,31 @@ def make_h(infile, outfile='Include/internal/pycore_token.h'): } """ +def generate_one_char_tokens(tokens): + result = [] + result.append(' switch (c1) {\n') + for c, name in sorted(tokens.items()): + result.append(" case '%s': return %s;\n" % (c, name)) + result.append(' }\n') + return ''.join(result) + +def generate_two_char_tokens(tokens): + result = [] + result.append(' switch (GENERATE_2CHAR_CODE(c1, c2)) {\n') + for (c1, c2), name in sorted(tokens.items()): + result.append(" case GENERATE_2CHAR_CODE('%s', '%s'): return %s;\n" % (c1, c2, name)) + result.append(' }\n') + return ''.join(result) + +def generate_three_char_tokens(tokens): + result = [] + result.append(' switch (GENERATE_3CHAR_CODE(c1, c2, c3)) {\n') + for (c1, c2, c3), name in sorted(tokens.items()): + result.append(" case GENERATE_3CHAR_CODE('%s', '%s', '%s'): return %s;\n" % (c1, c2, c3, name)) + result.append(' }\n') + return ''.join(result) + + def generate_chars_to_token(mapping, n=1): result = [] write = result.append @@ -172,14 +199,15 @@ def generate_chars_to_token(mapping, n=1): def make_c(infile, outfile='Parser/token.c'): tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) string_to_tok['<>'] = string_to_tok['!='] - chars_to_token = {} + chars_to_token = { + 1: {}, + 2: {}, + 3: {}, + } for string, value in string_to_tok.items(): assert 1 <= len(string) <= 3 name = tok_names[value] - m = chars_to_token.setdefault(len(string), {}) - for c in string[:-1]: - m = m.setdefault(c, {}) - m[string[-1]] = name + chars_to_token[len(string)][string] = name names = [] for value, name in enumerate(tok_names): @@ -190,9 +218,9 @@ def make_c(infile, outfile='Parser/token.c'): if update_file(outfile, token_c_template % ( ''.join(names), - generate_chars_to_token(chars_to_token[1]), - generate_chars_to_token(chars_to_token[2]), - generate_chars_to_token(chars_to_token[3]) + generate_one_char_tokens(chars_to_token[1]), + generate_two_char_tokens(chars_to_token[2]), + generate_three_char_tokens(chars_to_token[3]) )): print("%s regenerated from %s" % (outfile, infile))