-
-
Notifications
You must be signed in to change notification settings - Fork 33.2k
gh-121130: Fix f-string format specifiers with debug expressions #121150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c371cc9
89bd65a
f4aed72
93ab4a6
b97f4fd
3333a49
899cf9a
4114e0c
88a0296
e6e0f26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Fix f-strings with debug expressions in format specifiers. Patch by Pablo | ||
| Galindo |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -967,6 +967,8 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv) | |
| return result_token_with_metadata(p, conv, conv_token->metadata); | ||
| } | ||
|
|
||
| static asdl_expr_seq * | ||
| unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions); | ||
| ResultTokenWithMetadata * | ||
| _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset, | ||
| int end_lineno, int end_col_offset, PyArena *arena) | ||
|
|
@@ -1005,8 +1007,15 @@ _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, in | |
| assert(j == non_empty_count); | ||
| spec = resized_spec; | ||
| } | ||
| expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, | ||
| end_col_offset, p->arena); | ||
| expr_ty res; | ||
| if (asdl_seq_LEN(spec) == 0) { | ||
| res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, | ||
| end_col_offset, p->arena); | ||
| } else { | ||
| res = _PyPegen_concatenate_strings(p, spec, | ||
|
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code is so we merge and concatenate the Constant and JoinedStr nodes. See what the tree originally looks like here: #121150 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand why the tree in #121150 is incorrect, but also running that same example with the latest status of your branch gives me the same exact one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry for the confusion. That tree is the correct one, but without this call it basically has a bunch of Constants all together and the JoinedStr nodes are nested (comment out the code and check it out to see what I mean). |
||
| lineno, col_offset, end_lineno, | ||
| end_col_offset, arena); | ||
| } | ||
| if (!res) { | ||
| return NULL; | ||
| } | ||
|
|
@@ -1306,6 +1315,7 @@ unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions) | |
|
|
||
| expr_ty | ||
| _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) { | ||
|
|
||
| asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions); | ||
| Py_ssize_t n_items = asdl_seq_LEN(expr); | ||
|
|
||
|
|
@@ -1470,7 +1480,6 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re | |
| debug_end_offset = end_col_offset; | ||
| debug_metadata = closing_brace->metadata; | ||
| } | ||
|
|
||
| expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line, | ||
| debug_end_offset - 1, p->arena); | ||
| if (!debug_text) { | ||
|
|
@@ -1503,16 +1512,23 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, | |
| Py_ssize_t n_flattened_elements = 0; | ||
| for (i = 0; i < len; i++) { | ||
| expr_ty elem = asdl_seq_GET(strings, i); | ||
| if (elem->kind == Constant_kind) { | ||
| if (PyBytes_CheckExact(elem->v.Constant.value)) { | ||
| bytes_found = 1; | ||
| } else { | ||
| unicode_string_found = 1; | ||
| } | ||
| n_flattened_elements++; | ||
| } else { | ||
| n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values); | ||
| f_string_found = 1; | ||
| switch(elem->kind) { | ||
| case Constant_kind: | ||
| if (PyBytes_CheckExact(elem->v.Constant.value)) { | ||
| bytes_found = 1; | ||
| } else { | ||
| unicode_string_found = 1; | ||
| } | ||
| n_flattened_elements++; | ||
| break; | ||
| case JoinedStr_kind: | ||
|
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code is to accommodate the case when we call this function with things other than `Constant` or `JoinedStr` nodes (the `default` case). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How would this happen? It probably shouldn't, but I may be missing something. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because I am flattening collections that contain new nodes in https://github.com/python/cpython/pull/121150/files#r1662410566 (it's a new call over a new array that was not happening before) |
||
| n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values); | ||
| f_string_found = 1; | ||
| break; | ||
| default: | ||
| n_flattened_elements++; | ||
| f_string_found = 1; | ||
| break; | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1554,16 +1570,19 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, | |
| Py_ssize_t j = 0; | ||
| for (i = 0; i < len; i++) { | ||
| expr_ty elem = asdl_seq_GET(strings, i); | ||
| if (elem->kind == Constant_kind) { | ||
| asdl_seq_SET(flattened, current_pos++, elem); | ||
| } else { | ||
| for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) { | ||
| expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j); | ||
| if (subvalue == NULL) { | ||
| return NULL; | ||
| switch(elem->kind) { | ||
| case JoinedStr_kind: | ||
| for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) { | ||
| expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j); | ||
| if (subvalue == NULL) { | ||
| return NULL; | ||
| } | ||
| asdl_seq_SET(flattened, current_pos++, subvalue); | ||
| } | ||
| asdl_seq_SET(flattened, current_pos++, subvalue); | ||
| } | ||
| break; | ||
| default: | ||
| asdl_seq_SET(flattened, current_pos++, elem); | ||
| break; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -989,6 +989,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t | |
| the_current_tok->last_expr_buffer = NULL; | ||
| the_current_tok->last_expr_size = 0; | ||
| the_current_tok->last_expr_end = -1; | ||
| the_current_tok->in_format_spec = 0; | ||
| the_current_tok->f_string_debug = 0; | ||
|
|
||
| switch (*tok->start) { | ||
|
|
@@ -1137,15 +1138,20 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t | |
| * by the `{` case, so for ensuring that we are on the 0th level, we need | ||
| * to adjust it manually */ | ||
| int cursor = current_tok->curly_bracket_depth - (c != '{'); | ||
| if (cursor == 0 && !_PyLexer_update_fstring_expr(tok, c)) { | ||
| int in_format_spec = current_tok->in_format_spec; | ||
| int cursor_in_format_with_debug = | ||
| cursor == 1 && (current_tok->f_string_debug || in_format_spec); | ||
| int cursor_valid = cursor == 0 || cursor_in_format_with_debug; | ||
| if ((cursor_valid) && !_PyLexer_update_fstring_expr(tok, c)) { | ||
| return MAKE_TOKEN(ENDMARKER); | ||
| } | ||
| if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) { | ||
| if ((cursor_valid) && c != '{' && set_fstring_expr(tok, token, c)) { | ||
| return MAKE_TOKEN(ERRORTOKEN); | ||
| } | ||
|
|
||
| if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) { | ||
| current_tok->kind = TOK_FSTRING_MODE; | ||
| current_tok->in_format_spec = 1; | ||
| p_start = tok->start; | ||
| p_end = tok->cur; | ||
| return MAKE_TOKEN(_PyToken_OneChar(c)); | ||
|
|
@@ -1235,6 +1241,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t | |
| if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) { | ||
| current_tok->curly_bracket_expr_start_depth--; | ||
| current_tok->kind = TOK_FSTRING_MODE; | ||
| current_tok->in_format_spec = 0; | ||
| current_tok->f_string_debug = 0; | ||
| } | ||
| } | ||
|
|
@@ -1317,11 +1324,11 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct | |
| tok->multi_line_start = tok->line_start; | ||
| while (end_quote_size != current_tok->f_string_quote_size) { | ||
| int c = tok_nextc(tok); | ||
| if (tok->done == E_ERROR) { | ||
| if (tok->done == E_ERROR || tok->done == E_DECODE) { | ||
|
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was a bug, as if we enter this loop with a decoding error (`tok->done == E_DECODE`) it was not being reported as an error token. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recently came across this in another branch I'm working on. If we check for this here, we can probably remove the equivalent `E_DECODE` check elsewhere in the tokenizer. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea, will do |
||
| return MAKE_TOKEN(ERRORTOKEN); | ||
| } | ||
| int in_format_spec = ( | ||
| current_tok->last_expr_end != -1 | ||
| current_tok->in_format_spec | ||
| && | ||
| INSIDE_FSTRING_EXPR(current_tok) | ||
| ); | ||
|
|
@@ -1337,6 +1344,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct | |
| if (in_format_spec && c == '\n') { | ||
| tok_backup(tok, c); | ||
| TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; | ||
| current_tok->in_format_spec = 0; | ||
| p_start = tok->start; | ||
| p_end = tok->cur; | ||
| return MAKE_TOKEN(FSTRING_MIDDLE); | ||
|
|
@@ -1378,6 +1386,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct | |
| } | ||
|
|
||
| if (c == '{') { | ||
| if (!_PyLexer_update_fstring_expr(tok, c)) { | ||
| return MAKE_TOKEN(ENDMARKER); | ||
| } | ||
| int peek = tok_nextc(tok); | ||
| if (peek != '{' || in_format_spec) { | ||
| tok_backup(tok, peek); | ||
|
|
@@ -1387,6 +1398,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct | |
| return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply")); | ||
| } | ||
| TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; | ||
| current_tok->in_format_spec = 0; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this always be 0 here, if we've correctly reset it when the format spec is ending? |
||
| p_start = tok->start; | ||
| p_end = tok->cur; | ||
| } else { | ||
|
|
@@ -1406,13 +1418,15 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct | |
| // scanning (indicated by the end of the expression being set) and we are not at the top level | ||
| // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double | ||
| // brackets, we can bypass it here. | ||
| if (peek == '}' && !in_format_spec) { | ||
| int cursor = current_tok->curly_bracket_depth; | ||
| if (peek == '}' && !in_format_spec && cursor == 0) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need the cursor check here? |
||
| p_start = tok->start; | ||
| p_end = tok->cur - 1; | ||
| } else { | ||
| tok_backup(tok, peek); | ||
| tok_backup(tok, c); | ||
| TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; | ||
| current_tok->in_format_spec = 0; | ||
| p_start = tok->start; | ||
| p_end = tok->cur; | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.