Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-30465: Fix lineno and col_offset in fstring AST nodes #1800

Merged
merged 1 commit into from
Sep 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 248 additions & 0 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,253 @@ def __call__(self):
# Make sure x was called.
self.assertTrue(x.called)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about some test that checks the output of eval()? This code doesn't work as-is, because the NameError doesn't include the line number. But I assume there's a way to capture it and inspect it.

I suggest this because this is the behavior that the end user really cares about.

    def test_error_line_numbers(self):
        expr = '''f"""
some text
other text
{x}
"""'''
        with self.assertRaisesRegex(NameError, ', line 5,'):
            eval(expr)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, never mind. I was conflating this with bpo-31140.

def test_ast_line_numbers(self):
expr = """
a = 10
f'{a * x()}'"""
t = ast.parse(expr)
self.assertEqual(type(t), ast.Module)
self.assertEqual(len(t.body), 2)
# check `a = 10`
self.assertEqual(type(t.body[0]), ast.Assign)
self.assertEqual(t.body[0].lineno, 2)
# check `f'...'`
self.assertEqual(type(t.body[1]), ast.Expr)
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
self.assertEqual(len(t.body[1].value.values), 1)
self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
# check the binop location
binop = t.body[1].value.values[0].value
self.assertEqual(type(binop), ast.BinOp)
self.assertEqual(type(binop.left), ast.Name)
self.assertEqual(type(binop.op), ast.Mult)
self.assertEqual(type(binop.right), ast.Call)
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
self.assertEqual(binop.col_offset, 3)
self.assertEqual(binop.left.col_offset, 3)
self.assertEqual(binop.right.col_offset, 7)

def test_ast_line_numbers_multiple_formattedvalues(self):
expr = """
f'no formatted values'
f'eggs {a * x()} spam {b + y()}'"""
t = ast.parse(expr)
self.assertEqual(type(t), ast.Module)
self.assertEqual(len(t.body), 2)
# check `f'no formatted value'`
self.assertEqual(type(t.body[0]), ast.Expr)
self.assertEqual(type(t.body[0].value), ast.JoinedStr)
self.assertEqual(t.body[0].lineno, 2)
# check `f'...'`
self.assertEqual(type(t.body[1]), ast.Expr)
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
self.assertEqual(len(t.body[1].value.values), 4)
self.assertEqual(type(t.body[1].value.values[0]), ast.Str)
self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
self.assertEqual(type(t.body[1].value.values[2]), ast.Str)
self.assertEqual(type(t.body[1].value.values[3]), ast.FormattedValue)
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
self.assertEqual(t.body[1].value.values[1].lineno, 3)
self.assertEqual(t.body[1].value.values[2].lineno, 3)
self.assertEqual(t.body[1].value.values[3].lineno, 3)
# check the first binop location
binop1 = t.body[1].value.values[1].value
self.assertEqual(type(binop1), ast.BinOp)
self.assertEqual(type(binop1.left), ast.Name)
self.assertEqual(type(binop1.op), ast.Mult)
self.assertEqual(type(binop1.right), ast.Call)
self.assertEqual(binop1.lineno, 3)
self.assertEqual(binop1.left.lineno, 3)
self.assertEqual(binop1.right.lineno, 3)
self.assertEqual(binop1.col_offset, 8)
self.assertEqual(binop1.left.col_offset, 8)
self.assertEqual(binop1.right.col_offset, 12)
# check the second binop location
binop2 = t.body[1].value.values[3].value
self.assertEqual(type(binop2), ast.BinOp)
self.assertEqual(type(binop2.left), ast.Name)
self.assertEqual(type(binop2.op), ast.Add)
self.assertEqual(type(binop2.right), ast.Call)
self.assertEqual(binop2.lineno, 3)
self.assertEqual(binop2.left.lineno, 3)
self.assertEqual(binop2.right.lineno, 3)
self.assertEqual(binop2.col_offset, 23)
self.assertEqual(binop2.left.col_offset, 23)
self.assertEqual(binop2.right.col_offset, 27)

def test_ast_line_numbers_nested(self):
expr = """
a = 10
f'{a * f"-{x()}-"}'"""
t = ast.parse(expr)
self.assertEqual(type(t), ast.Module)
self.assertEqual(len(t.body), 2)
# check `a = 10`
self.assertEqual(type(t.body[0]), ast.Assign)
self.assertEqual(t.body[0].lineno, 2)
# check `f'...'`
self.assertEqual(type(t.body[1]), ast.Expr)
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
self.assertEqual(len(t.body[1].value.values), 1)
self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
# check the binop location
binop = t.body[1].value.values[0].value
self.assertEqual(type(binop), ast.BinOp)
self.assertEqual(type(binop.left), ast.Name)
self.assertEqual(type(binop.op), ast.Mult)
self.assertEqual(type(binop.right), ast.JoinedStr)
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
self.assertEqual(binop.col_offset, 3)
self.assertEqual(binop.left.col_offset, 3)
self.assertEqual(binop.right.col_offset, 7)
# check the nested call location
self.assertEqual(len(binop.right.values), 3)
self.assertEqual(type(binop.right.values[0]), ast.Str)
self.assertEqual(type(binop.right.values[1]), ast.FormattedValue)
self.assertEqual(type(binop.right.values[2]), ast.Str)
self.assertEqual(binop.right.values[0].lineno, 3)
self.assertEqual(binop.right.values[1].lineno, 3)
self.assertEqual(binop.right.values[2].lineno, 3)
call = binop.right.values[1].value
self.assertEqual(type(call), ast.Call)
self.assertEqual(call.lineno, 3)
self.assertEqual(call.col_offset, 11)

def test_ast_line_numbers_duplicate_expression(self):
"""Duplicate expression

NOTE: this is currently broken, always sets location of the first
expression.
"""
expr = """
a = 10
f'{a * x()} {a * x()} {a * x()}'
"""
t = ast.parse(expr)
self.assertEqual(type(t), ast.Module)
self.assertEqual(len(t.body), 2)
# check `a = 10`
self.assertEqual(type(t.body[0]), ast.Assign)
self.assertEqual(t.body[0].lineno, 2)
# check `f'...'`
self.assertEqual(type(t.body[1]), ast.Expr)
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
self.assertEqual(len(t.body[1].value.values), 5)
self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
self.assertEqual(type(t.body[1].value.values[1]), ast.Str)
self.assertEqual(type(t.body[1].value.values[2]), ast.FormattedValue)
self.assertEqual(type(t.body[1].value.values[3]), ast.Str)
self.assertEqual(type(t.body[1].value.values[4]), ast.FormattedValue)
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
self.assertEqual(t.body[1].value.values[1].lineno, 3)
self.assertEqual(t.body[1].value.values[2].lineno, 3)
self.assertEqual(t.body[1].value.values[3].lineno, 3)
self.assertEqual(t.body[1].value.values[4].lineno, 3)
# check the first binop location
binop = t.body[1].value.values[0].value
self.assertEqual(type(binop), ast.BinOp)
self.assertEqual(type(binop.left), ast.Name)
self.assertEqual(type(binop.op), ast.Mult)
self.assertEqual(type(binop.right), ast.Call)
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
self.assertEqual(binop.col_offset, 3)
self.assertEqual(binop.left.col_offset, 3)
self.assertEqual(binop.right.col_offset, 7)
# check the second binop location
binop = t.body[1].value.values[2].value
self.assertEqual(type(binop), ast.BinOp)
self.assertEqual(type(binop.left), ast.Name)
self.assertEqual(type(binop.op), ast.Mult)
self.assertEqual(type(binop.right), ast.Call)
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
# check the third binop location
binop = t.body[1].value.values[4].value
self.assertEqual(type(binop), ast.BinOp)
self.assertEqual(type(binop.left), ast.Name)
self.assertEqual(type(binop.op), ast.Mult)
self.assertEqual(type(binop.right), ast.Call)
self.assertEqual(binop.lineno, 3)
self.assertEqual(binop.left.lineno, 3)
self.assertEqual(binop.right.lineno, 3)
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong

def test_ast_line_numbers_multiline_fstring(self):
# FIXME: This test demonstrates invalid behavior due to JoinedStr's
# immediate child nodes containing the wrong lineno. The enclosed
# expressions have valid line information and column offsets.
# See bpo-16806 and bpo-30465 for details.
expr = """
a = 10
f'''
{a
*
x()}
non-important content
'''
"""
t = ast.parse(expr)
self.assertEqual(type(t), ast.Module)
self.assertEqual(len(t.body), 2)
# check `a = 10`
self.assertEqual(type(t.body[0]), ast.Assign)
self.assertEqual(t.body[0].lineno, 2)
# check `f'...'`
self.assertEqual(type(t.body[1]), ast.Expr)
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
self.assertEqual(len(t.body[1].value.values), 3)
self.assertEqual(type(t.body[1].value.values[0]), ast.Str)
self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
self.assertEqual(type(t.body[1].value.values[2]), ast.Str)
# NOTE: the following invalid behavior is described in bpo-16806.
# - line number should be the *first* line (3), not the *last* (8)
# - column offset should not be -1
self.assertEqual(t.body[1].lineno, 8)
self.assertEqual(t.body[1].value.lineno, 8)
self.assertEqual(t.body[1].value.values[0].lineno, 8)
self.assertEqual(t.body[1].value.values[1].lineno, 8)
self.assertEqual(t.body[1].value.values[2].lineno, 8)
self.assertEqual(t.body[1].col_offset, -1)
self.assertEqual(t.body[1].value.col_offset, -1)
self.assertEqual(t.body[1].value.values[0].col_offset, -1)
self.assertEqual(t.body[1].value.values[1].col_offset, -1)
self.assertEqual(t.body[1].value.values[2].col_offset, -1)
# NOTE: the following lineno information and col_offset is correct for
# expressions within FormattedValues.
binop = t.body[1].value.values[1].value
self.assertEqual(type(binop), ast.BinOp)
self.assertEqual(type(binop.left), ast.Name)
self.assertEqual(type(binop.op), ast.Mult)
self.assertEqual(type(binop.right), ast.Call)
self.assertEqual(binop.lineno, 4)
self.assertEqual(binop.left.lineno, 4)
self.assertEqual(binop.right.lineno, 6)
self.assertEqual(binop.col_offset, 3)
self.assertEqual(binop.left.col_offset, 3)
self.assertEqual(binop.right.col_offset, 7)

def test_docstring(self):
def f():
f'''Not a docstring'''
Expand Down Expand Up @@ -786,5 +1033,6 @@ def test_backslash_char(self):
self.assertEqual(eval('f"\\\n"'), '')
self.assertEqual(eval('f"\\\r"'), '')


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Location information (``lineno`` and ``col_offset``) in f-strings is now
(mostly) correct. This fixes tools like flake8 from showing warnings on the
wrong line (typically the first line of the file).
72 changes: 67 additions & 5 deletions Python/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "node.h"
#include "ast.h"
#include "token.h"
#include "pythonrun.h"

#include <assert.h>

Expand Down Expand Up @@ -4270,6 +4271,55 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
return result;
}

/* Shift locations for the given node and all its children by adding `lineno`
and `col_offset` to existing locations. */
static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
{
n->n_col_offset = n->n_col_offset + col_offset;
for (int i = 0; i < NCH(n); ++i) {
if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When n->n_lineno is zero?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some nodes don't contain col_offset and lineno information at all. I don't recall now which ones those are but the tests were failing without this additional check.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should n->n_col_offset (above) and n->n_lineno (below) be updated for these nodes? They become non-zero, but does the new value make sense?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

n->n_lineno is zero for eval_input nodes only. Interestingly, n->n_col_offset is uninitialized then (holds value -875836469 which is 0xCBCBCBCB..., one of the magic byte patterns debug PyMalloc initializes fresh memory, too). So, yes, I think setting new values is better than leaving garbage coordinates.

/* Shifting column offsets unnecessary if there's been newlines. */
col_offset = 0;
}
fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
}
n->n_lineno = n->n_lineno + lineno;
}

/* Fix locations for the given node and its children.

`parent` is the enclosing node.
`n` is the node which locations are going to be fixed relative to parent.
`expr_str` is the child node's string representation, incuding braces.
*/
static void
fstring_fix_node_location(const node *parent, node *n, char *expr_str)
{
char *substr = NULL;
char *start;
int lines = LINENO(parent) - 1;
int cols = parent->n_col_offset;
/* Find the full fstring to fix location information in `n`. */
while (parent && parent->n_type != STRING)
parent = parent->n_child;
if (parent && parent->n_str) {
substr = strstr(parent->n_str, expr_str);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the f-string contains several equivalent subexpressions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then we'll report line number and column information for the first occurence in all occurences. This is not currently solvable without tracking already applied occurences, etc. I don't think it's worth fixing, that would complicate this patch.

If we ever redo f-strings to be untokenized properly on first pass, then this will work as expected. The hack to do a separate pass on formatted values is the reason why this entire workaround is necessary in the first place.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't expr_start/expr_end give a position of a subexpression in the string?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried that. Unfortunately, you'd need to pass the entire string as a new argument through all the fstring_* functions to make this work. Even then, the full string doesn't have the string prefix or the leading quote. That can be fixed, but even then, the full string is different from expr_start if compile-time concatenation is used.

In other words, this edge case requires an inordinate amount of work to get right. Let's treat this as a separate improvement in the future?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it's not worth complicating the implementation. Especially because the largest use case is to get error reporting working, and I don't see how the error could be on a second instance of the exact same expression.

if (substr) {
start = substr;
while (start > parent->n_str) {
if (start[0] == '\n')
break;
start--;
}
cols += substr - start;
/* Fix lineno in mulitline strings. */
while ((substr = strchr(substr + 1, '\n')))
lines--;
}
}
fstring_shift_node_locations(n, lines, cols);
}

/* Compile this expression in to an expr_ty. Add parens around the
expression, in order to allow leading spaces in the expression. */
static expr_ty
Expand All @@ -4278,6 +4328,7 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,

{
PyCompilerFlags cf;
node *mod_n;
mod_ty mod;
char *str;
Py_ssize_t len;
Expand All @@ -4287,9 +4338,10 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
assert(*(expr_start-1) == '{');
assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');

/* If the substring is all whitespace, it's an error. We need to catch
this here, and not when we call PyParser_ASTFromString, because turning
the expression '' in to '()' would go from being invalid to valid. */
/* If the substring is all whitespace, it's an error. We need to catch this
here, and not when we call PyParser_SimpleParseStringFlagsFilename,
because turning the expression '' in to '()' would go from being invalid
to valid. */
for (s = expr_start; s != expr_end; s++) {
char c = *s;
/* The Python parser ignores only the following whitespace
Expand All @@ -4315,9 +4367,19 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
str[len+2] = 0;

cf.cf_flags = PyCF_ONLY_AST;
mod = PyParser_ASTFromString(str, "<fstring>",
Py_eval_input, &cf, c->c_arena);
mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mentioning PyParser_ASTFromString() in the comment above no longer valid.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, will fix.

Py_eval_input, 0);
if (!mod_n) {
PyMem_RawFree(str);
return NULL;
}
/* Reuse str to find the correct column offset. */
str[0] = '{';
str[len+1] = '}';
fstring_fix_node_location(n, mod_n, str);
mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
PyMem_RawFree(str);
PyNode_Free(mod_n);
if (!mod)
return NULL;
return mod->v.Expression.body;
Expand Down