Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Grammar/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ invalid_assignment:
| expression ':' expression ['=' annotated_rhs] {
RAISE_SYNTAX_ERROR("illegal target for annotation") }
| a=expression ('=' | augassign) (yield_expr | star_expressions) {
RAISE_SYNTAX_ERROR("cannot assign to %s", _PyPegen_get_expr_name(a)) }
RAISE_SYNTAX_ERROR_NO_COL_OFFSET("cannot assign to %s", _PyPegen_get_expr_name(a)) }
invalid_block:
| NEWLINE !INDENT { RAISE_INDENTATION_ERROR("expected an indented block") }
invalid_comprehension:
Expand Down
6 changes: 3 additions & 3 deletions Lib/test/test_cmd_line_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ def test_syntaxerror_unindented_caret_position(self):
exitcode, stdout, stderr = assert_python_failure(script_name)
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
# Confirm that the caret is located under the first 1 character
self.assertIn("\n 1 + 1 = 2\n ^", text)
self.assertIn("\n 1 + 1 = 2\n ^", text)

def test_syntaxerror_indented_caret_position(self):
script = textwrap.dedent("""\
Expand All @@ -611,7 +611,7 @@ def test_syntaxerror_indented_caret_position(self):
exitcode, stdout, stderr = assert_python_failure(script_name)
text = io.TextIOWrapper(io.BytesIO(stderr), 'ascii').read()
# Confirm that the caret is located under the first 1 character
self.assertIn("\n 1 + 1 = 2\n ^", text)
self.assertIn("\n 1 + 1 = 2\n ^", text)

# Try the same with a form feed at the start of the indented line
script = (
Expand All @@ -622,7 +622,7 @@ def test_syntaxerror_indented_caret_position(self):
exitcode, stdout, stderr = assert_python_failure(script_name)
text = io.TextIOWrapper(io.BytesIO(stderr), "ascii").read()
self.assertNotIn("\f", text)
self.assertIn("\n 1 + 1 = 2\n ^", text)
self.assertIn("\n 1 + 1 = 2\n ^", text)

def test_syntaxerror_multi_line_fstring(self):
script = 'foo = f"""{}\nfoo"""\n'
Expand Down
42 changes: 21 additions & 21 deletions Lib/test/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,19 @@ def ckmsg(src, msg, exception=SyntaxError):
s = '''if True:\n print()\n\texec "mixed tabs and spaces"'''
ckmsg(s, "inconsistent use of tabs and spaces in indentation", TabError)

@support.skip_if_new_parser("Pegen column offsets might be different")
def testSyntaxErrorOffset(self):
def check(src, lineno, offset, encoding='utf-8'):
with self.assertRaises(SyntaxError) as cm:
compile(src, '<fragment>', 'exec')
self.assertEqual(cm.exception.lineno, lineno)
self.assertEqual(cm.exception.offset, offset)
if cm.exception.text is not None:
if not isinstance(src, str):
src = src.decode(encoding, 'replace')
line = src.split('\n')[lineno-1]
self.assertIn(line, cm.exception.text)
def check(self, src, lineno, offset, encoding='utf-8'):
with self.assertRaises(SyntaxError) as cm:
compile(src, '<fragment>', 'exec')
self.assertEqual(cm.exception.lineno, lineno)
self.assertEqual(cm.exception.offset, offset)
if cm.exception.text is not None:
if not isinstance(src, str):
src = src.decode(encoding, 'replace')
line = src.split('\n')[lineno-1]
self.assertIn(line, cm.exception.text)

def testSyntaxErrorOffset(self):
check = self.check
check('def fact(x):\n\treturn x!\n', 2, 10)
check('1 +\n', 1, 4)
check('def spam():\n print(1)\n print(2)', 3, 10)
Expand Down Expand Up @@ -238,20 +238,20 @@ def baz():
check('nonlocal x', 1, 1)
check('def f():\n global x\n nonlocal x', 2, 3)

# Errors thrown by ast.c
check('for 1 in []: pass', 1, 5)
check('def f(*):\n pass', 1, 7)
check('[*x for x in xs]', 1, 2)
check('def f():\n x, y: int', 2, 3)
check('(yield i) = 2', 1, 1)
check('foo(x for x in range(10), 100)', 1, 5)
check('foo(1=2)', 1, 5)

# Errors thrown by future.c
check('from __future__ import doesnt_exist', 1, 1)
check('from __future__ import braces', 1, 1)
check('x=1\nfrom __future__ import division', 2, 1)

@support.skip_if_new_parser("Pegen column offsets might be different")
def testSyntaxErrorOffsetCustom(self):
self.check('for 1 in []: pass', 1, 5)
self.check('def f(*):\n pass', 1, 7)
self.check('[*x for x in xs]', 1, 2)
self.check('def f():\n x, y: int', 2, 3)
self.check('(yield i) = 2', 1, 1)
self.check('foo(x for x in range(10), 100)', 1, 5)
self.check('foo(1=2)', 1, 5)

@cpython_only
def testSettingException(self):
Expand Down
2 changes: 1 addition & 1 deletion Parser/pegen/parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -10515,7 +10515,7 @@ invalid_assignment_rule(Parser *p)
(_tmp_132_var = _tmp_132_rule(p))
)
{
res = RAISE_SYNTAX_ERROR ( "cannot assign to %s" , _PyPegen_get_expr_name ( a ) );
res = RAISE_SYNTAX_ERROR_NO_COL_OFFSET ( "cannot assign to %s" , _PyPegen_get_expr_name ( a ) );
if (res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
return NULL;
Expand Down
127 changes: 47 additions & 80 deletions Parser/pegen/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,15 @@ byte_offset_to_character_offset(PyObject *line, int col_offset)
if (!str) {
return 0;
}
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
if (!text) {
return 0;
}
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
str = PyUnicode_AsUTF8(text);
if (str != NULL && (int)strlen(str) == col_offset) {
size = strlen(str);
}
Py_DECREF(text);
return size;
}
Expand Down Expand Up @@ -297,66 +301,21 @@ raise_tokenizer_init_error(PyObject *filename)
}

static inline PyObject *
get_error_line(char *buffer)
{
char *newline = strchr(buffer, '\n');
if (newline) {
return PyUnicode_FromStringAndSize(buffer, newline - buffer);
}
else {
return PyUnicode_FromString(buffer);
}
}

static int
tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
get_error_line(char *buffer, int is_file)
{
PyObject *errstr = NULL;
PyObject *value = NULL;
size_t col_number = -1;

errstr = PyUnicode_FromString(errmsg);
if (!errstr) {
return -1;
}

PyObject *loc = NULL;
if (p->start_rule == Py_file_input) {
loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
}
if (!loc) {
loc = get_error_line(p->tok->buf);
const char *newline;
if (is_file) {
newline = strrchr(buffer, '\n');
} else {
newline = strchr(buffer, '\n');
}

if (loc) {
col_number = p->tok->cur - p->tok->buf;
if (newline) {
return PyUnicode_DecodeUTF8(buffer, newline - buffer, "replace");
}
else {
Py_INCREF(Py_None);
loc = Py_None;
return PyUnicode_DecodeUTF8(buffer, strlen(buffer), "replace");
}

PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
col_number, loc);
if (!tmp) {
goto error;
}

value = PyTuple_Pack(2, errstr, tmp);
Py_DECREF(tmp);
if (!value) {
goto error;
}
PyErr_SetObject(errtype, value);

Py_XDECREF(value);
Py_XDECREF(errstr);
return -1;

error:
Py_XDECREF(errstr);
Py_XDECREF(loc);
return -1;
}

static int
Expand All @@ -376,20 +335,20 @@ tokenizer_error(Parser *p)
msg = "invalid character in identifier";
break;
case E_BADPREFIX:
return tokenizer_error_with_col_offset(p,
errtype, "invalid string prefix");
RAISE_SYNTAX_ERROR("invalid string prefix");
return -1;
case E_EOFS:
return tokenizer_error_with_col_offset(p,
errtype, "EOF while scanning triple-quoted string literal");
RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal");
return -1;
case E_EOLS:
return tokenizer_error_with_col_offset(p,
errtype, "EOL while scanning string literal");
RAISE_SYNTAX_ERROR("EOL while scanning string literal");
return -1;
case E_EOF:
return tokenizer_error_with_col_offset(p,
errtype, "unexpected EOF while parsing");
RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
return -1;
case E_DEDENT:
return tokenizer_error_with_col_offset(p,
PyExc_IndentationError, "unindent does not match any outer indentation level");
RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
return -1;
case E_INTR:
if (!PyErr_Occurred()) {
PyErr_SetNone(PyExc_KeyboardInterrupt);
Expand Down Expand Up @@ -421,14 +380,14 @@ tokenizer_error(Parser *p)
}

void *
_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
_PyPegen_raise_error(Parser *p, PyObject *errtype, int with_col_number, const char *errmsg, ...)
{
PyObject *value = NULL;
PyObject *errstr = NULL;
PyObject *loc = NULL;
PyObject *tmp = NULL;
Token *t = p->tokens[p->fill - 1];
Py_ssize_t col_number = 0;
Py_ssize_t col_number = !with_col_number;
va_list va;

va_start(va, errmsg);
Expand All @@ -443,14 +402,20 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
}

if (!loc) {
loc = get_error_line(p->tok->buf);
loc = get_error_line(p->tok->buf, p->start_rule == Py_file_input);
}

if (loc) {
int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
if (loc && with_col_number) {
int col_offset;
if (t->col_offset == -1) {
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
intptr_t, int);
} else {
col_offset = t->col_offset + 1;
}
col_number = byte_offset_to_character_offset(loc, col_offset);
}
else {
else if (!loc) {
Py_INCREF(Py_None);
loc = Py_None;
}
Expand Down Expand Up @@ -632,14 +597,6 @@ _PyPegen_fill_token(Parser *p)
type = PyTokenizer_Get(p->tok, &start, &end);
}

if (type == ERRORTOKEN) {
if (p->tok->done == E_DECODE) {
return raise_decode_error(p);
}
else {
return tokenizer_error(p);
}
}
if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
type = NEWLINE; /* Add an extra newline */
p->parsing_started = 0;
Expand Down Expand Up @@ -700,6 +657,16 @@ _PyPegen_fill_token(Parser *p)
t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;

p->fill += 1;

if (type == ERRORTOKEN) {
if (p->tok->done == E_DECODE) {
return raise_decode_error(p);
}
else {
return tokenizer_error(p);
}
}

return 0;
}

Expand Down
11 changes: 6 additions & 5 deletions Parser/pegen/pegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,15 @@ expr_ty _PyPegen_name_token(Parser *p);
expr_ty _PyPegen_number_token(Parser *p);
void *_PyPegen_string_token(Parser *p);
const char *_PyPegen_get_expr_name(expr_ty);
void *_PyPegen_raise_error(Parser *p, PyObject *, const char *errmsg, ...);
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int with_col_number, const char *errmsg, ...);
void *_PyPegen_dummy_name(Parser *p, ...);

#define UNUSED(expr) do { (void)(expr); } while (0)
#define EXTRA_EXPR(head, tail) head->lineno, head->col_offset, tail->end_lineno, tail->end_col_offset, p->arena
#define EXTRA start_lineno, start_col_offset, end_lineno, end_col_offset, p->arena
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 1, msg, ##__VA_ARGS__)
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, 1, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_NO_COL_OFFSET(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 0, msg, ##__VA_ARGS__)

Py_LOCAL_INLINE(void *)
CHECK_CALL(Parser *p, void *result)
Expand Down Expand Up @@ -190,8 +191,8 @@ INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
}
if (p->feature_version < version) {
p->error_indicator = 1;
return _PyPegen_raise_error(p, PyExc_SyntaxError, "%s only supported in Python 3.%i and greater",
msg, version);
return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater",
msg, version);
}
return node;
}
Expand Down