From 37630751e431ee3a32b79cc8fa1831bd27f49522 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 13 Apr 2018 12:00:56 +0300 Subject: [PATCH 1/2] bpo-33306: Improve syntax error messages for unbalanced parentheses. --- Lib/test/test_fstring.py | 12 ++++--- Lib/test/test_site.py | 4 +-- .../2018-04-18-12-23-30.bpo-33306.tSM3cp.rst | 1 + Parser/tokenizer.c | 32 +++++++++++++++++++ Parser/tokenizer.h | 5 ++- 5 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 09b5ae1fdaee64..fe3804b2215d57 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1004,10 +1004,14 @@ def test_str_format_differences(self): self.assertEqual('{d[0]}'.format(d=d), 'integer') def test_invalid_expressions(self): - self.assertAllRaise(SyntaxError, 'invalid syntax', - [r"f'{a[4)}'", - r"f'{a(4]}'", - ]) + self.assertAllRaise(SyntaxError, + r"closing parenthesis '\)' does not match " + r"opening parenthesis '\[' \(<fstring>, line 1\)", + [r"f'{a[4)}'"]) + self.assertAllRaise(SyntaxError, + r"closing parenthesis '\]' does not match " + r"opening parenthesis '\(' \(<fstring>, line 1\)", + [r"f'{a(4]}'"]) def test_errors(self): # see issue 26287 diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index f38e8d853adabd..735651ec7d7550 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -133,7 +133,7 @@ def make_pth(self, contents, pth_dir='.', pth_name=TESTFN): def test_addpackage_import_bad_syntax(self): # Issue 10642 - pth_dir, pth_fn = self.make_pth("import bad)syntax\n") + pth_dir, pth_fn = self.make_pth("import bad-syntax\n") with captured_stderr() as err_out: site.addpackage(pth_dir, pth_fn, set()) self.assertRegex(err_out.getvalue(), "line 1") @@ -143,7 +143,7 @@ def test_addpackage_import_bad_syntax(self): # order doesn't matter. 
The next three could be a single check # but my regex foo isn't good enough to write it. self.assertRegex(err_out.getvalue(), 'Traceback') - self.assertRegex(err_out.getvalue(), r'import bad\)syntax') + self.assertRegex(err_out.getvalue(), r'import bad-syntax') self.assertRegex(err_out.getvalue(), 'SyntaxError') def test_addpackage_import_bad_exec(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst new file mode 100644 index 00000000000000..2d891062607c16 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-12-23-30.bpo-33306.tSM3cp.rst @@ -0,0 +1 @@ +Improved syntax error messages for unbalanced parentheses. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d319a4c90a9e39..d5f80f99cb2d58 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1842,12 +1842,44 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) case '(': case '[': case '{': +#ifndef PGEN + if (tok->level >= MAXLEVEL) { + return syntaxerror(tok, "too many nested parenthesis"); + } + tok->parenstack[tok->level] = c; + tok->parenlinenostack[tok->level] = tok->lineno; +#endif tok->level++; break; case ')': case ']': case '}': +#ifndef PGEN + if (!tok->level) { + return syntaxerror(tok, "unmatched '%c'", c); + } +#endif tok->level--; +#ifndef PGEN + int opening = tok->parenstack[tok->level]; + if (!((opening == '(' && c == ')') || + (opening == '[' && c == ']') || + (opening == '{' && c == '}'))) + { + if (tok->parenlinenostack[tok->level] != tok->lineno) { + return syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c' on line %d", + c, opening, tok->parenlinenostack[tok->level]); + } + else { + return syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c'", + c, opening); + } + } +#endif break; } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 
2e31d8624da7ca..cd18d25dc192ea 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -11,6 +11,7 @@ extern "C" { #include "token.h" /* For token types */ #define MAXINDENT 100 /* Max indentation level */ +#define MAXLEVEL 200 /* Max parentheses level */ enum decoding_state { STATE_INIT, @@ -39,14 +40,16 @@ struct tok_state { int lineno; /* Current line number */ int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ - /* Stuff for checking on different tab sizes */ #ifndef PGEN + char parenstack[MAXLEVEL]; + int parenlinenostack[MAXLEVEL]; /* pgen doesn't have access to Python codecs, it cannot decode the input filename. The bytes filename might be kept, but it is only used by indenterror() and it is not really needed: pgen only compiles one file (Grammar/Grammar). */ PyObject *filename; #endif + /* Stuff for checking on different tab sizes */ int altindstack[MAXINDENT]; /* Stack of alternate indents */ /* Stuff for PEP 0263 */ enum decoding_state decoding_state; From d057c386c1fd69bd28f9616758775bceb59374c4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 17 Dec 2018 08:10:19 +0200 Subject: [PATCH 2/2] Fix grammar. --- Parser/tokenizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d5f80f99cb2d58..c246ee204c5d7e 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1844,7 +1844,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) case '{': #ifndef PGEN if (tok->level >= MAXLEVEL) { - return syntaxerror(tok, "too many nested parenthesis"); + return syntaxerror(tok, "too many nested parentheses"); } tok->parenstack[tok->level] = c; tok->parenlinenostack[tok->level] = tok->lineno;