From a698d52c3975c80b45b139b2f08402ec514dce75 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Thu, 21 Jan 2021 00:38:47 +0300 Subject: [PATCH] bpo-40176: Improve error messages for unclosed string literals (GH-19346) Automerge-Triggered-By: GH:isidentical --- Include/errcode.h | 2 -- Lib/test/test_eof.py | 24 +++++++++-------- Lib/test/test_exceptions.py | 4 +-- Lib/test/test_fstring.py | 2 +- .../2021-01-20-22-31-01.bpo-40176.anjyWw.rst | 2 ++ Parser/pegen.c | 6 ----- Parser/tokenizer.c | 26 ++++++++++++------- 7 files changed, 34 insertions(+), 32 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst diff --git a/Include/errcode.h b/Include/errcode.h index 790518b8b7730e..f2671d6c9b30b4 100644 --- a/Include/errcode.h +++ b/Include/errcode.h @@ -26,8 +26,6 @@ extern "C" { #define E_TOODEEP 20 /* Too many indentation levels */ #define E_DEDENT 21 /* No matching outer block for dedent */ #define E_DECODE 22 /* Error in decoding into Unicode */ -#define E_EOFS 23 /* EOF in triple-quoted string */ -#define E_EOLS 24 /* EOL in single-quoted string */ #define E_LINECONT 25 /* Unexpected characters after a line continuation */ #define E_BADSINGLE 27 /* Ill-formed single statement input */ diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py index 2cf263d27463c4..b370e27161cee6 100644 --- a/Lib/test/test_eof.py +++ b/Lib/test/test_eof.py @@ -7,23 +7,25 @@ import unittest class EOFTestCase(unittest.TestCase): - def test_EOFC(self): - expect = "EOL while scanning string literal (<string>, line 1)" - try: - eval("""'this is a test\ - """) - except SyntaxError as msg: - self.assertEqual(str(msg), expect) - else: - raise support.TestFailed + def test_EOF_single_quote(self): + expect = "unterminated string literal (detected at line 1) (<string>, line 1)" + for quote in ("'", "\""): + try: + eval(f"""{quote}this is a test\ + """) + except SyntaxError as msg: + self.assertEqual(str(msg), expect) + self.assertEqual(msg.offset, 1) + 
else: + raise support.TestFailed def test_EOFS(self): - expect = ("EOF while scanning triple-quoted string literal " - "(<string>, line 1)") + expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)") try: eval("""'''this is a test""") except SyntaxError as msg: self.assertEqual(str(msg), expect) + self.assertEqual(msg.offset, 1) else: raise support.TestFailed diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index eb70d7b4e49724..21878c39f4fec9 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -206,7 +206,7 @@ def testSyntaxErrorOffset(self): check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +', 2, 19, encoding='cp1251') check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18) - check('x = "a', 1, 7) + check('x = "a', 1, 5) check('lambda x: x = 2', 1, 1) check('f{a + b + c}', 1, 2) check('[file for str(file) in []\n])', 1, 11) @@ -238,7 +238,7 @@ def bar(): def baz(): '''quux''' - """, 9, 20) + """, 9, 24) check("pass\npass\npass\n(1+)\npass\npass\npass", 4, 4) check("(1+)", 1, 4) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 2345832abce624..7ca1512ebbf1bf 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -661,7 +661,7 @@ def test_parens_in_expressions(self): ["f'{3)+(4}'", ]) - self.assertAllRaise(SyntaxError, 'EOL while scanning string literal', + self.assertAllRaise(SyntaxError, 'unterminated string literal', ["f'{\n}'", ]) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst new file mode 100644 index 00000000000000..df7de3bdf37bc2 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst @@ -0,0 +1,2 @@ +Syntax errors for unterminated string literals now point to the start +of the string instead of reporting EOF/EOL. 
diff --git a/Parser/pegen.c b/Parser/pegen.c index 0d39030ea6ed18..0e7f86bc99e451 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -327,12 +327,6 @@ tokenizer_error(Parser *p) case E_TOKEN: msg = "invalid token"; break; - case E_EOFS: - RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal"); - return -1; - case E_EOLS: - RAISE_SYNTAX_ERROR("EOL while scanning string literal"); - return -1; case E_EOF: if (p->tok->level) { raise_unclosed_parentheses_error(p); diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d3e846c0a5a126..d9334aaf148ba2 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1739,20 +1739,26 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Get rest of string */ while (end_quote_size != quote_size) { c = tok_nextc(tok); - if (c == EOF) { + if (c == EOF || (quote_size == 1 && c == '\n')) { + // shift the tok_state's location into + // the start of string, and report the error + // from the initial quote character + tok->cur = (char *)tok->start; + tok->cur++; + tok->line_start = tok->multi_line_start; + int start = tok->lineno; + tok->lineno = tok->first_lineno; + if (quote_size == 3) { - tok->done = E_EOFS; + return syntaxerror(tok, + "unterminated triple-quoted string literal" + " (detected at line %d)", start); } else { - tok->done = E_EOLS; + return syntaxerror(tok, + "unterminated string literal (detected at" + " line %d)", start); } - tok->cur = tok->inp; - return ERRORTOKEN; - } - if (quote_size == 1 && c == '\n') { - tok->done = E_EOLS; - tok->cur = tok->inp; - return ERRORTOKEN; } if (c == quote) { end_quote_size += 1;