Skip to content

Commit

Permalink
gh-96611: Fix error message for invalid UTF-8 in mid-multiline string (
Browse files Browse the repository at this point in the history
…GH-96623)

(cherry picked from commit 05692c6)

Co-authored-by: Michael Droettboom <mdboom@gmail.com>
  • Loading branch information
miss-islington and mdboom committed Sep 6, 2022
1 parent a389fdb commit bb0dab5
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 0 deletions.
12 changes: 12 additions & 0 deletions Lib/test/test_source_encoding.py
Expand Up @@ -148,6 +148,18 @@ def test_error_from_string(self):
self.assertTrue(c.exception.args[0].startswith(expected),
msg=c.exception.args[0])

def test_file_parse_error_multiline(self):
    # gh-96611: a script containing an invalid UTF-8 byte (0xB1) inside a
    # triple-quoted string must fail with the "Non-UTF-8 code" message.
    payload = b'print("""\n\xb1""")\n'
    with open(TESTFN, "wb") as script:
        script.write(payload)

    try:
        # The helper's result unpacks to (return code, stdout, stderr).
        outcome = script_helper.assert_python_failure(TESTFN)
        exit_status, _out, err_output = outcome

        self.assertGreater(exit_status, 0)
        self.assertIn(b"Non-UTF-8 code starting with '\\xb1'", err_output)
    finally:
        # Remove the script whether or not the assertions passed.
        os.unlink(TESTFN)

class AbstractSourceEncodingTest:

Expand Down
@@ -0,0 +1,2 @@
When loading a file with invalid UTF-8 inside a multi-line string, the
SyntaxError that is emitted now correctly reports the non-UTF-8 code.
2 changes: 2 additions & 0 deletions Parser/tokenizer.c
Expand Up @@ -1945,6 +1945,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
if (tok->done == E_DECODE)
break;
if (c == EOF || (quote_size == 1 && c == '\n')) {
assert(tok->multi_line_start != NULL);
// shift the tok_state's location into
Expand Down

0 comments on commit bb0dab5

Please sign in to comment.