From 4daedab4521abb05c20d1d233ec8bbb5af30e6b6 Mon Sep 17 00:00:00 2001
From: Mikhail Efimov
Date: Sat, 25 Oct 2025 17:01:52 +0300
Subject: [PATCH 1/6] Crash fixed

---
 Python/Python-tokenize.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 152d61c686722e..c2a534d6bc3729 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -251,7 +251,7 @@ tokenizeriter_next(PyObject *op)
 
     int type = _PyTokenizer_Get(it->tok, &token);
     if (type == ERRORTOKEN) {
-        if(!PyErr_Occurred()) {
+        if (!PyErr_Occurred()) {
             _tokenizer_error(it);
             assert(PyErr_Occurred());
         }
@@ -268,6 +268,9 @@ tokenizeriter_next(PyObject *op)
     }
     else {
         str = PyUnicode_FromStringAndSize(token.start, token.end - token.start);
+        if (PyErr_Occurred()) {
+            goto exit;
+        }
     }
     if (str == NULL) {
         goto exit;
@@ -337,7 +340,9 @@ tokenizeriter_next(PyObject *op)
         }
     }
 
+    assert(!PyErr_Occurred());
     result = Py_BuildValue("(iN(nn)(nn)O)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
+
 exit:
     _PyToken_Free(&token);
     if (type == ENDMARKER) {

From ab0cbbeb6f13d0da14ce1dfe11a52e44eeac625c Mon Sep 17 00:00:00 2001
From: Mikhail Efimov
Date: Sat, 25 Oct 2025 17:22:40 +0300
Subject: [PATCH 2/6] Tests are added, improve fix

---
 Lib/test/test_tokenize.py | 2 ++
 Python/Python-tokenize.c  | 8 +++-----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index d274726eed2e65..7ced5a9e050594 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3183,6 +3183,8 @@ def get_tokens(string):
             f'__{
                 x:d
             }__'""",
+            "def f():\n if x\n\x00"
+            "class C:\n a\n\x00",
         ]:
             with self.subTest(case=case):
                 self.assertRaises(tokenize.TokenError, get_tokens, case)
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index c2a534d6bc3729..27dad55abbe005 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -268,9 +268,6 @@ tokenizeriter_next(PyObject *op)
     }
     else {
         str = PyUnicode_FromStringAndSize(token.start, token.end - token.start);
-        if (PyErr_Occurred()) {
-            goto exit;
-        }
     }
     if (str == NULL) {
         goto exit;
@@ -337,8 +337,9 @@ tokenizeriter_next(PyObject *op)
         }
     }
 
-    assert(!PyErr_Occurred());
-    result = Py_BuildValue("(iN(nn)(nn)O)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
+    if (!PyErr_Occurred()) {
+        result = Py_BuildValue("(iN(nn)(nn)O)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
+    }
 
 exit:
     _PyToken_Free(&token);

From e697c38e95f39f7d03816d6c32d520ec0b059b40 Mon Sep 17 00:00:00 2001
From: Mikhail Efimov
Date: Sat, 25 Oct 2025 17:38:08 +0300
Subject: [PATCH 3/6] NEWS entry

---
 .../2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
new file mode 100644
index 00000000000000..a3ab3cbabfccc5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
@@ -0,0 +1,2 @@
+Fixed crash produced by incorrect result of c:func:`tokenizeriter_next` in
+case of error occurred. Patch by Mikhail Efimov.
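The crash guarded by patches 1-2 can be exercised from Python. The sketch below mirrors the two test cases added to Lib/test/test_tokenize.py in patch 2 (patch 6 later narrows them to a single case); the get_token_list helper and the print call are illustrative only, and tokenize.generate_tokens is assumed as the entry point based on the final NEWS entry, since the test's get_tokens helper is not shown in the hunk.

    # Sketch only: mirrors the patch-2 test cases, assuming a build with the fix.
    # Without the fix, tokenizing source that ends in a NUL byte could leave an
    # exception set while tokenizeriter_next still returned a result tuple.
    import io
    import tokenize

    def get_token_list(source):
        # Illustrative helper: drain the generator so the failing token is reached.
        return list(tokenize.generate_tokens(io.StringIO(source).readline))

    for case in ["def f():\n if x\n\x00", "class C:\n a\n\x00"]:
        try:
            get_token_list(case)
        except tokenize.TokenError as exc:
            print("TokenError:", exc)  # expected per the patch-2 test
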
From 2792df86e722d8b842326344adb1049d788f5adb Mon Sep 17 00:00:00 2001
From: Mikhail Efimov
Date: Sat, 25 Oct 2025 17:40:57 +0300
Subject: [PATCH 4/6] Fix NEWS

---
 .../2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
index a3ab3cbabfccc5..418ef1395abe5f 100644
--- a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
@@ -1,2 +1,2 @@
-Fixed crash produced by incorrect result of c:func:`tokenizeriter_next` in
+Fixed crash produced by incorrect result of :c:func:`tokenizeriter_next` in
 case of error occurred. Patch by Mikhail Efimov.

From b6e538d824c85746efdb1d23c85f8d319360177c Mon Sep 17 00:00:00 2001
From: Mikhail Efimov
Date: Sat, 25 Oct 2025 17:48:22 +0300
Subject: [PATCH 5/6] Fix NEWS 2

---
 .../2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
index 418ef1395abe5f..2c27525d9f782c 100644
--- a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-25-17-36-46.gh-issue-140576.kj0SCY.rst
@@ -1,2 +1,2 @@
-Fixed crash produced by incorrect result of :c:func:`tokenizeriter_next` in
-case of error occurred. Patch by Mikhail Efimov.
+Fixed crash in :func:`tokenize.generate_tokens` in case of
+specific incorrect input. Patch by Mikhail Efimov.
From 6a3311457058454fa60638b8d30abad8e8293d1f Mon Sep 17 00:00:00 2001
From: Mikhail Efimov
Date: Sat, 25 Oct 2025 23:19:42 +0300
Subject: [PATCH 6/6] Revert changes in Python-tokenize.c, fix in lexer.c

---
 Lib/test/test_tokenize.py | 3 +--
 Parser/lexer/lexer.c      | 3 +++
 Python/Python-tokenize.c  | 7 ++-----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 7ced5a9e050594..ca67e381958757 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3183,8 +3183,7 @@ def get_tokens(string):
             f'__{
                 x:d
             }__'""",
-            "def f():\n if x\n\x00"
-            "class C:\n a\n\x00",
+            " a\n\x00",
         ]:
             with self.subTest(case=case):
                 self.assertRaises(tokenize.TokenError, get_tokens, case)
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index a69994e9b3d005..7f25afec302c22 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -539,6 +539,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             return MAKE_TOKEN(ERRORTOKEN);
         }
     }
+    else if (c == EOF && PyErr_Occurred()) {
+        return MAKE_TOKEN(ERRORTOKEN);
+    }
     else {
         break;
     }
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 27dad55abbe005..152d61c686722e 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -251,7 +251,7 @@ tokenizeriter_next(PyObject *op)
 
     int type = _PyTokenizer_Get(it->tok, &token);
     if (type == ERRORTOKEN) {
-        if (!PyErr_Occurred()) {
+        if(!PyErr_Occurred()) {
             _tokenizer_error(it);
             assert(PyErr_Occurred());
         }
@@ -337,10 +337,7 @@ tokenizeriter_next(PyObject *op)
         }
     }
 
-    if (!PyErr_Occurred()) {
-        result = Py_BuildValue("(iN(nn)(nn)O)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
-    }
-
+    result = Py_BuildValue("(iN(nn)(nn)O)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
     if (type == ENDMARKER) {
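A short usage sketch of the final behavior after patch 6, which moves the guard into Parser/lexer/lexer.c: when the lexer reaches EOF with an exception already pending, it now returns ERRORTOKEN, so the Python layer reports an error instead of crashing. The reproducer string is the one kept in the final test; using tokenize.generate_tokens as the entry point follows the updated NEWS entry, and the rest is illustrative.

    # Sketch only: the narrowed reproducer from the final test case.
    import io
    import tokenize

    source = " a\n\x00"  # an indented line followed by a NUL byte
    try:
        list(tokenize.generate_tokens(io.StringIO(source).readline))
    except tokenize.TokenError as exc:
        print("TokenError:", exc)  # expected per the updated test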