From 81a459c57f36f7ea21d10daf7c60b936f1f48a1b Mon Sep 17 00:00:00 2001 From: alperyoney Date: Mon, 24 Nov 2025 11:32:47 -0800 Subject: [PATCH 1/2] gh-116738: Test re module for free threading --- Lib/test/test_free_threading/test_re.py | 62 +++++++++++++++++++++++++ Modules/_sre/sre.c | 14 ++++-- 2 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 Lib/test/test_free_threading/test_re.py diff --git a/Lib/test/test_free_threading/test_re.py b/Lib/test/test_free_threading/test_re.py new file mode 100644 index 00000000000000..56f25045d1bf8e --- /dev/null +++ b/Lib/test/test_free_threading/test_re.py @@ -0,0 +1,62 @@ +import re +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestRe(unittest.TestCase): + def test_pattern_sub(self): + """Pattern substitution should work across threads""" + pattern = re.compile(r"\w+@\w+\.\w+") + text = "e-mail: test@python.org or user@pycon.org. " * 5 + results = [] + + def worker(): + substituted = pattern.sub("(redacted)", text) + results.append(substituted.count("(redacted)")) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 5] * NTHREADS) + + def test_pattern_search(self): + """Pattern search should work across threads.""" + emails = ["alice@python.org", "bob@pycon.org"] * 10 + pattern = re.compile(r"\w+@\w+\.\w+") + results = [] + + def worker(): + matches = [pattern.search(e).group() for e in emails] + results.append(len(matches)) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 10] * NTHREADS) + + def test_scanner_concurrent_access(self): + """Shared scanner should reject concurrent access.""" + pattern = re.compile(r"\w+") + scanner = pattern.scanner("word " * 10) + + def worker(): + for _ in range(100): + try: + scanner.search() + except ValueError as e: + if "already executing" in str(e): + pass + else: + raise + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + # This test has no assertions. Its purpose is to catch crashes and + # enable thread sanitizer to detect race conditions. While "already + # executing" errors are very likely, they're not guaranteed due to + # non-deterministic thread scheduling, so we can't assert errors > 0. + + +if __name__ == "__main__": + unittest.main() diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 4e97101b699876..0487a6cd6dd80b 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -2841,20 +2841,28 @@ scanner_dealloc(PyObject *self) static int scanner_begin(ScannerObject* self) { + int result; + Py_BEGIN_CRITICAL_SECTION(self); if (self->executing) { PyErr_SetString(PyExc_ValueError, "regular expression scanner already executing"); - return 0; + result = 0; } - self->executing = 1; - return 1; + else { + self->executing = 1; + result = 1; + } + Py_END_CRITICAL_SECTION(); + return result; } static void scanner_end(ScannerObject* self) { + Py_BEGIN_CRITICAL_SECTION(self); assert(self->executing); self->executing = 0; + Py_END_CRITICAL_SECTION(); } /*[clinic input] From 4c1e940720c92b2c064e60369aee6cc9172b2165 Mon Sep 17 00:00:00 2001 From: alperyoney Date: Tue, 25 Nov 2025 13:19:25 -0800 Subject: [PATCH 2/2] Replace critical sections with atomic operations --- .../internal/pycore_pyatomic_ft_wrappers.h | 6 +++++ ...-11-25-13-13-34.gh-issue-116738.MnZRdV.rst | 2 ++ Modules/_sre/sre.c | 25 ++++++++----------- 3 files changed, 19 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index c31c33657002ec..2ae0185226f847 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -77,6 +77,10 @@ extern "C" { _Py_atomic_store_ushort_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \ _Py_atomic_load_ushort_relaxed(&value) +#define FT_ATOMIC_LOAD_INT(value) \ + _Py_atomic_load_int(&value) +#define FT_ATOMIC_STORE_INT(value, new_value) \ + _Py_atomic_store_int(&value, new_value) #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \ _Py_atomic_store_int_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_INT_RELAXED(value) \ @@ -144,6 +148,8 @@ extern "C" { #define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value #define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_INT(value) value +#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value #define FT_ATOMIC_LOAD_INT_RELAXED(value) value #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_UINT_RELAXED(value) value diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst new file mode 100644 index 00000000000000..151f8968292a61 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst @@ -0,0 +1,2 @@ +Fix thread safety issue with :mod:`re` scanner objects in free-threaded +builds. diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 0487a6cd6dd80b..59ff9078e6cff4 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -2841,28 +2841,25 @@ scanner_dealloc(PyObject *self) static int scanner_begin(ScannerObject* self) { - int result; - Py_BEGIN_CRITICAL_SECTION(self); - if (self->executing) { +#ifdef Py_GIL_DISABLED + int was_executing = _Py_atomic_exchange_int(&self->executing, 1); +#else + int was_executing = self->executing; + self->executing = 1; +#endif + if (was_executing) { PyErr_SetString(PyExc_ValueError, "regular expression scanner already executing"); - result = 0; - } - else { - self->executing = 1; - result = 1; + return 0; } - Py_END_CRITICAL_SECTION(); - return result; + return 1; } static void scanner_end(ScannerObject* self) { - Py_BEGIN_CRITICAL_SECTION(self); - assert(self->executing); - self->executing = 0; - Py_END_CRITICAL_SECTION(); + assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing)); + FT_ATOMIC_STORE_INT(self->executing, 0); } /*[clinic input]