Skip to content

Commit 9ffbbd4

Browse files
committed
gh-140746: Fix Thread.start() can hang forever
When a new thread is started (`Thread.start()`), the current thread waits for the new thread's `_started` signal (`self._started.wait()`). If the new thread doesn't have enough memory, it might crash before signaling its start to the parent thread, causing the parent thread to wait indefinitely (still in `Thread.start()`). To fix the issue, remove the _started python attribute from the Thread class and moved the logic on the _os_thread_handle. WIP We also change `Thread._delete()` to use `pop` to remove the thread from `_active`, as there is no guarantee that the thread exists in `_active[get_ident()]`, thus avoiding a potential `KeyError`. This can happen if `_bootstrap_inner` crashes before `_active[self._ident] = self` executes. We use `self._ident` because we know `set_ident()` has already been called. Moreover, remove the old comment in `_delete` because `_active_limbo_lock` became reentrant in commit 243fd01.
1 parent 909f76d commit 9ffbbd4

File tree

4 files changed

+101
-27
lines changed

4 files changed

+101
-27
lines changed

Lib/test/test_threading.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,6 @@ def test_various_ops(self):
215215
self.assertRegex(repr(t), r'^<TestThread\(.*, initial\)>$')
216216
t.start()
217217

218-
if hasattr(threading, 'get_native_id'):
219-
native_ids = set(t.native_id for t in threads) | {threading.get_native_id()}
220-
self.assertNotIn(None, native_ids)
221-
self.assertEqual(len(native_ids), NUMTASKS + 1)
222-
223218
if verbose:
224219
print('waiting for all tasks to complete')
225220
for t in threads:
@@ -228,6 +223,12 @@ def test_various_ops(self):
228223
self.assertNotEqual(t.ident, 0)
229224
self.assertIsNotNone(t.ident)
230225
self.assertRegex(repr(t), r'^<TestThread\(.*, stopped -?\d+\)>$')
226+
227+
if hasattr(threading, 'get_native_id'):
228+
native_ids = set(t.native_id for t in threads) | {threading.get_native_id()}
229+
self.assertNotIn(None, native_ids)
230+
self.assertEqual(len(native_ids), NUMTASKS + 1)
231+
231232
if verbose:
232233
print('all tasks done')
233234
self.assertEqual(numrunning.get(), 0)
@@ -307,7 +308,7 @@ def f(mutex):
307308
# Issue gh-106236:
308309
with self.assertRaises(RuntimeError):
309310
dummy_thread.join()
310-
dummy_thread._started.clear()
311+
dummy_thread._os_thread_handle._set_done()
311312
with self.assertRaises(RuntimeError):
312313
dummy_thread.is_alive()
313314
# Busy wait for the following condition: after the thread dies, the
@@ -1457,6 +1458,32 @@ def run_in_bg():
14571458
self.assertEqual(err, b"")
14581459
self.assertEqual(out.strip(), b"Exiting...")
14591460

1461+
def test_memory_error_bootstrap(self):
1462+
# gh-140746: Test that Thread.start() doesn't hang indefinitely if
1463+
# the new thread fails (MemoryError) during its initialization
1464+
1465+
def serving_thread():
1466+
1467+
def nothing():
1468+
pass
1469+
1470+
def _set_ident_memory_error():
1471+
raise MemoryError()
1472+
1473+
thread = threading.Thread(target=nothing)
1474+
with (
1475+
support.catch_unraisable_exception(),
1476+
mock.patch.object(thread, '_set_ident', _set_ident_memory_error)
1477+
):
1478+
thread.start()
1479+
self.assertFalse(thread.is_alive())
1480+
1481+
serving_thread = threading.Thread(target=serving_thread)
1482+
serving_thread.start()
1483+
serving_thread.join(0.1)
1484+
self.assertFalse(serving_thread.is_alive())
1485+
1486+
14601487
class ThreadJoinOnShutdown(BaseTestCase):
14611488

14621489
def _run_and_join(self, script):

Lib/threading.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,6 @@ class is implemented.
930930
if _HAVE_THREAD_NATIVE_ID:
931931
self._native_id = None
932932
self._os_thread_handle = _ThreadHandle()
933-
self._started = Event()
934933
self._initialized = True
935934
# Copy of sys.stderr used by self._invoke_excepthook()
936935
self._stderr = _sys.stderr
@@ -940,7 +939,6 @@ class is implemented.
940939

941940
def _after_fork(self, new_ident=None):
942941
# Private! Called by threading._after_fork().
943-
self._started._at_fork_reinit()
944942
if new_ident is not None:
945943
# This thread is alive.
946944
self._ident = new_ident
@@ -955,7 +953,7 @@ def _after_fork(self, new_ident=None):
955953
def __repr__(self):
956954
assert self._initialized, "Thread.__init__() was not called"
957955
status = "initial"
958-
if self._started.is_set():
956+
if self._os_thread_handle.is_running():
959957
status = "started"
960958
if self._os_thread_handle.is_done():
961959
status = "stopped"
@@ -978,7 +976,7 @@ def start(self):
978976
if not self._initialized:
979977
raise RuntimeError("thread.__init__() not called")
980978

981-
if self._started.is_set():
979+
if self._os_thread_handle.is_running():
982980
raise RuntimeError("threads can only be started once")
983981

984982
with _active_limbo_lock:
@@ -1001,7 +999,13 @@ def start(self):
1001999
with _active_limbo_lock:
10021000
del _limbo[self]
10031001
raise
1004-
self._started.wait() # Will set ident and native_id
1002+
self._os_thread_handle.wait_bootstrap_done()
1003+
1004+
# It's possible that the _started event never occurs from the new Thread;
1005+
# e.g., it didn't have enough memory to call the initialization part of _bootstrap_inner.
1006+
if self._os_thread_handle.is_done():
1007+
with _active_limbo_lock:
1008+
_limbo.pop(self, None)
10051009

10061010
def run(self):
10071011
"""Method representing the thread's activity.
@@ -1061,7 +1065,7 @@ def _bootstrap_inner(self):
10611065
if _HAVE_THREAD_NATIVE_ID:
10621066
self._set_native_id()
10631067
self._set_os_name()
1064-
self._started.set()
1068+
self._os_thread_handle.set_bootstrap_done()
10651069
with _active_limbo_lock:
10661070
_active[self._ident] = self
10671071
del _limbo[self]
@@ -1081,11 +1085,7 @@ def _bootstrap_inner(self):
10811085
def _delete(self):
10821086
"Remove current thread from the dict of currently running threads."
10831087
with _active_limbo_lock:
1084-
del _active[get_ident()]
1085-
# There must not be any python code between the previous line
1086-
# and after the lock is released. Otherwise a tracing function
1087-
# could try to acquire the lock again in the same thread, (in
1088-
# current_thread()), and would block.
1088+
_active.pop(self._ident, None)
10891089

10901090
def join(self, timeout=None):
10911091
"""Wait until the thread terminates.
@@ -1113,7 +1113,7 @@ def join(self, timeout=None):
11131113
"""
11141114
if not self._initialized:
11151115
raise RuntimeError("Thread.__init__() not called")
1116-
if not self._started.is_set():
1116+
if not self._os_thread_handle.is_done() and not self._os_thread_handle.is_running():
11171117
raise RuntimeError("cannot join thread before it is started")
11181118
if self is current_thread():
11191119
raise RuntimeError("cannot join current thread")
@@ -1176,7 +1176,7 @@ def is_alive(self):
11761176
11771177
"""
11781178
assert self._initialized, "Thread.__init__() not called"
1179-
return self._started.is_set() and not self._os_thread_handle.is_done()
1179+
return self._os_thread_handle.is_running() and not self._os_thread_handle.is_done()
11801180

11811181
@property
11821182
def daemon(self):
@@ -1199,7 +1199,7 @@ def daemon(self, daemonic):
11991199
raise RuntimeError("Thread.__init__() not called")
12001200
if daemonic and not _daemon_threads_allowed():
12011201
raise RuntimeError('daemon threads are disabled in this interpreter')
1202-
if self._started.is_set():
1202+
if self._os_thread_handle.is_running() or self._os_thread_handle.is_done():
12031203
raise RuntimeError("cannot set daemon status of active thread")
12041204
self._daemonic = daemonic
12051205

@@ -1385,7 +1385,6 @@ class _MainThread(Thread):
13851385

13861386
def __init__(self):
13871387
Thread.__init__(self, name="MainThread", daemon=False)
1388-
self._started.set()
13891388
self._ident = _get_main_thread_ident()
13901389
self._os_thread_handle = _make_thread_handle(self._ident)
13911390
if _HAVE_THREAD_NATIVE_ID:
@@ -1433,7 +1432,6 @@ class _DummyThread(Thread):
14331432
def __init__(self):
14341433
Thread.__init__(self, name=_newname("Dummy-%d"),
14351434
daemon=_daemon_threads_allowed())
1436-
self._started.set()
14371435
self._set_ident()
14381436
self._os_thread_handle = _make_thread_handle(self._ident)
14391437
if _HAVE_THREAD_NATIVE_ID:
@@ -1443,7 +1441,7 @@ def __init__(self):
14431441
_DeleteDummyThreadOnDel(self)
14441442

14451443
def is_alive(self):
1446-
if not self._os_thread_handle.is_done() and self._started.is_set():
1444+
if self._os_thread_handle.is_running() and not self._os_thread_handle.is_done():
14471445
return True
14481446
raise RuntimeError("thread is not alive")
14491447

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix :func:`threading.Thread.start` that can hang indefinitely in case of heap memory exhaustion during initialization of the new thread.

Modules/_threadmodule.c

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ typedef enum {
103103
THREAD_HANDLE_NOT_STARTED = 1,
104104
THREAD_HANDLE_STARTING = 2,
105105
THREAD_HANDLE_RUNNING = 3,
106-
THREAD_HANDLE_DONE = 4,
106+
THREAD_HANDLE_BOOTSTRAPED = 4,
107+
THREAD_HANDLE_DONE = 5,
107108
} ThreadHandleState;
108109

109110
// A handle to wait for thread completion.
@@ -139,6 +140,7 @@ typedef struct {
139140

140141
PyMutex mutex;
141142

143+
PyEvent thread_is_bootstraped;
142144
// Set immediately before `thread_run` returns to indicate that the OS
143145
// thread is about to exit. This is used to avoid false positives when
144146
// detecting self-join attempts. See the comment in `ThreadHandle_join()`
@@ -231,6 +233,7 @@ ThreadHandle_new(void)
231233
self->os_handle = 0;
232234
self->has_os_handle = 0;
233235
self->thread_is_exiting = (PyEvent){0};
236+
self->thread_is_bootstraped = (PyEvent){0};
234237
self->mutex = (PyMutex){_Py_UNLOCKED};
235238
self->once = (_PyOnceFlag){0};
236239
self->state = THREAD_HANDLE_NOT_STARTED;
@@ -322,6 +325,7 @@ _PyThread_AfterFork(struct _pythread_runtime_state *state)
322325
handle->once = (_PyOnceFlag){_Py_ONCE_INITIALIZED};
323326
handle->mutex = (PyMutex){_Py_UNLOCKED};
324327
_PyEvent_Notify(&handle->thread_is_exiting);
328+
_PyEvent_Notify(&handle->thread_is_bootstraped);
325329
llist_remove(node);
326330
remove_from_shutdown_handles(handle);
327331
}
@@ -393,6 +397,8 @@ thread_run(void *boot_raw)
393397
PyErr_FormatUnraisable(
394398
"Exception ignored in thread started by %R", boot->func);
395399
}
400+
set_thread_handle_state(handle, THREAD_HANDLE_DONE);
401+
_PyEvent_Notify(&handle->thread_is_bootstraped);
396402
}
397403
else {
398404
Py_DECREF(res);
@@ -424,6 +430,7 @@ force_done(void *arg)
424430
assert(get_thread_handle_state(handle) == THREAD_HANDLE_STARTING);
425431
_PyEvent_Notify(&handle->thread_is_exiting);
426432
set_thread_handle_state(handle, THREAD_HANDLE_DONE);
433+
_PyEvent_Notify(&handle->thread_is_bootstraped);
427434
return 0;
428435
}
429436

@@ -488,7 +495,7 @@ ThreadHandle_start(ThreadHandle *self, PyObject *func, PyObject *args,
488495
self->state = THREAD_HANDLE_RUNNING;
489496
PyMutex_Unlock(&self->mutex);
490497

491-
// Unblock the thread
498+
// Unblock the thread and wait the running signal
492499
_PyEvent_Notify(&boot->handle_ready);
493500

494501
return 0;
@@ -502,7 +509,7 @@ static int
502509
join_thread(void *arg)
503510
{
504511
ThreadHandle *handle = (ThreadHandle*)arg;
505-
assert(get_thread_handle_state(handle) == THREAD_HANDLE_RUNNING);
512+
assert(get_thread_handle_state(handle) >= THREAD_HANDLE_RUNNING);
506513
PyThread_handle_t os_handle;
507514
if (ThreadHandle_get_os_handle(handle, &os_handle)) {
508515
int err = 0;
@@ -596,7 +603,9 @@ static int
596603
set_done(void *arg)
597604
{
598605
ThreadHandle *handle = (ThreadHandle*)arg;
599-
assert(get_thread_handle_state(handle) == THREAD_HANDLE_RUNNING);
606+
assert(
607+
get_thread_handle_state(handle) == THREAD_HANDLE_RUNNING ||
608+
get_thread_handle_state(handle) == THREAD_HANDLE_BOOTSTRAPED);
600609
if (detach_thread(handle) < 0) {
601610
PyErr_SetString(ThreadError, "failed detaching handle");
602611
return -1;
@@ -621,6 +630,13 @@ ThreadHandle_set_done(ThreadHandle *self)
621630
return 0;
622631
}
623632

633+
static void
634+
ThreadHandle_set_bootstrap_done(ThreadHandle *self)
635+
{
636+
set_thread_handle_state(self, THREAD_HANDLE_BOOTSTRAPED);
637+
_PyEvent_Notify(&self->thread_is_bootstraped);
638+
}
639+
624640
// A wrapper around a ThreadHandle.
625641
typedef struct {
626642
PyObject_HEAD
@@ -707,6 +723,35 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args)
707723
Py_RETURN_NONE;
708724
}
709725

726+
static PyObject *
727+
PyThreadHandleObject_is_running(PyObject *op, PyObject *Py_UNUSED(dummy))
728+
{
729+
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
730+
ThreadHandleState state = get_thread_handle_state(self->handle);
731+
if (state == THREAD_HANDLE_RUNNING || state == THREAD_HANDLE_BOOTSTRAPED) {
732+
Py_RETURN_TRUE;
733+
}
734+
else {
735+
Py_RETURN_FALSE;
736+
}
737+
}
738+
739+
static PyObject *
740+
PyThreadHandleObject_wait_bootstrap_done(PyObject *op, PyObject *Py_UNUSED(dummy))
741+
{
742+
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
743+
PyEvent_Wait(&self->handle->thread_is_bootstraped);
744+
Py_RETURN_NONE;
745+
}
746+
747+
static PyObject *
748+
PyThreadHandleObject_set_bootstrap_done(PyObject *op, PyObject *Py_UNUSED(dummy))
749+
{
750+
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
751+
ThreadHandle_set_bootstrap_done(self->handle);
752+
Py_RETURN_NONE;
753+
}
754+
710755
static PyObject *
711756
PyThreadHandleObject_is_done(PyObject *op, PyObject *Py_UNUSED(dummy))
712757
{
@@ -740,6 +785,9 @@ static PyGetSetDef ThreadHandle_getsetlist[] = {
740785
static PyMethodDef ThreadHandle_methods[] = {
741786
{"join", PyThreadHandleObject_join, METH_VARARGS, NULL},
742787
{"_set_done", PyThreadHandleObject_set_done, METH_NOARGS, NULL},
788+
{"wait_bootstrap_done", PyThreadHandleObject_wait_bootstrap_done, METH_NOARGS, NULL},
789+
{"set_bootstrap_done", PyThreadHandleObject_set_bootstrap_done, METH_NOARGS, NULL},
790+
{"is_running", PyThreadHandleObject_is_running, METH_NOARGS, NULL},
743791
{"is_done", PyThreadHandleObject_is_done, METH_NOARGS, NULL},
744792
{0, 0}
745793
};

0 commit comments

Comments
 (0)