Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion Lib/test/test_threading.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def f(mutex):
# Issue gh-106236:
with self.assertRaises(RuntimeError):
dummy_thread.join()
dummy_thread._started.clear()
dummy_thread._os_thread_handle._set_done()
with self.assertRaises(RuntimeError):
dummy_thread.is_alive()
# Busy wait for the following condition: after the thread dies, the
Expand Down Expand Up @@ -1457,6 +1457,37 @@ def run_in_bg():
self.assertEqual(err, b"")
self.assertEqual(out.strip(), b"Exiting...")

def test_memory_error_bootstrap(self):
# gh-140746: Test that Thread.start() doesn't hang indefinitely if
# the new thread fails (MemoryError) during its initialization

def serving_thread():

def nothing():
pass

def _set_ident_memory_error():
raise MemoryError()

thread = threading.Thread(target=nothing)
with (
support.catch_unraisable_exception(),
mock.patch.object(thread, '_set_ident', _set_ident_memory_error)
):
thread.start()
thread.join()
self.assertFalse(thread.is_alive())
self.assertFalse(thread in threading._limbo)
self.assertFalse(thread in threading._active)

serving_thread = threading.Thread(target=serving_thread)
serving_thread.start()
serving_thread.join(0.1)
self.assertFalse(serving_thread.is_alive())
self.assertFalse(serving_thread in threading._limbo)
self.assertFalse(serving_thread in threading._active)


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that worth testing a use case where the _bootstrap method will be never called just to check that the thread is not running ?
Is that worth testing a use case where the an exception is raised in the _start_joinable_thread just to check that the thread is not running ? This last one could simulate your initial issue.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial issue is when the new thread crashes inside the _bootstrap/_bootstrap_inner before signaling that it starts.

Is that worth testing a use case where the _bootstrap method will be never called just to check that the thread is not running ?
Is that worth testing a use case where the an exception is raised in the _start_joinable_thread just to check that the thread is not running ? This last one could simulate your initial issue.

There is already test_start_new_thread_failed and that sounds a bit out of scope IMO.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial issue is when the new thread crashes inside the _bootstrap/_bootstrap_inner before signaling that it starts.
There is already test_start_new_thread_failed and that sounds a bit out of scope IMO.

I agree, sorry for the misunderstood.

Copy link
Contributor

@YvesDup YvesDup Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From one comment: IMHO, your proposal is actually less robust since it is possible that Python raises a memory error before calling _bootstrap_inner at all and then it will lead to the same issue. That's why the "recovery" code added is on the parent thread.
My test isn't perfect and it doesn't cover all case: Memory can happen before calling _bootstrap from the PyObject_Call C method, for example

From the previous: My initial issue is when the new thread crashes inside the _bootstrap/_bootstrap_inner before signaling that it starts.

Where is this issue actually located ? Is your example with few memory (ulimit -v 1000000) the only one that often fails ?
It bothers me that we don't have a reproducible example, even though I understand that this is a complex issue.
If you agree, I will try to work on a reproductible example.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this issue actually located ?

It can be located at every place that allocates memory before self._started.is_set() in the new Thread. PyObject_Call in thread_run is only one example; it can happen calling _bootstrap_inner, self._set_ident(), self._set_native_id(), and so on. In my test, I chose one of them to demonstrate the problem and test it.

class ThreadJoinOnShutdown(BaseTestCase):

def _run_and_join(self, script):
Expand Down
34 changes: 17 additions & 17 deletions Lib/threading.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,6 @@ class is implemented.
if _HAVE_THREAD_NATIVE_ID:
self._native_id = None
self._os_thread_handle = _ThreadHandle()
self._started = Event()
self._initialized = True
# Copy of sys.stderr used by self._invoke_excepthook()
self._stderr = _sys.stderr
Expand All @@ -940,7 +939,6 @@ class is implemented.

def _after_fork(self, new_ident=None):
# Private! Called by threading._after_fork().
self._started._at_fork_reinit()
if new_ident is not None:
# This thread is alive.
self._ident = new_ident
Expand All @@ -955,7 +953,7 @@ def _after_fork(self, new_ident=None):
def __repr__(self):
assert self._initialized, "Thread.__init__() was not called"
status = "initial"
if self._started.is_set():
if self._os_thread_handle.is_bootstraped():
status = "started"
if self._os_thread_handle.is_done():
status = "stopped"
Expand All @@ -978,7 +976,7 @@ def start(self):
if not self._initialized:
raise RuntimeError("thread.__init__() not called")

if self._started.is_set():
if self._os_thread_handle.is_bootstraped():
raise RuntimeError("threads can only be started once")

with _active_limbo_lock:
Expand All @@ -1001,7 +999,15 @@ def start(self):
with _active_limbo_lock:
del _limbo[self]
raise
self._started.wait() # Will set ident and native_id
self._os_thread_handle.wait_bootstraped()

# It's possible that the _bootstrap(_inner) fails in the new Thread (e.g. Memory Error);
# We have to clean `_limbo` and `_active` here to avoid inconsistent state as much as possible
if self._os_thread_handle.is_failed():
with _active_limbo_lock:
_limbo.pop(self, None)
if self._ident:
_active.pop(self._ident, None)

def run(self):
"""Method representing the thread's activity.
Expand Down Expand Up @@ -1061,7 +1067,7 @@ def _bootstrap_inner(self):
if _HAVE_THREAD_NATIVE_ID:
self._set_native_id()
self._set_os_name()
self._started.set()
self._os_thread_handle.set_bootstraped()
with _active_limbo_lock:
_active[self._ident] = self
del _limbo[self]
Expand All @@ -1081,11 +1087,7 @@ def _bootstrap_inner(self):
def _delete(self):
"Remove current thread from the dict of currently running threads."
with _active_limbo_lock:
del _active[get_ident()]
# There must not be any python code between the previous line
# and after the lock is released. Otherwise a tracing function
# could try to acquire the lock again in the same thread, (in
# current_thread()), and would block.
_active.pop(self._ident, None)

def join(self, timeout=None):
"""Wait until the thread terminates.
Expand Down Expand Up @@ -1113,7 +1115,7 @@ def join(self, timeout=None):
"""
if not self._initialized:
raise RuntimeError("Thread.__init__() not called")
if not self._started.is_set():
if not self._os_thread_handle.is_done() and not self._os_thread_handle.is_bootstraped():
raise RuntimeError("cannot join thread before it is started")
if self is current_thread():
raise RuntimeError("cannot join current thread")
Expand Down Expand Up @@ -1176,7 +1178,7 @@ def is_alive(self):

"""
assert self._initialized, "Thread.__init__() not called"
return self._started.is_set() and not self._os_thread_handle.is_done()
return self._os_thread_handle.is_bootstraped() and not self._os_thread_handle.is_done()

@property
def daemon(self):
Expand All @@ -1199,7 +1201,7 @@ def daemon(self, daemonic):
raise RuntimeError("Thread.__init__() not called")
if daemonic and not _daemon_threads_allowed():
raise RuntimeError('daemon threads are disabled in this interpreter')
if self._started.is_set():
if self._os_thread_handle.is_bootstraped() or self._os_thread_handle.is_done():
raise RuntimeError("cannot set daemon status of active thread")
self._daemonic = daemonic

Expand Down Expand Up @@ -1385,7 +1387,6 @@ class _MainThread(Thread):

def __init__(self):
Thread.__init__(self, name="MainThread", daemon=False)
self._started.set()
self._ident = _get_main_thread_ident()
self._os_thread_handle = _make_thread_handle(self._ident)
if _HAVE_THREAD_NATIVE_ID:
Expand Down Expand Up @@ -1433,7 +1434,6 @@ class _DummyThread(Thread):
def __init__(self):
Thread.__init__(self, name=_newname("Dummy-%d"),
daemon=_daemon_threads_allowed())
self._started.set()
self._set_ident()
self._os_thread_handle = _make_thread_handle(self._ident)
if _HAVE_THREAD_NATIVE_ID:
Expand All @@ -1443,7 +1443,7 @@ def __init__(self):
_DeleteDummyThreadOnDel(self)

def is_alive(self):
if not self._os_thread_handle.is_done() and self._started.is_set():
if self._os_thread_handle.is_bootstraped() and not self._os_thread_handle.is_done():
return True
raise RuntimeError("thread is not alive")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix :func:`threading.Thread.start` that can hang indefinitely in case of heap memory exhaustion during initialization of the new thread.
62 changes: 59 additions & 3 deletions Modules/_threadmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ typedef enum {
THREAD_HANDLE_NOT_STARTED = 1,
THREAD_HANDLE_STARTING = 2,
THREAD_HANDLE_RUNNING = 3,
THREAD_HANDLE_DONE = 4,
THREAD_HANDLE_FAILED = 4,
THREAD_HANDLE_DONE = 5,
} ThreadHandleState;

// A handle to wait for thread completion.
Expand Down Expand Up @@ -139,6 +140,7 @@ typedef struct {

PyMutex mutex;

PyEvent thread_is_bootstraped;
// Set immediately before `thread_run` returns to indicate that the OS
// thread is about to exit. This is used to avoid false positives when
// detecting self-join attempts. See the comment in `ThreadHandle_join()`
Expand Down Expand Up @@ -231,6 +233,7 @@ ThreadHandle_new(void)
self->os_handle = 0;
self->has_os_handle = 0;
self->thread_is_exiting = (PyEvent){0};
self->thread_is_bootstraped = (PyEvent){0};
self->mutex = (PyMutex){_Py_UNLOCKED};
self->once = (_PyOnceFlag){0};
self->state = THREAD_HANDLE_NOT_STARTED;
Expand Down Expand Up @@ -286,7 +289,8 @@ ThreadHandle_decref(ThreadHandle *self)
// 1. This is the destructor; nothing else holds a reference.
// 2. The refcount going to zero is a "synchronizes-with" event; all
// changes from other threads are visible.
if (self->state == THREAD_HANDLE_RUNNING && !detach_thread(self)) {
if ((self->state == THREAD_HANDLE_RUNNING || self->state == THREAD_HANDLE_FAILED)
&& !detach_thread(self)) {
self->state = THREAD_HANDLE_DONE;
}

Expand Down Expand Up @@ -322,6 +326,7 @@ _PyThread_AfterFork(struct _pythread_runtime_state *state)
handle->once = (_PyOnceFlag){_Py_ONCE_INITIALIZED};
handle->mutex = (PyMutex){_Py_UNLOCKED};
_PyEvent_Notify(&handle->thread_is_exiting);
_PyEvent_Notify(&handle->thread_is_bootstraped);
llist_remove(node);
remove_from_shutdown_handles(handle);
}
Expand Down Expand Up @@ -393,6 +398,9 @@ thread_run(void *boot_raw)
PyErr_FormatUnraisable(
"Exception ignored in thread started by %R", boot->func);
}
// Notify that the bootstraped is done and failed (e.g. Memory error).
set_thread_handle_state(handle, THREAD_HANDLE_FAILED);
_PyEvent_Notify(&handle->thread_is_bootstraped);
}
else {
Py_DECREF(res);
Expand Down Expand Up @@ -502,7 +510,10 @@ static int
join_thread(void *arg)
{
ThreadHandle *handle = (ThreadHandle*)arg;
assert(get_thread_handle_state(handle) == THREAD_HANDLE_RUNNING);
assert(
get_thread_handle_state(handle) == THREAD_HANDLE_RUNNING ||
get_thread_handle_state(handle) == THREAD_HANDLE_FAILED
);
PyThread_handle_t os_handle;
if (ThreadHandle_get_os_handle(handle, &os_handle)) {
int err = 0;
Expand Down Expand Up @@ -707,6 +718,46 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args)
Py_RETURN_NONE;
}

static PyObject *
PyThreadHandleObject_is_bootstraped(PyObject *op, PyObject *Py_UNUSED(dummy))
{
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
if (_PyEvent_IsSet(&self->handle->thread_is_bootstraped)) {
Py_RETURN_TRUE;
}
else {
Py_RETURN_FALSE;
}
}

static PyObject *
PyThreadHandleObject_wait_bootstraped(PyObject *op, PyObject *Py_UNUSED(dummy))
{
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
PyEvent_Wait(&self->handle->thread_is_bootstraped);
Py_RETURN_NONE;
}

static PyObject *
PyThreadHandleObject_set_bootstraped(PyObject *op, PyObject *Py_UNUSED(dummy))
{
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
_PyEvent_Notify(&self->handle->thread_is_bootstraped);
Py_RETURN_NONE;
}

static PyObject *
PyThreadHandleObject_is_failed(PyObject *op, PyObject *Py_UNUSED(dummy))
{
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
if (get_thread_handle_state(self->handle) == THREAD_HANDLE_FAILED) {
Py_RETURN_TRUE;
}
else {
Py_RETURN_FALSE;
}
}

static PyObject *
PyThreadHandleObject_is_done(PyObject *op, PyObject *Py_UNUSED(dummy))
{
Expand Down Expand Up @@ -740,6 +791,10 @@ static PyGetSetDef ThreadHandle_getsetlist[] = {
static PyMethodDef ThreadHandle_methods[] = {
{"join", PyThreadHandleObject_join, METH_VARARGS, NULL},
{"_set_done", PyThreadHandleObject_set_done, METH_NOARGS, NULL},
{"wait_bootstraped", PyThreadHandleObject_wait_bootstraped, METH_NOARGS, NULL},
{"set_bootstraped", PyThreadHandleObject_set_bootstraped, METH_NOARGS, NULL},
{"is_bootstraped", PyThreadHandleObject_is_bootstraped, METH_NOARGS, NULL},
{"is_failed", PyThreadHandleObject_is_failed, METH_NOARGS, NULL},
{"is_done", PyThreadHandleObject_is_done, METH_NOARGS, NULL},
{0, 0}
};
Expand Down Expand Up @@ -2466,6 +2521,7 @@ thread__make_thread_handle(PyObject *module, PyObject *identobj)
hobj->handle->ident = ident;
hobj->handle->state = THREAD_HANDLE_RUNNING;
PyMutex_Unlock(&hobj->handle->mutex);
_PyEvent_Notify(&hobj->handle->thread_is_bootstraped);
return (PyObject*) hobj;
}

Expand Down
Loading