Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TaskStatus.started() being a no-op when in an effectively cancelled scope #2896

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions newsfragments/2895.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
When :func:`trio.TaskStatus.started` is called in a cancelled scope, it no
longer returns (indicating that the task has been moved and is now running as a
child of the nursery) without actually moving the task. Previously,
:func:`trio.TaskStatus.started` would not move the task in this scenario,
causing the call to :func:`trio.Nursery.start` to incorrectly wait until the
started task finished, which caused deadlocks and other issues.

:func:`trio.TaskStatus.started` is no longer allowed to be called by an
exception handler that is handling one or more :exc:`Cancelled` exceptions;
attempting to do so will now raise a :exc:`RuntimeError`. Note that any code
that did this prior to this ban was already buggy, because it was attempting to
teleport :exc:`Cancelled` exception(s) to a place in the task tree without the
corresponding :class:`CancelScope`(s) to catch them.
37 changes: 29 additions & 8 deletions src/trio/_core/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,20 +876,35 @@ def started(self: _TaskStatus[StatusT], value: StatusT) -> None:
def started(self, value: StatusT | None = None) -> None:
if self._value is not _NoStatus:
raise RuntimeError("called 'started' twice on the same task status")
self._value = cast(StatusT, value) # If None, StatusT == None

# If the old nursery is cancelled, then quietly quit now; the child
# will eventually exit on its own, and we don't want to risk moving
# children that might have propagating Cancelled exceptions into
# a place with no cancelled cancel scopes to catch them.
assert self._old_nursery._cancel_status is not None
if self._old_nursery._cancel_status.effectively_cancelled:
return
# Make sure we don't move a task with propagating Cancelled exception(s)
# to a place in the tree without the corresponding cancel scope(s).
#
# N.B.: This check is limited to the task that calls started(). If the
# user uses lowlevel.current_task().parent_nursery to add other tasks to
# the private implementation-detail nursery of start(), this won't be
# able to check those tasks. See #1599.
_, exc, _ = sys.exc_info()
while exc is not None:
handling_cancelled = False
if isinstance(exc, Cancelled):
handling_cancelled = True
elif isinstance(exc, BaseExceptionGroup):
matched, _ = exc.split(Cancelled)
if matched:
handling_cancelled = True
if handling_cancelled:
raise RuntimeError(
"task_status.started() cannot be called while handling Cancelled(s)"
)
exc = exc.__context__

# Can't be closed, b/c we checked in start() and then _pending_starts
# should keep it open.
assert not self._new_nursery._closed

self._value = cast(StatusT, value) # If None, StatusT == None

# Move tasks from the old nursery to the new
tasks = self._old_nursery._children
self._old_nursery._children = set()
Expand Down Expand Up @@ -1234,6 +1249,12 @@ async def async_fn(arg1, arg2, *, task_status=trio.TASK_STATUS_IGNORED):

If the child task passes a value to :meth:`task_status.started(value) <TaskStatus.started>`,
then :meth:`start` returns this value. Otherwise, it returns ``None``.

:meth:`task_status.started() <TaskStatus.started>` cannot be called by
an exception handler (or other cleanup code, like ``finally`` blocks,
``__aexit__`` handlers, and so on) that is handling one or more
:exc:`Cancelled` exceptions. (It'll raise a :exc:`RuntimeError` if you
violate this rule.)
"""
if self._closed:
raise RuntimeError("Nursery is closed to new arrivals")
Expand Down
101 changes: 76 additions & 25 deletions src/trio/_core/_tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
Callable,
Generator,
)
from exceptiongroup import catch

if sys.version_info < (3, 11):
from exceptiongroup import BaseExceptionGroup, ExceptionGroup
Expand Down Expand Up @@ -1808,51 +1809,101 @@
await nursery.start(nothing)
assert "exited without calling" in str(excinfo1.value)

# if the call to start() is cancelled, then the call to started() does
# nothing -- the child keeps executing under start(). The value it passed
# is ignored; start() raises Cancelled.
async def just_started(
*,
# if the call to start() is effectively cancelled when the child checkpoints
# (before calling started()), the child raises Cancelled up out of start().
# The value it passed is ignored; start() raises Cancelled.
async def checkpoint_before_started(
task_status: _core.TaskStatus[str] = _core.TASK_STATUS_IGNORED,
) -> None:
task_status.started("hi")
await _core.checkpoint()
raise AssertionError() # pragma: no cover

async with _core.open_nursery() as nursery:
with _core.CancelScope() as cs:
cs.cancel()
with pytest.raises(_core.Cancelled):
await nursery.start(just_started)
await nursery.start(checkpoint_before_started)

# but if the call to start() is effectively cancelled when the child calls
# started(), it is too late to deliver the start() cancellation to the
# child, so the start() call returns and the child continues in the nursery.
async def no_checkpoint_before_started(
eventual_parent_nursery: _core.Nursery,
*,
task_status: _core.TaskStatus[None] = _core.TASK_STATUS_IGNORED,
) -> None:
assert _core.current_task().eventual_parent_nursery is eventual_parent_nursery
task_status.started()
assert _core.current_task().eventual_parent_nursery is None
assert _core.current_task().parent_nursery is eventual_parent_nursery
# This task is no longer in an effectively cancelled scope, so it should
# be able to pass through a checkpoint.
await _core.checkpoint()
raise KeyError(f"I should come out of {eventual_parent_nursery!r}")

with pytest.raises(KeyError): # noqa: PT012
async with _core.open_nursery() as nursery:
with _core.CancelScope() as cs:
cs.cancel()
try:
await nursery.start(no_checkpoint_before_started, nursery)
except KeyError as exc:

Check warning on line 1850 in src/trio/_core/_tests/test_run.py

View check run for this annotation

Codecov / codecov/patch

src/trio/_core/_tests/test_run.py#L1850

Added line #L1850 was not covered by tests
raise AssertionError() from exc # pragma: no cover
assert not cs.cancelled_caught

# calling started() while handling a Cancelled raises an error immediately.
async def started_while_handling_cancelled(
task_status: _core.TaskStatus[None] = _core.TASK_STATUS_IGNORED,
) -> None:
try:
await _core.checkpoint()
except _core.Cancelled:
task_status.started()
raise AssertionError() # pragma: no cover

async with _core.open_nursery() as nursery:
with _core.CancelScope() as cs:
cs.cancel()
with pytest.raises(RuntimeError):
await nursery.start(started_while_handling_cancelled)

# but if the task does not execute any checkpoints, and exits, then start()
# doesn't raise Cancelled, since the task completed successfully.
async def started_with_no_checkpoint(
*, task_status: _core.TaskStatus[None] = _core.TASK_STATUS_IGNORED
# calling started() while handling multiple Cancelleds raises an error
# immediately.
async def started_while_handling_multiple_cancelled(
task_status: _core.TaskStatus[None] = _core.TASK_STATUS_IGNORED,
) -> None:
task_status.started(None)
with catch({_core.Cancelled: lambda _: task_status.started()}):
async with _core.open_nursery() as nursery:
nursery.start_soon(_core.checkpoint)
nursery.start_soon(_core.checkpoint)
raise AssertionError() # pragma: no cover

async with _core.open_nursery() as nursery:
with _core.CancelScope() as cs:
cs.cancel()
await nursery.start(started_with_no_checkpoint)
assert not cs.cancelled_caught

# and since starting in a cancelled context makes started() a no-op, if
# the child crashes after calling started(), the error can *still* come
# out of start()
async def raise_keyerror_after_started(
*, task_status: _core.TaskStatus[None] = _core.TASK_STATUS_IGNORED
with pytest.raises(RuntimeError):
await nursery.start(started_while_handling_multiple_cancelled)

# calling started() while handling an exception while handling Cancelled(s) raises an error immediately.
async def started_while_handling_exc_while_handling_cancelled(
task_status: _core.TaskStatus[None] = _core.TASK_STATUS_IGNORED,
) -> None:
task_status.started()
raise KeyError("whoopsiedaisy")
try:
await _core.checkpoint()
except _core.Cancelled:
try:
raise ValueError
except ValueError:
task_status.started()
raise AssertionError() # pragma: no cover

async with _core.open_nursery() as nursery:
with _core.CancelScope() as cs:
cs.cancel()
with pytest.raises(KeyError):
await nursery.start(raise_keyerror_after_started)
with pytest.raises(RuntimeError):
await nursery.start(started_while_handling_exc_while_handling_cancelled)

# trying to start in a closed nursery raises an error immediately
# trying to start in a closed nursery raises an error immediately.
async with _core.open_nursery() as closed_nursery:
pass
t0 = _core.current_time()
Expand Down
Loading