Skip to content

Commit

Permalink
Task: detect no-answer loop when allow_null_result=True (#504)
Browse files Browse the repository at this point in the history
  • Loading branch information
pchalasani committed Jun 16, 2024
1 parent 1a31f93 commit 5599fa4
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 19 deletions.
45 changes: 30 additions & 15 deletions langroid/agent/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,16 +184,18 @@ def __init__(
Note: erasing can reduce prompt sizes, but results in repetitive
sub-task delegation.
allow_null_result (bool):
If true, allow null (empty or NO_ANSWER) as the result of a step or
overall task result.
If true, create a dummy NO_ANSWER response when no valid response is found
in a step.
Optional, default is False.
*Note:* In non-interactive mode, when this is set to True,
you can have a situation where an LLM generates (non-tool) text,
and no other responders have valid responses, and a "Null result"
is inserted as a dummy response from the User entity, so the LLM
will now respond to this Null result, and this will continue
until the LLM emits a DONE signal (if instructed to do so),
otherwise it can result in an infinite loop.
otherwise Langroid detects a potential infinite loop after
a certain number of such steps (= `TaskConfig.inf_loop_wait_factor`)
and raises an InfiniteLoopException.
max_stalled_steps (int): task considered done after this many consecutive
steps with no progress. Default is 3.
done_if_no_response (List[Responder]): consider task done if NULL
Expand Down Expand Up @@ -1047,15 +1049,10 @@ async def step_async(self, turns: int = -1) -> ChatDocument | None:
self._show_pending_message_if_debug()
return self.pending_message

def _process_valid_responder_result(
self,
r: Responder,
parent: ChatDocument | None,
result: ChatDocument,
) -> None:
"""Processes valid result from a responder, during a step"""
def _update_no_answer_vars(self, result: ChatDocument) -> None:
"""Update variables related to NO_ANSWER responses, to aid
in alternating NO_ANSWER infinite-loop detection."""

# in case the valid response was a NO_ANSWER,
if NO_ANSWER in result.content:
if self._no_answer_step == self._step_idx - 2:
# N/A two steps ago
Expand All @@ -1067,6 +1064,16 @@ def _process_valid_responder_result(
# record the last step where the best explicit response was N/A
self._no_answer_step = self._step_idx

def _process_valid_responder_result(
self,
r: Responder,
parent: ChatDocument | None,
result: ChatDocument,
) -> None:
"""Processes valid result from a responder, during a step"""

self._update_no_answer_vars(result)

# pending_sender is of type Responder,
# i.e. it is either one of the agent's entities
# OR a sub-task, that has produced a valid response.
Expand Down Expand Up @@ -1131,6 +1138,7 @@ def _process_invalid_step_result(self, parent: ChatDocument | None) -> None:
metadata=ChatDocMetaData(sender=responder, parent_id=parent_id),
)
self.pending_sender = responder
self._update_no_answer_vars(self.pending_message)
self.log_message(self.pending_sender, self.pending_message, mark=True)

def _show_pending_message_if_debug(self) -> None:
Expand Down Expand Up @@ -1346,13 +1354,20 @@ def _is_done_response(
def _maybe_infinite_loop(self) -> bool:
"""
Detect possible infinite loop based on message frequencies.
NOTE: This only (attempts to) detect "exact" loops, i.e. a cycle
of messages that repeats exactly, e.g.
NOTE: This detects two types of loops:
- Alternating NO_ANSWER loops, specifically of the form
x1 NO_ANSWER x2 NO_ANSWER x3 NO_ANSWER...
(e.g. an LLM repeatedly saying something different, and another responder
or sub-task saying NO_ANSWER -- i.e. "DO-NOT-KNOW")
- "exact" loops, i.e. a cycle of messages that repeats exactly, e.g.
a r b i t r a t e r a t e r a t e r a t e ...
[It does not detect "approximate" loops, where the LLM is generating a
sequence of messages that are similar, but not exactly the same.]
[It does not detect more general "approximate" loops, where two entities are
responding to each other potentially forever, with (slightly) different
messages each time]
Here is the logic for the exact-loop detection:
Intuition: when you look at a sufficiently long sequence with an m-message
loop, then the frequencies of these m messages will "dominate" those
of all other messages.
Expand Down
24 changes: 20 additions & 4 deletions tests/main/test_task_inf_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,19 +86,35 @@ def user_response(


def test_task_stall():
"""Test that task.run() bails when stalled"""
"""Test that task.run() bails when stalled, i.e. no valid response
for many steps."""

agent = ChatAgent(
ChatAgentConfig(
name="Random",
llm=MockLMConfig(response_fn=lambda x: choice(["1", "2", "3"])),
llm=MockLMConfig(
response_fn=lambda x: choice([str(x) for x in range(30)]),
),
)
)

alice_task = lr.Task(agent, interactive=False)
result = alice_task.run(turns=100)
# interactive=False, so in each step,
# other than LLM, other responders have no response -> stalled
task = lr.Task(agent, interactive=False)
result = task.run(turns=100)
assert result is None

# set allow_null_result=True, so in each step, when no valid response is found,
# we create a dummy NO_ANSWER response from the entity "opposite" to the author
# of the pending message, i.e.
# - if the author was LLM, then the entity is USER
# - if the author was not LLM, then the entity is LLM
# But this should result in an "alternating NA infinite loop", i.e.
# LLM says x1, then USER says NA, then LLM says x2, then USER says NA, ...
task = lr.Task(agent, restart=True, interactive=False, allow_null_result=True)
with pytest.raises(lr.InfiniteLoopException):
task.run(turns=100)


def test_task_alternating_no_answer():
"""Test that task.run() bails when there's a long enough
Expand Down

0 comments on commit 5599fa4

Please sign in to comment.