Task: detect no-answer loop when allow_null_result=True (#504)

langroid · Jun 16, 2024 · 5599fa4 · 5599fa4
1 parent 1a31f93
commit 5599fa4
Show file tree

Hide file tree

Showing 2 changed files with 50 additions and 19 deletions.
diff --git a/langroid/agent/task.py b/langroid/agent/task.py
@@ -184,16 +184,18 @@ def __init__(
                 Note: erasing can reduce prompt sizes, but results in repetitive
                 sub-task delegation.
             allow_null_result (bool):
-                If true, allow null (empty or NO_ANSWER) as the result of a step or
-                overall task result.
+                If true, create a dummy NO_ANSWER response when no valid response
+                is found in a step.
                 Optional, default is False.
                 *Note:* In non-interactive mode, when this is set to True,
                 you can have a situation where an LLM generates (non-tool) text,
                 and no other responders have valid responses, and a "Null result"
                 is inserted as a dummy response from the User entity, so the LLM
                 will now respond to this Null result, and this will continue
                 until the LLM emits a DONE signal (if instructed to do so),
-                otherwise it can result in an infinite loop.
+                otherwise langroid detects a potential infinite loop after
+                a certain number of such steps (= `TaskConfig.inf_loop_wait_factor`)
+                and raises an `InfiniteLoopException`.
             max_stalled_steps (int): task considered done after this many consecutive
                 steps with no progress. Default is 3.
             done_if_no_response (List[Responder]): consider task done if NULL
@@ -1047,15 +1049,10 @@ async def step_async(self, turns: int = -1) -> ChatDocument | None:
         self._show_pending_message_if_debug()
         return self.pending_message
 
-    def _process_valid_responder_result(
-        self,
-        r: Responder,
-        parent: ChatDocument | None,
-        result: ChatDocument,
-    ) -> None:
-        """Processes valid result from a responder, during a step"""
+    def _update_no_answer_vars(self, result: ChatDocument) -> None:
+        """Update variables related to NO_ANSWER responses, to aid
+        in alternating NO_ANSWER infinite-loop detection."""
 
-        # in case the valid response was a NO_ANSWER,
         if NO_ANSWER in result.content:
             if self._no_answer_step == self._step_idx - 2:
                 # N/A two steps ago
@@ -1067,6 +1064,16 @@ def _process_valid_responder_result(
             # record the last step where the best explicit response was N/A
             self._no_answer_step = self._step_idx
 
+    def _process_valid_responder_result(
+        self,
+        r: Responder,
+        parent: ChatDocument | None,
+        result: ChatDocument,
+    ) -> None:
+        """Processes valid result from a responder, during a step"""
+
+        self._update_no_answer_vars(result)
+
         # pending_sender is of type Responder,
         # i.e. it is either one of the agent's entities
         # OR a sub-task, that has produced a valid response.
@@ -1131,6 +1138,7 @@ def _process_invalid_step_result(self, parent: ChatDocument | None) -> None:
                 metadata=ChatDocMetaData(sender=responder, parent_id=parent_id),
             )
             self.pending_sender = responder
+            self._update_no_answer_vars(self.pending_message)
         self.log_message(self.pending_sender, self.pending_message, mark=True)
 
     def _show_pending_message_if_debug(self) -> None:
@@ -1346,13 +1354,20 @@ def _is_done_response(
     def _maybe_infinite_loop(self) -> bool:
         """
         Detect possible infinite loop based on message frequencies.
-        NOTE: This only (attempts to) detect "exact" loops, i.e. a cycle
-        of messages that repeats exactly, e.g.
+        NOTE: This detects two types of loops:
+        - Alternating NO_ANSWER loops, specifically of the form
+        x1 NO_ANSWER x2 NO_ANSWER x3 NO_ANSWER...
+        (e.g. an LLM repeatedly saying something different, and another responder
+        or sub-task saying NO_ANSWER -- i.e. "DO-NOT-KNOW")
+
+        - "exact" loops, i.e. a cycle of messages that repeats exactly, e.g.
         a r b i t r a t e r a t e r a t e r a t e ...
 
-        [It does not detect "approximate" loops, where the LLM is generating a
-        sequence of messages that are similar, but not exactly the same.]
+        [It does not detect more general "approximate" loops, where two entities are
+        responding to each other potentially forever, with (slightly) different
+        messages each time]
 
+        Here is the logic for the exact-loop detection:
         Intuition: when you look at a sufficiently long sequence with an m-message
         loop, then the frequencies of these m messages will "dominate" those
         of all other messages.

diff --git a/tests/main/test_task_inf_loop.py b/tests/main/test_task_inf_loop.py
@@ -86,19 +86,35 @@ def user_response(
 
 
 def test_task_stall():
-    """Test that task.run() bails when stalled"""
+    """Test that task.run() bails when stalled, i.e. no valid response
+    for many steps."""
 
     agent = ChatAgent(
         ChatAgentConfig(
             name="Random",
-            llm=MockLMConfig(response_fn=lambda x: choice(["1", "2", "3"])),
+            llm=MockLMConfig(
+                response_fn=lambda x: choice([str(x) for x in range(30)]),
+            ),
         )
     )
 
-    alice_task = lr.Task(agent, interactive=False)
-    result = alice_task.run(turns=100)
+    # interactive=False, so in each step,
+    # other than LLM, other responders have no response -> stalled
+    task = lr.Task(agent, interactive=False)
+    result = task.run(turns=100)
     assert result is None
 
+    # set allow_null_result=True, so in each step, when no valid response is found,
+    # we create a dummy NO_ANSWER response from the entity "opposite" to the author
+    # of the pending message, i.e.
+    # - if the author was LLM, then the entity is USER
+    # - if the author was not LLM, then the entity is LLM
+    # This should result in an "alternating NO_ANSWER (NA) infinite loop", i.e.
+    # LLM says x1, then USER says NA, then LLM says x2, then USER says NA, ...
+    task = lr.Task(agent, restart=True, interactive=False, allow_null_result=True)
+    with pytest.raises(lr.InfiniteLoopException):
+        task.run(turns=100)
+
 
 def test_task_alternating_no_answer():
     """Test that task.run() bails when there's a long enough