fix user addressing issues (#505)
* fix user addressing issues

* improve NO_ANSWER detection, init of no_answer_step

* mv alternating-no-answer loop detect outside maybe_inf_loop()

* redis pool max_connections

* redis pool max_connections 500

* reorder tests

* reorder tests

* update pytest.yml

* pytest.yml syntax

* pytest use -ra for short test smry

* pytest.yml adj flags

* conftest.py redis_close_connections()

* pytest wkfl use -v during coverage

* pytest restrict logs to 1 line

* pytest --show-capture=no

* pyproject.toml pytest cfg

* adjust pytest output

* pytest.yml accumulate coverage across runs

* conftest.py session_finish hook to kill pytest when done

* pytest continue on err, to get coverage

* conftest exit pytest w status 0

* conftest sessionfinish hook exit status

* nullify endorsement
pchalasani committed Jun 21, 2024
1 parent ef54081 commit 5a24de3
Showing 11 changed files with 228 additions and 21 deletions.
29 changes: 21 additions & 8 deletions .github/workflows/pytest.yml
@@ -101,44 +101,57 @@ jobs:

- name: Run ALL tests with coverage (GPT-4o)
id: first_test
env:
PYTEST_ADDOPTS: "-p no:logging"
run: |
poetry run coverage run --source=langroid -m pytest \
poetry run coverage run --source=langroid \
-m pytest -rf --tb=no --show-capture=no \
--first-test-file=tests/main/test_task_inf_loop.py \
--first-test-file=tests/main/test_task.py \
--first-test-file=tests/main/test_lance_doc_chat_agent.py \
--first-test-file=tests/main/test_llm.py \
--first-test-file=tests/main/test_llm_async.py \
tests/main tests/extras/test_hf_embeddings.py
poetry run coverage report
continue-on-error: true


- name: Retry FAILED tests ONLY with gpt-4 instead of gpt-4o (no coverage)
id: second_test
if: steps.first_test.outcome == 'failure'
env:
PYTEST_ADDOPTS: "-p no:logging"
run: |
poetry run pytest --m gpt-4 \
poetry run coverage run --append --source=langroid \
-m pytest --m gpt-4 -rf --tb=no --show-capture=no \
--first-test-file=tests/main/test_task_inf_loop.py \
--first-test-file=tests/main/test_task.py \
--first-test-file=tests/main/test_lance_doc_chat_agent.py \
--first-test-file=tests/main/test_llm.py \
--first-test-file=tests/main/test_llm_async.py \
--lf --last-failed-no-failures none \
tests/main tests/extras/test_hf_embeddings.py
poetry run coverage report
continue-on-error: true

- name: Install Pydantic 1.x to pass lancedb tests
run: poetry add "pydantic<2.0.0"
- name: Retry FAILED tests ONLY with pydantic 1.x, gpt-4
if: steps.second_test.outcome == 'failure'
run: |
poetry run pytest --m gpt-4 \
poetry run coverage run --append --source=langroid \
-m pytest --m gpt-4 \
--first-test-file=tests/main/test_task_inf_loop.py \
--first-test-file=tests/main/test_task.py \
--first-test-file=tests/main/test_lance_doc_chat_agent.py \
--first-test-file=tests/main/test_llm.py \
--first-test-file=tests/main/test_llm_async.py \
--lf --last-failed-no-failures none \
tests/main tests/extras/test_hf_embeddings.py
poetry run coverage report
- name: Generate final coverage report from all runs
run: poetry run coverage report

- name: Generate XML coverage report
run: |
poetry run coverage xml
run: poetry run coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
with:
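
The main change in this workflow is that the first run and both retry steps now go through `coverage run` (the retries with `--append`), so coverage data from all three pytest invocations accumulates in one `.coverage` file before the final `coverage report` / `coverage xml` steps. Roughly, `--append` amounts to the following in coverage.py's Python API (an approximation for illustration only; `run_tests` is a placeholder, and the workflow itself uses the CLI flag, not this API):

```python
# Approximate sketch of "coverage run --append" semantics via coverage.py's API.
# Assumption: load() pulls in previously saved data so new measurements are
# merged with it on save(); the CI workflow relies on the CLI flag instead.
import coverage


def run_tests() -> None:
    """Placeholder for the real test invocation (pytest in the workflow)."""
    pass


cov = coverage.Coverage(source=["langroid"])
cov.load()    # read any existing .coverage data -- the "append" part
cov.start()
run_tests()   # measured code runs here
cov.stop()
cov.save()    # merged data written back to .coverage
cov.report()  # one report covering all accumulated runs
```
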
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.4.9
rev: v0.4.10
hooks:
- id: ruff
10 changes: 10 additions & 0 deletions README.md
@@ -46,6 +46,16 @@ This Multi-Agent paradigm is inspired by the
`Langroid` is a fresh take on LLM app-development, where considerable thought has gone
into simplifying the developer experience; it does not use `Langchain`.

Companies are using/adapting Langroid in production. Here is a quote from one of them:

>[Nullify](https://www.nullify.ai) uses AI Agents for secure software development.
> It finds, prioritizes and fixes vulnerabilities. We have internally adapted Langroid's multi-agent orchestration framework in production, after evaluating CrewAI, Autogen, LangChain, Langflow, etc. We found Langroid to be far superior to those frameworks in terms of ease of setup and flexibility. Langroid's Agent and Task abstractions are intuitive, well thought out, and provide a great developer experience. We wanted the quickest way to get something in production.
With other frameworks it would have taken us weeks, but with Langroid we got to
good results in minutes. Highly recommended!
<br>
-- Jacky Wong, Head of AI at Nullify.


:fire: See this [Intro to Langroid](https://lancedb.substack.com/p/langoid-multi-agent-programming-framework)
blog post from the LanceDB team

Binary file added docs/assets/nullify-logo.jpg
Binary file added docs/assets/nullify-logo.png
114 changes: 114 additions & 0 deletions examples/basic/plan-subtasks.py
@@ -0,0 +1,114 @@
"""
Planner agent receives a math calculation expression from user,
involving + - * / ops, with possible parentheses. Planner has no math abilities,
so it needs to create a plan of elementary operations to compute the result,
and send each step to the appropriate helper agent, who will return the result.
Run like this:
python3 examples/basic/plan-subtasks.py
When it waits for user input, try asking things like:
- (10 + 2)/6 - 1
- 3*(4+1) - 3
"""

import langroid as lr
from langroid.utils.constants import NO_ANSWER, DONE

planner = lr.ChatAgent(
lr.ChatAgentConfig(
name="Planner",
system_message=f"""
User will give you a math calculation, but you have no math abilities.
However you are a great planner, so your task is to do two things:
1. CREATE a PLAN of what
sequence of ELEMENTARY operations (ONLY add/subtract, multiply/divide) need
to be performed, in order to compute what the user asked for.
2. EMIT the needed operations, ONE BY ONE, and wait for the answer from
each, before emitting the next operation. Since you cannot directly
calculate these, you will have to SEND the needed operations to
specific helpers, as follows:
* Send Multiplication operation to `Multiplier`
* Send Add operation to `Adder`
* Send Subtract operation to `Subtractor`
* Send Divide operation to `Divider`
To clarify who you are sending the message to, preface your message with
@<helper_name>, e.g. "@Multiplier multiply with 5"
When you have the final answer, say {DONE} and show it.
At the START, ask the user what they need help with, address them as "@user"
EXAMPLE:
============
User: please calculate (4*5 + 1)/3
Assistant (You):
PLAN:
1. multiply 4 with 5
2. add 1 to the result
3. divide result by 3
@Multiplier multiply 4 with 5
[... wait for result, then show your NEW PLAN and send a new request]
and so on.
""",
)
)

adder = lr.ChatAgent(
lr.ChatAgentConfig(
name="Adder",
system_message=f"""
If you receive an Add request, return the result,
otherwise say {NO_ANSWER}.
""",
)
)

multiplier = lr.ChatAgent(
lr.ChatAgentConfig(
name="Multiplier",
system_message=f"""
If you receive a Multiply request, return the result,
otherwise say {NO_ANSWER}.
""",
)
)

subtractor = lr.ChatAgent(
lr.ChatAgentConfig(
name="Subtractor",
system_message=f"""
If you receive a Subtraction request, return the result,
otherwise say {NO_ANSWER}.
""",
)
)

divider = lr.ChatAgent(
lr.ChatAgentConfig(
name="Divider",
system_message=f"""
If you receive a Division request, return the result,
otherwise say {NO_ANSWER}.
""",
)
)


planner_task = lr.Task(planner, interactive=False)
adder_task = lr.Task(adder, interactive=False, single_round=True)
multiplier_task = lr.Task(multiplier, interactive=False, single_round=True)
divider_task = lr.Task(divider, interactive=False, single_round=True)
subtractor_task = lr.Task(subtractor, interactive=False, single_round=True)

planner_task.add_sub_task([adder_task, multiplier_task, divider_task, subtractor_task])


planner_task.run()
23 changes: 12 additions & 11 deletions langroid/agent/task.py
@@ -3,6 +3,7 @@
import asyncio
import copy
import logging
import re
import threading
from collections import Counter, deque
from pathlib import Path
@@ -256,7 +257,7 @@ def __init__(
# how many 2-step-apart alternations of no_answer step-result have we had,
# i.e. x1, N/A, x2, N/A, x3, N/A ...
self.n_no_answer_alternations = 0
self._no_answer_step: int = -1
self._no_answer_step: int = -5
self._step_idx = -1 # current step index
self.max_stalled_steps = max_stalled_steps
self.done_if_response = [r.value for r in done_if_response]
@@ -579,7 +580,7 @@ def run(
self.reset_all_sub_tasks()

self.n_stalled_steps = 0
self._no_answer_step = -1 # last step where the best explicit response was N/A
self._no_answer_step = -5 # last step where the best explicit response was N/A
# how many N/A alternations have we had so far? (for Inf loop detection)
self.n_no_answer_alternations = 0
self.max_cost = max_cost
@@ -639,6 +640,7 @@ def run(
self.config.inf_loop_cycle_len > 0
and i % self.config.inf_loop_cycle_len == 0
and self._maybe_infinite_loop()
or self.n_no_answer_alternations > self.config.inf_loop_wait_factor
):
raise InfiniteLoopException(
"""Possible infinite loop detected!
@@ -704,7 +706,7 @@ async def run_async(
self.reset_all_sub_tasks()

self.n_stalled_steps = 0
self._no_answer_step = -1 # last step where the best explicit response was N/A
self._no_answer_step = -5 # last step where the best explicit response was N/A
# how many N/A alternations have we had so far? (for Inf loop detection)
self.n_no_answer_alternations = 0
self.max_cost = max_cost
@@ -761,6 +763,7 @@ async def run_async(
self.config.inf_loop_cycle_len > 0
and i % self.config.inf_loop_cycle_len == 0
and self._maybe_infinite_loop()
or self.n_no_answer_alternations > self.config.inf_loop_wait_factor
):
raise InfiniteLoopException(
"""Possible infinite loop detected!
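
The net effect of the changes above: the alternating-NO_ANSWER check (`n_no_answer_alternations > inf_loop_wait_factor`) is now evaluated directly in `run()` / `run_async()`, ORed with the periodic `_maybe_infinite_loop()` check, rather than living inside `_maybe_infinite_loop()`; and `_no_answer_step` is initialized to -5 instead of -1, presumably so the earliest steps can never look like they came exactly 2 steps after a prior NO_ANSWER. A minimal standalone sketch of the idea (illustrative names only, not langroid's actual code):

```python
# Hedged sketch of detecting an "x1, N/A, x2, N/A, ..." alternation pattern.
# NoAnswerAlternationTracker and wait_factor are illustrative names only.
NO_ANSWER = "DO-NOT-KNOW"  # langroid.utils.constants.NO_ANSWER


class NoAnswerAlternationTracker:
    def __init__(self, wait_factor: int = 5) -> None:
        self.wait_factor = wait_factor
        self.no_answer_step = -5  # far enough back that step 0 or 1 can't match
        self.alternations = 0

    def update(self, step_idx: int, best_response: str) -> bool:
        """Record this step's best response; True means 'probably looping'."""
        if best_response.strip() == NO_ANSWER:
            if step_idx - self.no_answer_step == 2:
                self.alternations += 1  # NO_ANSWER again exactly 2 steps later
            self.no_answer_step = step_idx
        return self.alternations > self.wait_factor


# Example: responder output alternates between real content and NO_ANSWER.
tracker = NoAnswerAlternationTracker(wait_factor=3)
steps = ["x1", NO_ANSWER, "x2", NO_ANSWER, "x3", NO_ANSWER,
         "x4", NO_ANSWER, "x5", NO_ANSWER]
for i, resp in enumerate(steps):
    if tracker.update(i, resp):
        print(f"possible infinite loop detected at step {i}")
        break
```
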
@@ -880,7 +883,6 @@ def step(self, turns: int = -1) -> ChatDocument | None:

if (
Entity.USER in self.responders
and self.interactive
and not self.human_tried
and not self.agent.has_tool_message_attempt(self.pending_message)
):
@@ -932,7 +934,7 @@ def step(self, turns: int = -1) -> ChatDocument | None:
if self.is_done:
# skip trying other responders in this step
break
if not found_response: # did not find a Non-NO_ANSWER response
if not found_response: # did not find a valid response
if no_answer_response:
# even though there was no valid response from anyone in this step,
# if there was at least one who EXPLICITLY said NO_ANSWER, then
@@ -988,7 +990,6 @@ async def step_async(self, turns: int = -1) -> ChatDocument | None:

if (
Entity.USER in self.responders
and self.interactive
and not self.human_tried
and not self.agent.has_tool_message_attempt(self.pending_message)
):
@@ -1385,8 +1386,6 @@ def _maybe_infinite_loop(self) -> bool:
If the set of last (W * m) messages are the same as the
set of m dominant messages, then we are likely in a loop.
"""
if self.n_no_answer_alternations > self.config.inf_loop_wait_factor:
return True

max_cycle_len = self.config.inf_loop_cycle_len
if max_cycle_len <= 0:
@@ -1453,7 +1452,7 @@ def done(
result = result or self.pending_message
user_quit = (
result is not None
and result.content in USER_QUIT_STRINGS
and (result.content in USER_QUIT_STRINGS or DONE in result.content)
and result.metadata.sender == Entity.USER
)
if self._level == 0 and self.interactive and self.only_user_quits_root:
@@ -1524,7 +1523,9 @@ def valid(
return (
result is not None
and not self._is_empty_message(result)
and result.content.strip() != NO_ANSWER
# some weaker LLMs, including even GPT-4o, may say "DO-NOT-KNOW."
# (with punctuation at the end), so we need to strip out punctuation
and re.sub(r"[,.!?:]", "", result.content.strip()) != NO_ANSWER
)

def log_message(
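
To illustrate the new `valid()` check: assuming `NO_ANSWER` is the string `DO-NOT-KNOW` (as the comment above suggests), stripping punctuation before comparing lets responses like "DO-NOT-KNOW." still register as explicit non-answers. A small self-contained sketch, not part of the commit:

```python
import re

NO_ANSWER = "DO-NOT-KNOW"  # langroid.utils.constants.NO_ANSWER


def is_no_answer(content: str) -> bool:
    # Strip common trailing punctuation so "DO-NOT-KNOW." or "DO-NOT-KNOW!"
    # still count as an explicit non-answer.
    return re.sub(r"[,.!?:]", "", content.strip()) == NO_ANSWER


assert is_no_answer("DO-NOT-KNOW.")
assert is_no_answer(" DO-NOT-KNOW! ")
assert not is_no_answer("The result is 20")
```
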
@@ -1605,7 +1606,7 @@ def _recipient_mismatch(self, e: Responder) -> bool:
return (
self.pending_message is not None
and (recipient := self.pending_message.metadata.recipient) != ""
and recipient != e # case insensitive
and not (recipient == e) # case insensitive for entities
and recipient != e.name
and recipient != self.name # case sensitive
)
2 changes: 1 addition & 1 deletion langroid/cachedb/redis_cachedb.py
@@ -54,7 +54,7 @@ def __init__(self, config: RedisCacheConfig):
host=redis_host,
port=redis_port,
password=redis_password,
max_connections=50,
max_connections=500,
socket_timeout=5,
socket_keepalive=True,
retry_on_timeout=True,
1 change: 1 addition & 0 deletions pyproject.toml
@@ -231,3 +231,4 @@ lint.extend-ignore = ["F821"]

[tool.pytest.ini_options]
filterwarnings = ["ignore::DeprecationWarning"]

30 changes: 30 additions & 0 deletions tests/conftest.py
@@ -1,11 +1,29 @@
import os
import threading

import pytest

from langroid.cachedb.redis_cachedb import RedisCache, RedisCacheConfig
from langroid.language_models import OpenAIChatModel
from langroid.utils.configuration import Settings


def pytest_sessionfinish(session, exitstatus):
"""Hook to terminate pytest forcefully after displaying all test stats."""

def terminate():
if exitstatus == 0:
print("All tests passed. Exiting cleanly.")
os._exit(0) # Exit code 0 for success
else:
print("Some tests failed. Exiting with error.")
os._exit(1) # Exit code 1 for error

# Set a timer that will terminate pytest after a set delay
# Delay allows all finalizers and plugins to complete normally
threading.Timer(60, terminate).start() # 60 seconds delay


def pytest_addoption(parser) -> None:
parser.addoption(
"--show",
@@ -109,3 +127,15 @@ def pytest_collection_modifyitems(config, items):

# Replace the items list with priority items first, followed by others
items[:] = priority_items + other_items


@pytest.fixture(autouse=True)
def redis_close_connections():
"""Close all redis connections after each test fn, to avoid
max connections exceeded error."""

# Setup code here (if necessary)
yield # Yield to test execution
# Cleanup code here
redis = RedisCache(RedisCacheConfig(fake=False))
redis.close_all_connections()
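
For context on both the `max_connections` bump in `redis_cachedb.py` and this autouse fixture: redis-py's `ConnectionPool` raises a `ConnectionError` ("Too many connections") once the pool is exhausted, which is the failure mode the tests were hitting. A small illustrative sketch (assumes redis-py and a locally running Redis server; not part of this commit):

```python
# Sketch of the pool-exhaustion failure that max_connections=500 and the
# redis_close_connections fixture are guarding against (assumes redis-py).
import redis

pool = redis.ConnectionPool(host="localhost", port=6379, max_connections=2)
held = [pool.get_connection("PING") for _ in range(2)]  # pool is now full
try:
    pool.get_connection("PING")  # one connection too many
except redis.ConnectionError as exc:
    print(f"pool exhausted: {exc}")
finally:
    for conn in held:
        pool.release(conn)  # releasing (or disconnecting) frees slots again
```
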
