Refactor monologue and SWE agent to use the messages in state history (OpenDevin#1863)

* Refactor monologue to use the messages in state history

* add messages, clean up

* fix monologue

* update integration tests

* move private method

* update SWE agent to use the history from State

* integration tests for SWE agent

* rename monologue to initial_thoughts, since that is what it is
enyst committed May 23, 2024
1 parent 3235836 commit 0eccf31
Showing 19 changed files with 187 additions and 184 deletions.
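
For orientation before the per-file diffs: the common thread is that both agents stop keeping their own running memory (previously fed from `state.updated_info`) and instead re-derive it from `state.history` on every step. A minimal sketch of that pattern — hedged, not the repository's exact code; the `State` shape and the formatting callable are assumed to match the hunks below:

from opendevin.events.serialization.event import event_to_memory

MEMORY_WINDOW = 4  # how many recent (action, observation) pairs go into the prompt


def build_running_memory(state, memory_format) -> list[str]:
    # rebuild short-term memory from state.history instead of a stored queue
    window = min(MEMORY_WINDOW, len(state.history))
    running_memory: list[str] = []
    for prev_action, obs in state.history[-window:]:
        # serialize both halves of the pair, then apply the agent's memory template
        running_memory.append(
            memory_format(event_to_memory(prev_action), event_to_memory(obs))
        )
    return running_memory

Because the memory window is rebuilt from `State` on each call, nothing about past steps needs to persist on the agent object itself, which is why the `running_memory` attribute and the `_remember` helper disappear from the SWE agent below.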
31 changes: 18 additions & 13 deletions agenthub/SWE_agent/agent.py
@@ -6,7 +6,6 @@
FileWriteAction,
MessageAction,
)
from opendevin.events.observation import Observation
from opendevin.events.serialization.event import event_to_memory
from opendevin.llm.llm import LLM

@@ -33,15 +32,9 @@ def __init__(self, llm: LLM):
super().__init__(llm)
self.memory_window = 4
self.max_retries = 2
self.running_memory: list[str] = []
self.cur_file: str = ''
self.cur_line: int = 0

def _remember(self, action: Action, observation: Observation) -> None:
"""Agent has a limited memory of the few steps implemented as a queue"""
memory = MEMORY_FORMAT(event_to_memory(action), event_to_memory(observation))
self.running_memory.append(memory)

def _think_act(self, messages: list[dict]) -> tuple[Action, str]:
resp = self.llm.do_completion(
messages=messages,
@@ -69,24 +62,36 @@ def step(self, state: State) -> Action:
2. Perform think-act - prompt model for action and reasoning
3. Catch errors - ensure model takes action (5 attempts max)
"""
for prev_action, obs in state.updated_info:
self._remember(prev_action, obs)
# retrieve short term memories from state.history, up to memory_window
memory_window = min(self.memory_window, len(state.history))
running_memory: list[str] = []
for prev_action, obs in state.history[-memory_window:]:
running_memory.append(
MEMORY_FORMAT(event_to_memory(prev_action), event_to_memory(obs))
)

goal = state.get_current_user_intent()

# always in the prompt if they exist: file and line
prompt = STEP_PROMPT(goal, self.cur_file, self.cur_line)

# prepare messages
msgs = [
{'content': SYSTEM_MESSAGE, 'role': 'system'},
{'content': prompt, 'role': 'user'},
]

if len(self.running_memory) > 0:
context = CONTEXT_PROMPT(self.running_memory, self.memory_window)
# insert memories
if len(running_memory) > 0:
context = CONTEXT_PROMPT(running_memory, self.memory_window)
msgs.insert(1, {'content': context, 'role': 'user'})
# clrs = [''] * (len(msgs)-2) + ['\033[0;36m', '\033[0;35m']
# print('\n\n'.join([c+m['content']+'\033[0m' for c, m in zip(clrs, msgs)]))

# send it over
action, thought = self._think_act(messages=msgs)

# be robust with malformed responses
start_msg_len = len(msgs)
while not action and len(msgs) < self.max_retries + start_msg_len:
error = NO_ACTION(thought)
Expand All @@ -102,9 +107,9 @@ def step(self, state: State) -> Action:
return action

def search_memory(self, query: str) -> list[str]:
return [item for item in self.running_memory if query in item]
# return [item for item in self.running_memory if query in item]
raise NotImplementedError('Search_memory not implemented currently')

def reset(self) -> None:
"""Used to reset the agent"""
self.running_memory = []
super().reset()
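
A quick note on how the messages above are put together each step: the list always starts with the system message and the step prompt (which carries the current file and line), the freshly rebuilt memory window is inserted between them when it is non-empty, and malformed responses are retried a bounded number of times (`max_retries`). A hedged sketch of that assembly, using the `SYSTEM_MESSAGE`, `STEP_PROMPT`, and `CONTEXT_PROMPT` templates from `agenthub/SWE_agent/prompts.py`:

def build_messages(goal, cur_file, cur_line, running_memory, memory_window):
    msgs = [
        {'content': SYSTEM_MESSAGE, 'role': 'system'},
        {'content': STEP_PROMPT(goal, cur_file, cur_line), 'role': 'user'},
    ]
    # recent memories, when present, sit between the system message and the step prompt
    if running_memory:
        msgs.insert(
            1, {'content': CONTEXT_PROMPT(running_memory, memory_window), 'role': 'user'}
        )
    return msgs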
4 changes: 2 additions & 2 deletions agenthub/SWE_agent/prompts.py
@@ -92,7 +92,7 @@
- To execute multiple commands you should write them down in your thoughts section so you can remember it on the next step and execute them then.
- The only commands you are not capable of executing are interactive commands like `python` or `node` by themselves.
- If you think that you have completed the task that has been given to you based on your previous actions and outputs then use ``` exit ``` as the command to let the system know that you are done.
- DO NOT make any copies of your previous memories those will be provided to you at each step, making copies just wastes time and energy. Think smarter not harder.
- DO NOT make any copies of your previous memories, those will be provided to you at each step, making copies just wastes time and energy. Think smarter not harder.
- The write and edit commands requires proper indentation in the content section ex. `write hw.py def hello():\n print(\'Hello World\')` this is how you would have to format your write command.
- The white spaces matter as the code changes will be added to the code so they must have proper syntax.
@@ -115,7 +115,7 @@

SYSTEM_MESSAGE = f"""SYSTEM INFO:
You are an autonomous coding agent, here to provide solutions for coding issues.
You have been designed to assist you with a wide range of programming tasks, from code editing and debugging to testing and deployment.
You have been designed to assist with a wide range of programming tasks, from code editing and debugging to testing and deployment.
You have access to a variety of tools and commands that you can use to help you solve problems efficiently.
{GENERAL_GUIDELINES}
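The monologue agent below applies the same idea: the `INITIAL_THOUGHTS` script moves into `prompts.py`, the `ShortTermHistory` monologue and the `_add_event` helper are removed, and each `step()` rebuilds `recent_events` from `state.history`, skipping null events and truncating long observation outputs; only the most recent pair is pushed into long-term memory. A hedged sketch of that per-step rebuild (import paths assumed from the surrounding hunks):

from opendevin.events.action import NullAction
from opendevin.events.observation import NullObservation
from opendevin.events.serialization.event import event_to_memory


def collect_recent_events(state, memory, truncate_output) -> list[dict]:
    recent_events: list[dict] = []
    for prev_action, obs in state.history:
        if not isinstance(prev_action, NullAction):
            recent_events.append(event_to_memory(prev_action))
        if not isinstance(obs, NullObservation):
            # long outputs are shortened before they reach the prompt
            recent_events.append(truncate_output(event_to_memory(obs)))
    # only the latest (action, observation) pair is added to long-term memory this step
    if memory is not None and state.history:
        last_action, last_obs = state.history[-1]
        memory.add_event(event_to_memory(last_action))
        memory.add_event(truncate_output(event_to_memory(last_obs)))
    return recent_events
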
157 changes: 62 additions & 95 deletions agenthub/monologue_agent/agent.py
@@ -1,4 +1,5 @@
import agenthub.monologue_agent.utils.prompts as prompts
from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.core.config import config
@@ -25,59 +26,13 @@
from opendevin.events.serialization.event import event_to_memory
from opendevin.llm.llm import LLM
from opendevin.memory.condenser import MemoryCondenser
from opendevin.memory.history import ShortTermHistory

if config.agent.memory_enabled:
from opendevin.memory.memory import LongTermMemory

MAX_TOKEN_COUNT_PADDING = 512
MAX_OUTPUT_LENGTH = 5000

INITIAL_THOUGHTS = [
'I exist!',
'Hmm...looks like I can type in a command line prompt',
'Looks like I have a web browser too!',
"Here's what I want to do: $TASK",
'How am I going to get there though?',
'It seems like I have some kind of short term memory.',
'Each of my thoughts seems to be stored in a JSON array.',
'It seems whatever I say next will be added as an object to the list.',
'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
'Fortunately I have long term memory!',
'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"Let's try it out!",
'RECALL what it is I want to do',
"Here's what I want to do: $TASK",
'How am I going to get there though?',
"Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
'RUN echo "hello world"',
'hello world',
'Cool! I bet I can write files too using the write action.',
'WRITE echo "console.log(\'hello world\')" > test.js',
'',
"I just created test.js. I'll try and run it now.",
'RUN node test.js',
'hello world',
'It works!',
"I'm going to try reading it now using the read action.",
'READ test.js',
"console.log('hello world')",
'Nice! I can read files too!',
'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
"Let's try that...",
'BROWSE google.com',
'<form><input type="text"></input><button type="submit"></button></form>',
'I can browse the web too!',
'And once I have completed my task, I can use the finish action to stop working.',
"But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
'Very cool. Now to accomplish my task.',
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
]


class MonologueAgent(Agent):
VERSION = '1.0'
@@ -88,57 +43,19 @@ class MonologueAgent(Agent):
"""

_initialized = False
monologue: ShortTermHistory
initial_thoughts: list[dict[str, str]]
memory: 'LongTermMemory | None'
memory_condenser: MemoryCondenser

def __init__(self, llm: LLM):
"""
Initializes the Monologue Agent with an llm, monologue, and memory.
Initializes the Monologue Agent with an llm.
Parameters:
- llm (LLM): The llm to be used by this agent
"""
super().__init__(llm)

def _add_event(self, event_dict: dict):
"""
Adds a new event to the agent's monologue and memory.
Monologue automatically condenses when it gets too large.
Parameters:
- event (dict): The event that will be added to monologue and memory
"""

if (
'args' in event_dict
and 'output' in event_dict['args']
and len(event_dict['args']['output']) > MAX_OUTPUT_LENGTH
):
event_dict['args']['output'] = (
event_dict['args']['output'][:MAX_OUTPUT_LENGTH] + '...'
)

self.monologue.add_event(event_dict)
if self.memory is not None:
self.memory.add_event(event_dict)

# Test monologue token length
prompt = prompts.get_request_action_prompt(
'',
self.monologue.get_events(),
[],
)
messages = [{'content': prompt, 'role': 'user'}]
token_count = self.llm.get_token_count(messages)

if token_count + MAX_TOKEN_COUNT_PADDING > self.llm.max_input_tokens:
prompt = prompts.get_summarize_monologue_prompt(self.monologue.events)
summary_response = self.memory_condenser.condense(
summarize_prompt=prompt, llm=self.llm
)
self.monologue.events = prompts.parse_summary_response(summary_response)

def _initialize(self, task: str):
"""
Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities
@@ -159,7 +76,7 @@ def _initialize(self, task: str):
if task is None or task == '':
raise AgentNoInstructionError()

self.monologue = ShortTermHistory()
self.initial_thoughts = []
if config.agent.memory_enabled:
self.memory = LongTermMemory()
else:
@@ -188,7 +105,7 @@ def _add_initial_thoughts(self, task):
observation = BrowserOutputObservation(
content=thought, url='', screenshot=''
)
self._add_event(event_to_memory(observation))
self.initial_thoughts.append(event_to_memory(observation))
previous_action = ''
else:
action: Action = NullAction()
@@ -215,7 +132,7 @@ def _add_initial_thoughts(self, task):
previous_action = ActionType.BROWSE
else:
action = MessageAction(thought)
self._add_event(event_to_memory(action))
self.initial_thoughts.append(event_to_memory(action))

def step(self, state: State) -> Action:
"""
@@ -230,25 +147,75 @@ def step(self, state: State) -> Action:

goal = state.get_current_user_intent()
self._initialize(goal)
for prev_action, obs in state.updated_info:
self._add_event(event_to_memory(prev_action))
self._add_event(event_to_memory(obs))

state.updated_info = []
recent_events: list[dict[str, str]] = []

# add the events from state.history
for prev_action, obs in state.history:
if not isinstance(prev_action, NullAction):
recent_events.append(event_to_memory(prev_action))
if not isinstance(obs, NullObservation):
recent_events.append(self._truncate_output(event_to_memory(obs)))

# add the last messages to long term memory
if self.memory is not None and state.history and len(state.history) > 0:
self.memory.add_event(event_to_memory(state.history[-1][0]))
self.memory.add_event(
self._truncate_output(event_to_memory(state.history[-1][1]))
)

# the action prompt with initial thoughts and recent events
prompt = prompts.get_request_action_prompt(
goal,
self.monologue.get_events(),
self.initial_thoughts,
recent_events,
state.background_commands_obs,
)
messages = [{'content': prompt, 'role': 'user'}]

messages: list[dict[str, str]] = [
{'role': 'user', 'content': prompt},
]

# format all as a single message, a monologue
resp = self.llm.do_completion(messages=messages)

# get the next action from the response
action_resp = resp['choices'][0]['message']['content']

# keep track of max_chars fallback option
state.num_of_chars += len(prompt) + len(action_resp)

action = prompts.parse_action_response(action_resp)
self.latest_action = action
return action

def _truncate_output(
self, observation: dict, max_chars: int = MAX_OUTPUT_LENGTH
) -> dict[str, str]:
"""
Truncates the output of an observation to a maximum number of characters.
Parameters:
- observation (dict): The serialized observation whose output to truncate
- max_chars (int): The maximum number of characters to allow
Returns:
- dict: The observation with its output truncated
"""
if (
'args' in observation
and 'output' in observation['args']
and len(observation['args']['output']) > max_chars
):
output = observation['args']['output']
half = max_chars // 2
observation['args']['output'] = (
output[:half]
+ '\n[... Output truncated due to length...]\n'
+ output[-half:]
)
return observation

def search_memory(self, query: str) -> list[str]:
"""
Uses VectorIndexRetriever to find related memories within the long term memory.
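
One last note on the `_truncate_output` helper above: unlike the old `MAX_OUTPUT_LENGTH` clamp in the removed `_add_event` (which simply cut the tail and appended '...'), it keeps the head and the tail of the allowed length and marks the gap. A small, hedged stand-alone illustration of the same behavior:

def truncate_output(observation: dict, max_chars: int = 5000) -> dict:
    # keep the first and last halves of an over-long output, marking the removed middle
    if (
        'args' in observation
        and 'output' in observation['args']
        and len(observation['args']['output']) > max_chars
    ):
        output = observation['args']['output']
        half = max_chars // 2
        observation['args']['output'] = (
            output[:half]
            + '\n[... Output truncated due to length...]\n'
            + output[-half:]
        )
    return observation


# example: with max_chars=10, a 26-character output keeps its first and last 5 characters
obs = {'args': {'output': 'abcdefghijklmnopqrstuvwxyz'}}
print(truncate_output(obs, max_chars=10)['args']['output'])
# abcde
# [... Output truncated due to length...]
# vwxyz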