Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 61 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,66 @@
aiohappyeyeballs==2.6.1
aiohttp==3.12.15
aiosignal==1.4.0
annotated-types==0.7.0
anthropic==0.75.0
anyio==4.10.0
attrs==25.3.0
bashlex==0.18
certifi==2025.8.3
charset-normalizer==3.4.3
click==8.3.0
coverage==7.11.3
datasets==4.5.0
dill==0.4.0
distro==1.9.0
docker==7.1.0
docstring_parser==0.17.0
fastapi==0.116.1
filelock==3.20.3
frozenlist==1.7.0
fsspec==2025.10.0
h11==0.16.0
hf-xet==1.2.0
httpcore==1.0.9
httpx==0.28.1
huggingface_hub==1.3.2
idna==3.10
iniconfig==2.1.0
jiter==0.11.0
markdown-it-py==4.0.0
mdurl==0.1.2
multidict==6.6.4
multiprocess==0.70.18
numpy==1.26.4
openai==1.107.3
packaging==25.0
pandas==3.0.0
pexpect==4.9.0
pluggy==1.6.0
propcache==0.3.2
ptyprocess==0.7.0
pyarrow==23.0.0
pydantic==2.11.9
pydantic_core==2.33.2
Pygments==2.19.2
pytest==8.4.2
pytest-cov==7.0.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
python-multipart==0.0.20
PyYAML==6.0.2
requests==2.32.5
rich==14.1.0
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
starlette==0.47.3
swe-rex==1.4.0
tqdm==4.67.1
typer-slim==0.21.1
typing-inspection==0.4.1
typing_extensions==4.15.0
urllib3==2.5.0
uvicorn==0.35.0
xxhash==3.6.0
yarl==1.20.1
92 changes: 4 additions & 88 deletions src/microbots/MicroBot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
You are a helpful agent well versed in software development and debugging.

You will be provided with a coding or debugging task to complete inside a sandboxed shell environment.
There is a shell session open for you. You will be provided with a task and you should achieve it using the shell commands.
There is a shell session open for you.
You will be provided with a task and you should achieve it using the shell commands.
All your response must be in the following json format:
{llm_output_format_str}
The properties ( task_done, thoughts, command ) are mandatory on each response.
Expand All @@ -45,22 +46,6 @@
1. If a command fails, analyze the error message and provide an alternative command in your next response. Same command will not pass again.
2. Avoid using recursive commands like 'ls -R', 'rm -rf', 'tree', or 'find' without depth limits as they can produce excessive output or be destructive.
3. You cannot run any interactive commands like vim, nano, etc.

# TOOLS
You have following special tools.

1. summarize_context: Use this tool if your input has too many irrelevant conversation turns.
You can use this tool to rewrite your own context in a concise manner focusing on important points only. For example, if you have a failed command output which you've solved in later steps deep down in the conversation, that is not required to be in the context. You can summarize the context to remove such irrelevant information. This tool will not update the system prompt. So, you can ignore details in the system prompt while summarizing.

Usage:
summarize_context <no_of_recent_turns_to_keep> "<your summary of the context>"

<no_of_recent_turns_to_keep> : Number of recent conversation turns to keep as is without summarizing. 1 means last user-assistant pair will be kept as is.
"<your summary of the context>" : Your summarized context in double quotes. The summary can be empty if you finished a sub-task and want to remove previous context.

Important Notes:
- The summarize tool call step will not be added to your history.
- Try to be very precise and concise in your summary.
"""


Expand Down Expand Up @@ -212,7 +197,7 @@ def run(
f" 💭 LLM thoughts: {LogTextColor.OKCYAN}{llm_response.thoughts}{LogTextColor.ENDC}",
)
logger.info(
f" ➡️ LLM tool call : {LogTextColor.OKBLUE}{llm_response.command}{LogTextColor.ENDC}",
f" ➡️ LLM tool call : {LogTextColor.OKBLUE}{json.dumps(llm_response.command)}{LogTextColor.ENDC}",
)
# increment iteration count
iteration_count += 1
Expand All @@ -233,19 +218,6 @@ def run(
return_value.error = f"Timeout of {timeout} seconds reached"
return return_value

# Handle context summarization
if llm_response.command.startswith("summarize_context"):
parsed_args = self._parse_summarize_context_command(llm_response.command)
if parsed_args is None:
# Invalid syntax - ask LLM to correct it
error_msg = self._get_summarize_context_syntax_error(llm_response.command)
llm_response = self.llm.ask(error_msg)
continue
last_n_messages, summary = parsed_args
last_msg = self.llm.summarize_context(last_n_messages=last_n_messages, summary=summary)
llm_response = self.llm.ask(last_msg["content"])
continue

# Validate command for dangerous operations
is_safe, explanation = self._is_safe_command(llm_response.command)
if not is_safe:
Expand All @@ -269,7 +241,7 @@ def run(
# HACK: anthropic-text-editor tool extra formats the output
try:
output_json = json.loads(llm_command_output.stdout)
if isinstance(output_json, dict) and "content" in output_json:
if "content" in output_json:
output_text = pformat(output_json["content"])
except json.JSONDecodeError:
pass
Expand Down Expand Up @@ -356,62 +328,6 @@ def _validate_folder_to_mount(self, folder_to_mount: Mount):
"Only MOUNT mount type is supported for folder_to_mount"
)

def _parse_summarize_context_command(self, command: str) -> tuple[int, str] | None:
"""
Parse the summarize_context command and extract arguments.

Expected format: summarize_context <n> "<summary>"
Where <n> is an integer and <summary> is a quoted string.

Returns:
tuple[int, str]: (last_n_messages, summary) if valid
None: if invalid syntax
"""
import shlex
try:
# Use shlex to properly handle quoted strings
parts = shlex.split(command)
if len(parts) < 2 or len(parts) > 3:
return None

# First part should be 'summarize_context'
if parts[0] != "summarize_context":
return None

# Second part should be an integer
try:
last_n_messages = int(parts[1])
except ValueError:
return None

# Third part is the summary (optional, defaults to empty string)
summary = parts[2] if len(parts) > 2 else ""

return (last_n_messages, summary)

except ValueError:
# shlex.split can raise ValueError for malformed strings
return None

def _get_summarize_context_syntax_error(self, command: str) -> str:
"""
Generate an error message for invalid summarize_context syntax.

Returns a detailed error message guiding the LLM to use correct syntax.
"""
return f"""COMMAND_ERROR: Invalid summarize_context syntax.
Your command: {command}

Correct usage:
summarize_context <no_of_recent_turns_to_keep> "<your summary of the context>"

Examples:
summarize_context 2 "Summary of previous work: explored files and found the bug"
summarize_context 0 ""
summarize_context 5 "Completed file exploration, ready to make changes"

Please send the command again with correct syntax."""

def _get_dangerous_command_explanation(self, command: str) -> Optional[str]:
"""Provides detailed explanation for why a command is dangerous and suggests alternatives.

Expand Down
7 changes: 6 additions & 1 deletion src/microbots/llm/anthropic_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3
base_url=endpoint
)
self.deployment_name = deployment_name
super().__init__(system_prompt=system_prompt, max_retries=max_retries)
self.system_prompt = system_prompt
self.messages = []

# Set these values here. This logic will be handled in the parent class.
self.max_retries = max_retries
self.retries = 0

def ask(self, message) -> LLMAskResponse:
self.retries = 0 # reset retries for each ask. Handled in parent class.
Expand Down
55 changes: 1 addition & 54 deletions src/microbots/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,6 @@ class LLMAskResponse:
command: str = ""

class LLMInterface(ABC):
def __init__(self, system_prompt: str, max_retries: int = 3):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LLMInterface.__init__ was removed, but _validate_llm_response still relies on self.retries, self.max_retries, and self.messages. This means any new subclass that forgets to manually initialize these will get an error at runtime — nothing in the interface signals that they're required.
Maybe it's better to restore a base __init__ and have subclasses call super().__init__(). The only subclass-specific part is whether messages includes a system prompt entry, which can be handled after the super call by appending to messages in the subclass. This also removes the three duplicated lines in each subclass.

self.system_prompt = system_prompt
self.max_retries = max_retries
self.retries = 0
self.messages = [
{
"role": "system",
"content": self.system_prompt,
}
]

@abstractmethod
def ask(self, message: str) -> LLMAskResponse:
pass
Expand All @@ -55,7 +44,7 @@ def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
return False, None

if all(key in response_dict for key in LLMAskResponse.__annotations__.keys()):
logger.debug("The llm response is %s ", response_dict)
logger.info("The llm response is %s ", response_dict)

if response_dict.get("task_done") not in [True, False]:
self.retries += 1
Expand Down Expand Up @@ -95,45 +84,3 @@ def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
logger.warning("LLM response is missing required fields. Retrying... (%d/%d)", self.retries, self.max_retries)
self.messages.append({"role": "user", "content": "LLM_RES_ERROR: LLM response is missing required fields. Please respond in the correct JSON format.\n" + llm_output_format_str})
return False, None

def summarize_context(self, last_n_messages: int = 10, summary: str="") -> dict:
    """
    Helper that lets the LLM rewrite (summarize) its own conversation context.

    Keeps the most recent messages verbatim and embeds the given summary in the
    system prompt between ``__summary__`` / ``__end_summary__`` markers, merging
    with any summary already present there.

    Args:
        last_n_messages: Number of recent conversation turns to keep as-is.
        summary: Summary text to insert; may be empty, in which case an empty
            summary block is still written into the system prompt.

    Returns:
        dict: The last message that preceded the summarization request, so the
        caller can re-ask it against the rewritten context.
    """
    logger.debug("Messages : %s", self.messages)
    # Keep the system prompt (messages[0] is assumed to be the system entry).
    msg0 = self.messages[0]["content"]
    # Pop the last message which asked for summarization
    # NOTE(review): assumes the final entry is the turn that carried the
    # summarize_context request — confirm against the caller's message flow.
    self.messages.pop()
    # Get the last N conversations (user + assistant)
    # If there are not enough messages, take all except system prompt
    # NOTE(review): 2n-1 keeps n-1 full pairs plus one trailing message;
    # with last_n_messages=1 only a single message survives, not a full
    # user-assistant pair — TODO confirm this matches the documented intent.
    if (len(self.messages) > (last_n_messages*2 - 1)):
        logger.debug("Summarizing last %d messages", last_n_messages)
        recent_messages = self.messages[-(last_n_messages*2 - 1):]
    else:
        logger.debug("Not enough messages to summarize, taking all except system prompt")
        recent_messages = self.messages[1:]
    logger.debug("Recent messages that will not be summarized: %s", recent_messages)

    # Update system prompt if it already has a summary
    # summary will be between __summary__ and __end_summary__
    if "__summary__" in msg0:
        # Text before the first __summary__ marker is treated as the base prompt.
        system_prompt = msg0.split("__summary__")[0]
        old_summary = msg0.split("__end_summary__")[0].split("__summary__")[1]
        logger.debug("Old summary found: %s", old_summary)
        # Old and new summaries are concatenated, so repeated calls accumulate.
        combined_summary = old_summary + "\n" + summary
        # NOTE(review): any text after __end_summary__ in the old prompt is
        # dropped when the prompt is rebuilt below — verify that is intended.
    else:
        system_prompt = msg0
        combined_summary = summary

    new_system_prompt = f"{system_prompt}\n__summary__\n{combined_summary}\n__end_summary__"

    # Append without previous user message
    self.messages = [{"role": "system", "content": new_system_prompt}] + recent_messages[:-1]
    logger.debug("Context summarized. New system prompt: %s", new_system_prompt)

    logger.debug("Last message before summarization: %s", recent_messages[-1])
    return recent_messages[-1] # return the last user message that given before summarization
6 changes: 5 additions & 1 deletion src/microbots/llm/ollama_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,15 @@ class OllamaLocal(LLMInterface):
def __init__(self, system_prompt, model_name=None, model_port=None, max_retries=3):
self.model_name = model_name or os.environ.get("LOCAL_MODEL_NAME")
self.model_port = model_port or os.environ.get("LOCAL_MODEL_PORT")
self.system_prompt = system_prompt
self.messages = [{"role": "system", "content": system_prompt}]

if not self.model_name or not self.model_port:
raise ValueError("LOCAL_MODEL_NAME and LOCAL_MODEL_PORT environment variables must be set or passed as arguments to OllamaLocal.")

super().__init__(system_prompt=system_prompt, max_retries=max_retries)
# Set these values here. This logic will be handled in the parent class.
self.max_retries = max_retries
self.retries = 0

def ask(self, message) -> LLMAskResponse:
self.retries = 0 # reset retries for each ask. Handled in parent class.
Expand Down
9 changes: 7 additions & 2 deletions src/microbots/llm/openai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,14 @@
class OpenAIApi(LLMInterface):

def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3):
    """Initialize the OpenAI-backed LLM client.

    Args:
        system_prompt: System prompt seeding the conversation history.
        deployment_name: Model/deployment to query (module-level default).
        max_retries: Maximum validation retries per ask() call.
    """
    # endpoint / api_key are module-level settings read at import time.
    self.ai_client = OpenAI(base_url=f"{endpoint}", api_key=api_key)
    self.deployment_name = deployment_name
    self.system_prompt = system_prompt
    # Conversation history starts with the system prompt entry.
    self.messages = [{"role": "system", "content": system_prompt}]

    # Retry bookkeeping consumed by LLMInterface._validate_llm_response;
    # LLMInterface no longer defines __init__, so each subclass must set these.
    self.max_retries = max_retries
    self.retries = 0

def ask(self, message) -> LLMAskResponse:
self.retries = 0 # reset retries for each ask. Handled in parent class.
Expand Down
Loading