# Prompt templates for the plan quality-control pass.
#
# NOTE(review): the runs of blank lines inside these templates look like places
# where markup lines were lost; only the tags that the response parser visibly
# depends on are spelled out here. Confirm the remaining markup against the
# tool-call format the function-call parser expects.

# Asks the model to self-review the plan it just produced. The reply is parsed
# for the <code_files_to_fetch> and <initial_plan_is_good> tags, so both are
# named explicitly in the response template.
plan_review_prompt = """Now you are to double check the plan you have generated using the following guidelines:
- Name the types for each variable you are using/creating. You can do this by adding comments above each line of code.
- Double check that the attributes you are accessing/modifying for classes and variables actually exist. Comment where in the code each attribute is defined.

FOCUS ONLY ON THE GUIDELINES AND YOUR EVALUATION

List out all mistakes/errors you spot, and list out how to fix them. If the mistake/error is currently not fixable due to a lack of context, explain what you would need in order to be able to fix the issue. If there are no errors simply continue to the next check.
After you have made your evaluation you will be given a chance to correct these mistakes at a later date.
Respond in the following xml format:



[Describe the mistake and how to fix it. If the mistake is not fixable due to a lack of context, explain what you would need to fix it.]

...

[Describe the mistake and how to fix it. If the mistake is not fixable due to a lack of context, explain what you would need to fix it.]



<code_files_to_fetch>
[A comma separated list of file paths to fetch the contents of. Make sure that the file paths are correct and spelled correctly. Incorrectly spelled file paths will not be fetched, if you don't include this tag, it is assumed you do not need any extra code files to be fetched]
</code_files_to_fetch>

<initial_plan_is_good>
['yes' if you did not find any mistakes and the original plan is good to go as is. 'no' otherwise. If you don't include this tag, it will be assumed that the original plan was good to go]
</initial_plan_is_good>"""

# Tool description: view the full contents of one file in the code base.
planning_tool_for_eval_view = """

view_file

View a full code file in the code base.



justification
str

Explain why you need to view this file specifically.



file_name
str

Name of the file that you want to view. Ensure correct spelling as this is case-sensitive.




"""

# Tool description: submit the final, revised plan and end the iteration.
planning_tool_for_eval_submit = """

submit_final_plan

Once you are certain all issues have been resolved, submit the final revised plan.



explanation
Explain how this final plan differs from the original (if any) and why it is ready to submit.
str




final_plan
str

Your revised final plan should go here. Make sure to fix ALL the mistakes you identified while still solving the user request. Be sure that the final plan is still in the correct xml format.
Here is an example:

[Example instructions here]

...

[More example instructions here]

[Your explanation of why this plan was chosen and how it aligns with the guidelines and any modifications made to this plan]



"""

# Tool name -> tool description, used to build the tool list shown to the model.
planning_tools_for_eval = {
    "submit_final_plan": planning_tool_for_eval_submit,
    "view_file": planning_tool_for_eval_view,
}

# Preamble explaining how tools are invoked; the tool descriptions are appended
# after the final line.
planning_tools_prompt = """Now that you have evaluated the plan, you will take the necessary steps in order to improve the plan.
In this environment, you have access to the following tools to assist in fulfilling the user request:

You MUST call them like this:


$TOOL_NAME

<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
...




Here are the tools available:
"""

# Asks for a corrected plan once the requested files have been fetched.
# Rendered with str.format(code_files=...), so `{code_files}` must remain the
# only brace pair in this template.
planning_redo_prompt = """
Here are the code files that you have requested as further context to improve the plan:

{code_files}


With this new context, you are to improve the initial plan created by fixing all the issues that were discovered in the evaluation of the plan.
Remember to respond in the following xml format:



Instructions for creating the new file. Reference imports and entity names. Include relevant type definitions, interfaces, schemas.

[additional creates]


Instructions for modifying one section of the file. If you reference specific lines of code, code must be copied VERBATIM from the file. Never paraphrase code or comments.
Include relevant type definitions, interfaces, schemas.



Instructions for modifying a different section of the same file. If you reference specific lines of code, code must be copied VERBATIM from the file. Never paraphrase code or comments.
Use multiple blocks for the same file to separate distinct changes.


[additional modifies as needed, for the same file or different files]


MAKE SURE TO ADDRESS ALL ISSUES/ERRORS FOUND IN THE EVALUATION OF THE PLAN"""
diff --git a/sweepai/core/sweep_bot.py b/sweepai/core/sweep_bot.py index 9eb331607a..5e05707741 100644 --- a/sweepai/core/sweep_bot.py +++ b/sweepai/core/sweep_bot.py @@ -11,6 +11,7 @@ from networkx import Graph from pydantic import BaseModel +from sweepai.agents.modify import validate_and_parse_function_call from sweepai.agents.modify_file import modify_file from sweepai.config.client import SweepConfig, get_blocked_dirs, get_branch_name_config from sweepai.config.server import DEFAULT_GPT4_32K_MODEL, DEFAULT_GPT35_MODEL @@ -33,9 +34,14 @@ context_files_to_change_prompt, pull_request_prompt, subissues_prompt, - files_to_change_system_prompt + files_to_change_system_prompt, + plan_review_prompt, + planning_tools_for_eval, + planning_tools_prompt, + planning_redo_prompt ) from sweepai.utils.chat_logger import ChatLogger, discord_log_error +from sweepai.utils.convert_openai_anthropic import AnthropicFunctionCall from sweepai.utils.progress import ( AssistantAPIMessage, AssistantConversation, @@ -46,6 +52,33 @@ from sweepai.utils.github_utils import ClonedRepo, commit_multi_file_changes BOT_ANALYSIS_SUMMARY = "bot_analysis_summary" +MODEL = "claude-3-opus-20240229" + +NO_FUNCTION_CALL = """ERROR!\n\nNo function call was made. If you attempted to make a function call but failed retry again but with the correct xml format. +If you are finished with fixing the issues with the plan you can submit the final plan by using the `submit_final_plan` tool. +An example is given below: + + + + + +[Explanation of why this plan was chosen, what issues were fixed (if any) and how it solves the original problem] + + + +[Example instructions here] + +... 
+ +[More example instructions here] + +[Your explanation of why this plan was chosen and how it aligns with the guidelines and any modications made to this plan] + + + + + +""" def to_raw_string(s): @@ -174,7 +207,7 @@ def organize_snippets(snippets: list[Snippet], fuse_distance: int=600) -> list[S def get_max_snippets( snippets: list[Snippet], budget: int = 150_000 * 3.5, # 140k tokens - expand: int = 300, + expand: int = 3000, # testing expand ): """ Start with max number of snippets and then remove then until the budget is met. @@ -187,6 +220,159 @@ def get_max_snippets( return proposed_snippets raise Exception("Budget number of chars too low!") +def parse_xml_tag_from_string(tag: str, string: str) -> str: + match = re.search(f"<{tag}>(.*?)", string, re.DOTALL) + return match.group(1) if match else None + +# handles function calls made by planning +def handle_planning_function_call( + function_call: AnthropicFunctionCall, + llm_state: dict[str, str | bool | list[str]], + cloned_repo: ClonedRepo): + tool_name = function_call.function_name + tool_call = function_call.function_parameters + if tool_name == "submit_final_plan": + llm_state["done"] = True + final_plan = tool_call["final_plan"].strip("\n") + return final_plan, llm_state + elif tool_name == "view_file": + file_path = tool_call["file_name"].strip() # strip ALL whitespace + try: + file_contents = cloned_repo.get_file_contents(file_path) + success_message = f'SUCCESS!\n\nFile {file_path} found in the codebase. Here are the contents:\n\n\n{file_contents}\n' + return success_message, llm_state + except FileNotFoundError: + import pdb; pdb.set_trace() + error_message = f"ERROR!\n\nFile {file_path} not found in the codebase." + return error_message, llm_state + else: + available_tools = ", ".join(llm_state["available_tools"]) + error_message = f"ERROR!\n\nUnknown tool {tool_name}:\n\nYou have access to the following tools only:\n{available_tools}\n\nMake sure you respond with the correct xml format." 
+ return error_message, llm_state + +# iterate on the initial plan to improve it using an agent +def iterate_on_plan(chat_gpt: ChatGPT, cloned_repo: ClonedRepo, chat_logger: ChatLogger = None): + # keep track of state + llm_state = { + "done": False, + "available_tools": ["view_file", "submit_final_plan"] + } + # make initial function call + planning_tools_prompt_string = planning_tools_prompt + # give agent what tools it has available + for tool in llm_state["available_tools"]: + planning_tools_prompt_string += f'\n\n{planning_tools_for_eval[tool]}' + function_calls_string = chat_gpt.chat_anthropic( + content=planning_tools_prompt_string, + model=MODEL, + stop_sequences=[""], + temperature=0.1 + ) + final_plan = "" + # max 10 iterations anymore probably means something has gone wrong. + max_iterations = 10 + for i in range(max_iterations): + function_call = validate_and_parse_function_call(function_calls_string, chat_gpt) + if function_call: + function_output, llm_state = handle_planning_function_call(function_call, llm_state, cloned_repo) + # check if we are done + if llm_state["done"]: + # update chat logger + if chat_logger: + chat_logger.add_chat( + { + "model": MODEL, + "messages": [{"role": message.role, "content": message.content} for message in chat_gpt.messages], + "output": f"We are done! Here is the final output:\n\n{function_output}", + } + ) + final_plan = function_output + break + # get the next function call + function_calls_string = chat_gpt.chat_anthropic( + content=function_output, + model=MODEL, + stop_sequences=[""], + temperature=0.1 + ) + else: + # get the next function call + function_calls_string = chat_gpt.chat_anthropic( + content=NO_FUNCTION_CALL, + model=MODEL, + stop_sequences=[""], + temperature=0.1 + ) + + if chat_logger: + output_message = function_call + if i == max_iterations - 1: + output_message += f"\n\nMAX ITERATIONS REACHED!" 
+ + chat_logger.add_chat( + { + "model": MODEL, + "messages": [{"role": message.role, "content": message.content} for message in chat_gpt.messages], + "output": output_message, + } + ) + return final_plan + +# parse the output of the evaluation of the planning +def parse_planning_evaluation(evaluation: str) -> tuple[list[str], bool]: + initial_plan_is_good = parse_xml_tag_from_string("initial_plan_is_good", evaluation) + code_files_to_fetch = parse_xml_tag_from_string("code_files_to_fetch", evaluation) + initial_plan_is_good = False if initial_plan_is_good and 'no' in initial_plan_is_good else True # default to True if no tag is found + code_files_to_fetch = [file.strip() for file in code_files_to_fetch.split(",")] if code_files_to_fetch and code_files_to_fetch.strip() else [] # default to empty list if no tag is found + return code_files_to_fetch, initial_plan_is_good + +def planning_qc_pipeline(chat_gpt: ChatGPT, cloned_repo: ClonedRepo, chat_logger: ChatLogger = None): + # get initial evaluation + initial_evaluation = chat_gpt.chat_anthropic( + content=plan_review_prompt, + model=MODEL, + temperature=0.1 + ) + + if chat_logger: + chat_logger.add_chat( + { + "model": MODEL, + "messages": [{"role": message.role, "content": message.content} for message in chat_gpt.messages], + "output": initial_evaluation, + }) + # based on results of evaluation, iterate on the plan + # first parse the initial evaluation to see if there are any code files we need to fetch + code_files_to_fetch, initial_plan_is_good = parse_planning_evaluation(initial_evaluation) + if initial_plan_is_good: + return "" # return if no fixes are needed + fetched_code_files = {} + for code_file in code_files_to_fetch: + try: + fetched_code_file = cloned_repo.get_file_contents(code_file) + fetched_code_files[code_file] = fetched_code_file + except FileNotFoundError: + pass + formatted_code_files = "" + for code_file, fetched_code_file in fetched_code_files.items(): + formatted_code_files += 
f'\n\n{fetched_code_file}\n\n' + # now we get a new plan + formatted_planning_redo_prompt = planning_redo_prompt.format(code_files=formatted_code_files) + final_plan = chat_gpt.chat_anthropic( + content=formatted_planning_redo_prompt, + model=MODEL, + temperature=0.1 + ) + if chat_logger: + chat_logger.add_chat( + { + "model": MODEL, + "messages": [{"role": message.role, "content": message.content} for message in chat_gpt.messages], + "output": final_plan, + }) + return final_plan + +# get the plan and fcrs for the change def get_files_to_change( relevant_snippets: list[Snippet], read_only_snippets: list[Snippet], @@ -257,7 +443,7 @@ def get_files_to_change( Message( role="user", content=relevant_snippets_message, - key="relevant_snippets", + key="relevant_code_files", ) ) joined_relevant_read_only_snippets = "\n".join( @@ -328,12 +514,18 @@ def get_files_to_change( ), ], ) - MODEL = "claude-3-opus-20240229" + # get initial plan files_to_change_response = chat_gpt.chat_anthropic( content=joint_message + "\n\n" + (files_to_change_prompt if not context else context_files_to_change_prompt), model=MODEL, temperature=0.1 ) + + final_plan = planning_qc_pipeline(chat_gpt, cloned_repo, chat_logger=chat_logger) + if final_plan: + files_to_change_response = final_plan # update our final plan + + # files_to_change_response = iterate_on_plan(chat_gpt, cloned_repo, chat_logger=chat_logger) if chat_logger: chat_logger.add_chat( {