From ce9972681e24a7530e4d1d38373aa4f29cbf4acd Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 24 Mar 2025 10:36:36 +0800 Subject: [PATCH 1/4] Add database agent --- .../multiturn_strategy/planning_strategy.py | 203 ++++++++++++++++++ patchwork/common/tools/db_query_tool.py | 37 ++++ patchwork/steps/AgenticLLM/typed.py | 18 +- patchwork/steps/CallLLM/typed.py | 18 +- .../steps/DatabaseAgent/DatabaseAgent.py | 86 ++++++++ patchwork/steps/DatabaseAgent/__init__.py | 0 patchwork/steps/DatabaseAgent/typed.py | 36 ++++ patchwork/steps/FixIssue/typed.py | 18 +- patchwork/steps/GitHubAgent/GitHubAgent.py | 3 +- patchwork/steps/GitHubAgent/typed.py | 18 +- patchwork/steps/LLM/typed.py | 18 +- patchwork/steps/ManageEngineAgent/__init__.py | 0 patchwork/steps/SendEmail/SendEmail.py | 8 +- patchwork/steps/SendEmail/typed.py | 2 +- patchwork/steps/SimplifiedLLM/typed.py | 18 +- patchwork/steps/SimplifiedLLMOnce/typed.py | 18 +- poetry.lock | 6 +- pyproject.toml | 2 +- 18 files changed, 472 insertions(+), 37 deletions(-) create mode 100644 patchwork/common/multiturn_strategy/planning_strategy.py create mode 100644 patchwork/common/tools/db_query_tool.py create mode 100644 patchwork/steps/DatabaseAgent/DatabaseAgent.py create mode 100644 patchwork/steps/DatabaseAgent/__init__.py create mode 100644 patchwork/steps/DatabaseAgent/typed.py create mode 100644 patchwork/steps/ManageEngineAgent/__init__.py diff --git a/patchwork/common/multiturn_strategy/planning_strategy.py b/patchwork/common/multiturn_strategy/planning_strategy.py new file mode 100644 index 000000000..2fafc0a6e --- /dev/null +++ b/patchwork/common/multiturn_strategy/planning_strategy.py @@ -0,0 +1,203 @@ +import asyncio +from functools import partial +from typing import Any, Optional, Union + +from pydantic import BaseModel +from pydantic_ai import Agent +from pydantic_ai.agent import AgentRunResult + +from patchwork.common.client.llm.protocol import LlmClient +from 
patchwork.common.client.llm.utils import example_json_to_base_model +from patchwork.common.tools import Tool + + +class StepCompletedResult(BaseModel): + is_step_completed: bool + + +class PlanCompletedResult(BaseModel): + is_plan_completed: bool + + +class ExecutionResult(BaseModel): + json_data: str + message: str + is_completed: bool + + +class _Plan: + def __init__(self, initial_plan: Optional[list[str]] = None): + self.__plan = initial_plan or [] + self.__cursor = 0 + + def advance(self) -> bool: + self.__cursor += 1 + return self.__cursor < len(self.__plan) + + def is_empty(self) -> bool: + return len(self.__plan) == 0 + + def register_steps(self, agent: Agent): + agent.tool_plain(self.get_current_plan) + agent.tool_plain(self.get_current_step) + agent.tool_plain(self.get_current_step_index) + agent.tool_plain(self.add_step) + agent.tool_plain(self.delete_step) + + def get_current_plan(self) -> str: + return "\n".join([f"{i}. {step}" for i, step in enumerate(self.__plan)]) + + def get_current_step(self) -> str: + if len(self.__plan) == 0: + return "There is currently no plan" + + return self.__plan[self.__cursor] + + def get_current_step_index(self) -> int: + return self.__cursor + + def add_step(self, index: int, step: str) -> str: + if index < 0: + return "index cannot be a negative number" + + if index >= len(self.__plan): + insertion_func = self.__plan.append + else: + insertion_func = partial(self.__plan.insert, index) + + insertion_func(step) + return "Added step\nCurrent plan:\n" + self.get_current_plan() + + def delete_step(self, step: str) -> str: + try: + i = self.__plan.index(step) + self.__plan.pop(i) + return self.get_current_plan() + except ValueError: + return "Step not found in plan\nCurrent plan:\n" + self.get_current_plan() + + +class PlanningStrategy: + def __init__( + self, + llm_client: LlmClient, + planner_system_prompt: str, + executor_system_prompt: str, + executor_tool_set: dict[str, Tool], + example_json: Union[str, dict[str, Any]] = 
'{"output":"output text"}', + ): + self.planner = Agent( + llm_client, + name="Planner", + system_prompt=planner_system_prompt, + model_settings=dict( + parallel_tool_calls=False, + model="gemini-2.0-flash", + ), + ) + + self.plan = _Plan() + self.plan.register_steps(self.planner) + + self.executor = Agent( + llm_client, + name="Executor", + system_prompt=executor_system_prompt, + result_type=ExecutionResult, + tools=[tool.to_pydantic_ai_function_tool() for tool in executor_tool_set.values()], + model_settings=dict( + parallel_tool_calls=False, + model="gemini-2.0-flash", + ), + ) + + self.__summariser = Agent( + llm_client, + result_retries=5, + system_prompt="""\ +Please summarise the conversation given and provide the result in the structure that is asked of you. +""", + result_type=example_json_to_base_model(example_json), + model_settings=dict( + parallel_tool_calls=False, + model="gemini-2.0-flash", + ), + ) + + self.reset() + + def reset(self): + self.__request_tokens = 0 + self.__response_tokens = 0 + + def usage(self): + return { + "request_tokens": self.__request_tokens, + "response_tokens": self.__response_tokens, + } + + def __agent_run(self, agent: Agent, prompt: str, **kwargs) -> AgentRunResult[Any]: + planner_response = agent.run_sync(prompt, **kwargs) + self.__request_tokens += planner_response.usage().request_tokens + self.__response_tokens += planner_response.usage().response_tokens + return planner_response + + def run(self, task: str, conversation_limit: int = 10) -> dict: + loop = asyncio.new_event_loop() + + planner_response = self.__agent_run(self.planner, f"Produce the initial plan for {task}") + planner_history = planner_response.all_messages() + if self.plan.is_empty(): + planner_response = self.__agent_run( + self.planner, f"Please use the tools provided to setup the plan", message_history=planner_history + ) + planner_history = planner_response.all_messages() + + for i in range(conversation_limit): + step = self.plan.get_current_step() + 
executor_prompt = f"Please execute the following task: {step}" + response = self.__agent_run(self.executor, executor_prompt) + + plan_str = self.plan.get_current_plan() + step_index = self.plan.get_current_step_index() + planner_prompt = f"""\ +The current plan is: +{plan_str} + +We are current at {step_index}. +If the current step is not completed, edit the current step. + +The execution result for the step {step_index} is: +{response.data} + +""" + planner_response = self.__agent_run( + self.planner, + planner_prompt, + message_history=planner_history, + result_type=StepCompletedResult, + ) + planner_history = planner_response.all_messages() + if not planner_response.data.is_step_completed: + continue + + if self.plan.advance(): + continue + + planner_response = self.__agent_run( + self.planner, + "Is the task completed? If the task is not completed please add more steps using the tools provided.", + message_history=planner_history, + result_type=PlanCompletedResult, + ) + if planner_response.data.is_plan_completed: + break + + final_result = self.__agent_run( + self.__summariser, + "From the actions taken by the assistant. Please give me the result.", + message_history=planner_history, + ) + + loop.close() + return final_result.data.dict() diff --git a/patchwork/common/tools/db_query_tool.py b/patchwork/common/tools/db_query_tool.py new file mode 100644 index 000000000..67163f952 --- /dev/null +++ b/patchwork/common/tools/db_query_tool.py @@ -0,0 +1,37 @@ +from typing_extensions import Any, Union + +from patchwork.common.tools import Tool +from patchwork.steps import CallSQL + + +class DatabaseQueryTool(Tool, tool_name="db_query_tool"): + def __init__(self, inputs: dict[str, Any]): + super().__init__() + self.db_settings = inputs.copy() + + @property + def json_schema(self) -> dict: + return { + "name": "db_query_tool", + "description": """\ +Run SQL Query on current database. 
+""", + "input_schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Database query to run.", + } + }, + "required": ["query"], + }, + } + + def execute(self, query: str) -> Union[list[dict[str, Any]], str]: + db_settings = self.db_settings.copy() + db_settings["db_query"] = query + try: + return CallSQL(db_settings).run().get("results", []) + except Exception as e: + return str(e) diff --git a/patchwork/steps/AgenticLLM/typed.py b/patchwork/steps/AgenticLLM/typed.py index 52cd04673..081d06b5f 100644 --- a/patchwork/steps/AgenticLLM/typed.py +++ b/patchwork/steps/AgenticLLM/typed.py @@ -11,10 +11,14 @@ class AgenticLLMInputs(TypedDict, total=False): user_prompt: str max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)] openai_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] anthropic_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]) + str, + StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]), ] patched_api_key: Annotated[ str, @@ -31,10 +35,16 @@ class AgenticLLMInputs(TypedDict, total=False): ), ] google_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"] + ), ] client_is_gcp: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"] + ), 
] diff --git a/patchwork/steps/CallLLM/typed.py b/patchwork/steps/CallLLM/typed.py index 6ddafe36c..f3a051f94 100644 --- a/patchwork/steps/CallLLM/typed.py +++ b/patchwork/steps/CallLLM/typed.py @@ -13,10 +13,14 @@ class CallLLMInputs(TypedDict, total=False): model_args: Annotated[str, StepTypeConfig(is_config=True)] client_args: Annotated[str, StepTypeConfig(is_config=True)] openai_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] anthropic_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]) + str, + StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]), ] patched_api_key: Annotated[ str, @@ -33,10 +37,16 @@ class CallLLMInputs(TypedDict, total=False): ), ] google_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"] + ), ] client_is_gcp: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"] + ), ] file: Annotated[str, StepTypeConfig(is_path=True)] diff --git a/patchwork/steps/DatabaseAgent/DatabaseAgent.py b/patchwork/steps/DatabaseAgent/DatabaseAgent.py new file mode 100644 index 000000000..ba2787235 --- /dev/null +++ b/patchwork/steps/DatabaseAgent/DatabaseAgent.py @@ -0,0 +1,86 @@ +from patchwork.common.client.llm.aio import AioLlmClient +from 
patchwork.common.multiturn_strategy.agentic_strategy_v2 import ( + AgentConfig, + AgenticStrategyV2, +) +from patchwork.common.tools.db_query_tool import DatabaseQueryTool +from patchwork.common.utils.utils import mustache_render +from patchwork.step import Step +from patchwork.steps.DatabaseAgent.typed import ( + DatabaseAgentInputs, + DatabaseAgentOutputs, +) + + +class DatabaseAgent(Step, input_class=DatabaseAgentInputs, output_class=DatabaseAgentOutputs): + def __init__(self, inputs): + super().__init__(inputs) + data = inputs.get("prompt_value", {}) + task = mustache_render(inputs["task"], data) + db_dialect = inputs["db_dialect"] + self.agentic_strategy = AgenticStrategyV2( + model="gemini-2.0-flash", + llm_client=AioLlmClient.create_aio_client(inputs), + template_data=dict(), + system_prompt_template=f"""\ +Please summarise the conversation given and provide the result in the structure that is asked of you. +""", + user_prompt_template=f"""\ +Please take note of any requirements to the data required to fetch. + +{task} +""", + agent_configs=[ + AgentConfig( + model="gemini-2.0-flash", + name="Assistant", + tool_set=dict(db_tool=DatabaseQueryTool(inputs)), + system_prompt=f"""\ +You are a {db_dialect} database query execution assistant. Assist me in completing a task. +Before you begin you should first try to know all tables currently available. +Then find out what data is held in the relevant tables. 
+""", + ) + ], + example_json=inputs.get("example_json"), + ) + + def run(self) -> dict: + result = self.agentic_strategy.execute(limit=10) + return {**result, **self.agentic_strategy.usage()} + + +# class DatabaseAgent(Step, input_class=DatabaseAgentInputs, output_class=DatabaseAgentOutputs): +# def __init__(self, inputs): +# super().__init__(inputs) +# +# llm_client = AioLlmClient.create_aio_client(inputs) +# +# data = inputs.get("prompt_value", {}) +# self.task = mustache_render(inputs["task"], data) +# +# db_dialect = inputs["db_dialect"] +# self.planner = PlanningStrategy( +# llm_client, +# planner_system_prompt=f"""\ +# You are a {db_dialect} database query planning assistant. You are tasked to plan the steps to assist with the provided task. +# You will not execute the steps in the plan. The user will do that instead. +# The first step of the plan should be as follows: +# 1. Tell me all tables currently available. +# +# After the list of table names is provided, get the DDL of the tables that is relevant. +# +# Your steps should be clear and concise like the following example: +# 1. Tell me the column descriptions of the table `orders`. +# 2. Execute the SQL Query: `SELECT * FROM orders` +# +# After every step, you will be asked to edit the plan so feel free to plan 1 step at a time. +# """, +# executor_system_prompt=f"""\ +# You are a {db_dialect} database query execution assistant. You will be provided instructions on what to do. 
+# """, +# ) +# +# def run(self) -> dict: +# planner_response = self.planner.run(self.task, 10) +# return {**planner_response, **self.planner.usage()} diff --git a/patchwork/steps/DatabaseAgent/__init__.py b/patchwork/steps/DatabaseAgent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/patchwork/steps/DatabaseAgent/typed.py b/patchwork/steps/DatabaseAgent/typed.py new file mode 100644 index 000000000..4f997ca83 --- /dev/null +++ b/patchwork/steps/DatabaseAgent/typed.py @@ -0,0 +1,36 @@ +from typing_extensions import Annotated, Any, Dict, TypedDict + +from patchwork.common.utils.step_typing import StepTypeConfig + + +class __DatabaseAgentOutputsRequiredInputs(TypedDict): + task: str + db_dialect: str + + +class DatabaseAgentInputs(__DatabaseAgentOutputsRequiredInputs, total=False): + db_driver: str + db_username: str + db_password: str + db_host: str + db_port: int + db_name: str + db_params: dict[str, Any] + db_driver_args: dict[str, Any] + prompt_value: Dict[str, Any] + max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)] + openai_api_key: Annotated[ + str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "anthropic_api_key"]) + ] + anthropic_api_key: Annotated[ + str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "openai_api_key"]) + ] + google_api_key: Annotated[ + str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key"]) + ] + example_json: str + + +class DatabaseAgentOutputs(TypedDict): + request_tokens: int + response_tokens: int diff --git a/patchwork/steps/FixIssue/typed.py b/patchwork/steps/FixIssue/typed.py index 6824d4e6c..fd38f8d4c 100644 --- a/patchwork/steps/FixIssue/typed.py +++ b/patchwork/steps/FixIssue/typed.py @@ -11,10 +11,14 @@ class __FixIssueRequiredInputs(TypedDict): class FixIssueInputs(__FixIssueRequiredInputs, total=False): base_path: Annotated[str, StepTypeConfig(is_path=True)] openai_api_key: Annotated[ - 
str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] anthropic_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]) + str, + StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]), ] patched_api_key: Annotated[ str, @@ -31,10 +35,16 @@ class FixIssueInputs(__FixIssueRequiredInputs, total=False): ), ] google_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"] + ), ] client_is_gcp: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"] + ), ] diff --git a/patchwork/steps/GitHubAgent/GitHubAgent.py b/patchwork/steps/GitHubAgent/GitHubAgent.py index 2a1fcd15d..4fe3125ce 100644 --- a/patchwork/steps/GitHubAgent/GitHubAgent.py +++ b/patchwork/steps/GitHubAgent/GitHubAgent.py @@ -18,7 +18,7 @@ def __init__(self, inputs): data = inputs.get("prompt_value", {}) task = mustache_render(inputs["task"], data) self.agentic_strategy = AgenticStrategyV2( - model="claude-3-7-sonnet-latest", + model="claude-3-5-sonnet-latest", llm_client=AioLlmClient.create_aio_client(inputs), template_data=dict(), system_prompt_template=f"""\ @@ -33,6 +33,7 @@ def __init__(self, inputs): agent_configs=[ AgentConfig( name="Assistant", + model="gemini-2.0-flash", tool_set=dict(github_tool=GitHubTool(base_path, inputs["github_api_token"])), system_prompt="""\ 
You are a senior software developer helping the program manager to obtain some data from GitHub. diff --git a/patchwork/steps/GitHubAgent/typed.py b/patchwork/steps/GitHubAgent/typed.py index 9eab5246d..b54b132de 100644 --- a/patchwork/steps/GitHubAgent/typed.py +++ b/patchwork/steps/GitHubAgent/typed.py @@ -12,16 +12,26 @@ class GitHubAgentInputs(__GitHubAgentRequiredInputs, total=False): prompt_value: Dict[str, Any] max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)] openai_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] anthropic_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]) + str, + StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]), ] google_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] client_is_gcp: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"] + ), ] diff --git a/patchwork/steps/LLM/typed.py b/patchwork/steps/LLM/typed.py index 57e382887..489a7f52f 100644 --- a/patchwork/steps/LLM/typed.py +++ b/patchwork/steps/LLM/typed.py @@ -21,10 +21,14 @@ class LLMInputs(__LLMInputsRequired, total=False): model_args: Annotated[str, StepTypeConfig(is_config=True)] client_args: Annotated[str, StepTypeConfig(is_config=True)] openai_api_key: Annotated[ - str, 
StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] anthropic_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]) + str, + StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]), ] patched_api_key: Annotated[ str, @@ -41,10 +45,16 @@ class LLMInputs(__LLMInputsRequired, total=False): ), ] google_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"] + ), ] client_is_gcp: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"] + ), ] file: Annotated[str, StepTypeConfig(is_path=True)] # ExtractModelResponseInputs diff --git a/patchwork/steps/ManageEngineAgent/__init__.py b/patchwork/steps/ManageEngineAgent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/patchwork/steps/SendEmail/SendEmail.py b/patchwork/steps/SendEmail/SendEmail.py index 6dc279346..57a7b7127 100644 --- a/patchwork/steps/SendEmail/SendEmail.py +++ b/patchwork/steps/SendEmail/SendEmail.py @@ -34,11 +34,13 @@ def __handle_eml_file(self, eml_file: str): original_email_data = ReadEmail(dict(eml_file_path=eml_file)).run() timestamp: datetime = original_email_data.get("datetime") - date_str = timestamp.date().strftime('%-d %b %Y') - time_str = timestamp.time().strftime('%H:%M') + date_str = timestamp.date().strftime("%-d %b %Y") + time_str = 
timestamp.time().strftime("%H:%M") from_ = original_email_data.get("from") self.subject = original_email_data.get("subject") - self.body += f"\n\nOn {date_str} at {time_str}, {from_} wrote:\n\n" + textwrap.indent(original_email_data.get("body"), "> ") + self.body += f"\n\nOn {date_str} at {time_str}, {from_} wrote:\n\n" + textwrap.indent( + original_email_data.get("body"), "> " + ) self.reply_message_id = original_email_data.get("message_id") def run(self) -> dict: diff --git a/patchwork/steps/SendEmail/typed.py b/patchwork/steps/SendEmail/typed.py index ccbf67a2b..a21ba7389 100644 --- a/patchwork/steps/SendEmail/typed.py +++ b/patchwork/steps/SendEmail/typed.py @@ -1,4 +1,4 @@ -from typing_extensions import Any, TypedDict, Annotated +from typing_extensions import Annotated, Any, TypedDict from patchwork.common.utils.step_typing import StepTypeConfig diff --git a/patchwork/steps/SimplifiedLLM/typed.py b/patchwork/steps/SimplifiedLLM/typed.py index ee313372b..5cb8c312c 100644 --- a/patchwork/steps/SimplifiedLLM/typed.py +++ b/patchwork/steps/SimplifiedLLM/typed.py @@ -16,10 +16,14 @@ class SimplifiedLLMInputs(__SimplifiedLLMInputsRequired, total=False): max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)] model: Annotated[str, StepTypeConfig(is_config=True)] openai_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] anthropic_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]) + str, + StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]), ] patched_api_key: Annotated[ str, @@ -36,10 +40,16 @@ class SimplifiedLLMInputs(__SimplifiedLLMInputsRequired, total=False): ), ] google_api_key: Annotated[ - 
str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"] + ), ] client_is_gcp: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"] + ), ] json: Annotated[bool, StepTypeConfig(is_config=True)] json_example_schema: Annotated[str, StepTypeConfig(is_config=True)] diff --git a/patchwork/steps/SimplifiedLLMOnce/typed.py b/patchwork/steps/SimplifiedLLMOnce/typed.py index 4e71d2c05..f2695fed5 100644 --- a/patchwork/steps/SimplifiedLLMOnce/typed.py +++ b/patchwork/steps/SimplifiedLLMOnce/typed.py @@ -16,10 +16,14 @@ class SimplifiedLLMOnceInputs(__SimplifiedLLMOncePBInputsRequired, total=False): # CallLLMInputs model: Annotated[str, StepTypeConfig(is_config=True)] openai_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "anthropic_api_key"] + ), ] anthropic_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]) + str, + StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "client_is_gcp", "openai_api_key"]), ] patched_api_key: Annotated[ str, @@ -36,10 +40,16 @@ class SimplifiedLLMOnceInputs(__SimplifiedLLMOncePBInputsRequired, total=False): ), ] google_api_key: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "client_is_gcp"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", 
"anthropic_api_key", "client_is_gcp"] + ), ] client_is_gcp: Annotated[ - str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"]) + str, + StepTypeConfig( + is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key", "google_api_key"] + ), ] file: Annotated[str, StepTypeConfig(is_path=True)] diff --git a/poetry.lock b/poetry.lock index e5f723083..557915e18 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2156,7 +2156,7 @@ description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "(platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and python_version < \"3.14\" or extra == \"browser-use\" or extra == \"all\"" +markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") or extra == \"browser-use\" or extra == \"all\"" files = [ {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, @@ -3822,7 +3822,7 @@ description = "Fast, correct Python JSON library supporting dataclasses, datetim optional = true python-versions = ">=3.8" groups = ["main"] -markers = "(extra == \"browser-use\" or extra == \"all\") and platform_python_implementation != \"PyPy\" or extra == \"security\" or extra == \"all\"" +markers = "(extra == \"browser-use\" or extra == \"all\" or extra == 
\"security\") and platform_python_implementation != \"PyPy\" or (extra == \"browser-use\" or extra == \"all\" or extra == \"security\") and (extra == \"security\" or extra == \"all\")" files = [ {file = "orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:552c883d03ad185f720d0c09583ebde257e41b9521b74ff40e08b7dec4559c04"}, {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616e3e8d438d02e4854f70bfdc03a6bcdb697358dbaa6bcd19cbe24d24ece1f8"}, @@ -5838,7 +5838,7 @@ description = "C version of reader, parser and emitter for ruamel.yaml derived f optional = true python-versions = ">=3.9" groups = ["main"] -markers = "platform_python_implementation == \"CPython\" and (extra == \"security\" or extra == \"all\") and python_version <= \"3.12\"" +markers = "platform_python_implementation == \"CPython\" and python_version <= \"3.12\" and (extra == \"security\" or extra == \"all\")" files = [ {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:11f891336688faf5156a36293a9c362bdc7c88f03a8a027c2c1d8e0bcde998e5"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a606ef75a60ecf3d924613892cc603b154178ee25abb3055db5062da811fd969"}, diff --git a/pyproject.toml b/pyproject.toml index 400a9a451..df1fd225e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "patchwork-cli" -version = "0.0.118" +version = "0.0.119" description = "" authors = ["patched.codes"] license = "AGPL" From df137134c86e99162f25eff421a143135861b427 Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:16:56 +0800 Subject: [PATCH 2/4] qol stuff --- patchwork/common/multiturn_strategy/planning_strategy.py | 7 ++++--- patchwork/common/tools/db_query_tool.py | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git 
a/patchwork/common/multiturn_strategy/planning_strategy.py b/patchwork/common/multiturn_strategy/planning_strategy.py index 2fafc0a6e..a590c14f5 100644 --- a/patchwork/common/multiturn_strategy/planning_strategy.py +++ b/patchwork/common/multiturn_strategy/planning_strategy.py @@ -137,13 +137,15 @@ def usage(self): } def __agent_run(self, agent: Agent, prompt: str, **kwargs) -> AgentRunResult[Any]: - planner_response = agent.run_sync(prompt, **kwargs) + loop = asyncio.new_event_loop() + planner_response = loop.run_until_complete(agent.run(prompt, **kwargs)) + loop.close() self.__request_tokens += planner_response.usage().request_tokens self.__response_tokens += planner_response.usage().response_tokens + return planner_response def run(self, task: str, conversation_limit: int = 10) -> dict: - loop = asyncio.new_event_loop() planner_response = self.__agent_run(self.planner, f"Produce the initial plan for {task}") planner_history = planner_response.all_messages() @@ -199,5 +201,4 @@ def run(self, task: str, conversation_limit: int = 10) -> dict: message_history=planner_history, ) - loop.close() return final_result.data.dict() diff --git a/patchwork/common/tools/db_query_tool.py b/patchwork/common/tools/db_query_tool.py index 67163f952..3f1abc406 100644 --- a/patchwork/common/tools/db_query_tool.py +++ b/patchwork/common/tools/db_query_tool.py @@ -8,13 +8,14 @@ class DatabaseQueryTool(Tool, tool_name="db_query_tool"): def __init__(self, inputs: dict[str, Any]): super().__init__() self.db_settings = inputs.copy() + self.db_dialect = inputs.get("db_dialect", "SQL") @property def json_schema(self) -> dict: return { "name": "db_query_tool", - "description": """\ -Run SQL Query on current database. + "description": f"""\ +Run SQL Query on current {self.db_dialect} database. 
""", "input_schema": { "type": "object", From df7129c581770b0b00aed0f63652496150c12e2a Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 24 Mar 2025 15:28:10 +0800 Subject: [PATCH 3/4] add planner db agent to planner docstring --- .../multiturn_strategy/planning_strategy.py | 36 +++++++++++++++++++ .../steps/DatabaseAgent/DatabaseAgent.py | 36 ------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/patchwork/common/multiturn_strategy/planning_strategy.py b/patchwork/common/multiturn_strategy/planning_strategy.py index a590c14f5..e15c5e8ce 100644 --- a/patchwork/common/multiturn_strategy/planning_strategy.py +++ b/patchwork/common/multiturn_strategy/planning_strategy.py @@ -86,6 +86,42 @@ def __init__( executor_tool_set: dict[str, Tool], example_json: Union[str, dict[str, Any]] = '{"output":"output text"}', ): + """ + Use this like this:: + + class DatabaseAgent(Step, input_class=DatabaseAgentInputs, output_class=DatabaseAgentOutputs): + def __init__(self, inputs): + super().__init__(inputs) + + llm_client = AioLlmClient.create_aio_client(inputs) + + data = inputs.get("prompt_value", {}) + self.task = mustache_render(inputs["task"], data) + + db_dialect = inputs["db_dialect"] + self.planner = PlanningStrategy( + llm_client, + planner_system_prompt=f'''\\ + You are a {db_dialect} database query planning assistant. You are tasked to plan the steps to assist with the provided task. + You will not execute the steps in the plan. The user will do that instead. + The first step of the plan should be as follows: + 1. Tell me all tables currently available. + After the list of table names is provided, get the DDL of the tables that is relevant. + Your steps should be clear and concise like the following example: + 1. Tell me the column descriptions of the table `orders`. + 2. 
Execute the SQL Query: `SELECT * FROM orders` + After every step, you will be asked to edit the plan so feel free to plan 1 step at a time. + ''', + executor_system_prompt=f'''\\ + You are a {db_dialect} database query execution assistant. You will be provided instructions on what to do. + ''', + ) + + def run(self) -> dict: + planner_response = self.planner.run(self.task, 10) + return {**planner_response, **self.planner.usage()} + + """ self.planner = Agent( llm_client, name="Planner", diff --git a/patchwork/steps/DatabaseAgent/DatabaseAgent.py b/patchwork/steps/DatabaseAgent/DatabaseAgent.py index ba2787235..42a8a45ed 100644 --- a/patchwork/steps/DatabaseAgent/DatabaseAgent.py +++ b/patchwork/steps/DatabaseAgent/DatabaseAgent.py @@ -48,39 +48,3 @@ def __init__(self, inputs): def run(self) -> dict: result = self.agentic_strategy.execute(limit=10) return {**result, **self.agentic_strategy.usage()} - - -# class DatabaseAgent(Step, input_class=DatabaseAgentInputs, output_class=DatabaseAgentOutputs): -# def __init__(self, inputs): -# super().__init__(inputs) -# -# llm_client = AioLlmClient.create_aio_client(inputs) -# -# data = inputs.get("prompt_value", {}) -# self.task = mustache_render(inputs["task"], data) -# -# db_dialect = inputs["db_dialect"] -# self.planner = PlanningStrategy( -# llm_client, -# planner_system_prompt=f"""\ -# You are a {db_dialect} database query planning assistant. You are tasked to plan the steps to assist with the provided task. -# You will not execute the steps in the plan. The user will do that instead. -# The first step of the plan should be as follows: -# 1. Tell me all tables currently available. -# -# After the list of table names is provided, get the DDL of the tables that is relevant. -# -# Your steps should be clear and concise like the following example: -# 1. Tell me the column descriptions of the table `orders`. -# 2. 
Execute the SQL Query: `SELECT * FROM orders` -# -# After every step, you will be asked to edit the plan so feel free to plan 1 step at a time. -# """, -# executor_system_prompt=f"""\ -# You are a {db_dialect} database query execution assistant. You will be provided instructions on what to do. -# """, -# ) -# -# def run(self) -> dict: -# planner_response = self.planner.run(self.task, 10) -# return {**planner_response, **self.planner.usage()} From d917a75fde58f6f6643a09d3e5c6b4ac79212cab Mon Sep 17 00:00:00 2001 From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com> Date: Mon, 24 Mar 2025 07:35:46 +0000 Subject: [PATCH 4/4] Patched patchwork/steps/FileAgent/README.md --- patchwork/steps/FileAgent/README.md | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 patchwork/steps/FileAgent/README.md diff --git a/patchwork/steps/FileAgent/README.md b/patchwork/steps/FileAgent/README.md new file mode 100644 index 000000000..f5afb99b0 --- /dev/null +++ b/patchwork/steps/FileAgent/README.md @@ -0,0 +1,56 @@ +# Documentation for FileAgent Module + +## Overview + +The `FileAgent` module is designed to interact with various files, particularly those in tabular formats like CSV, XLS, or XLSX, using a strategy driven by an AI agent. The agent is capable of converting files to CSV format if needed and performs tasks based on textual prompts. It is structured around several classes and utilities to handle input processing, AI interaction, and file management. + +--- + +## Components + +### 1. `typed.py` + +This file defines typed dictionaries that specify the structure of inputs and outputs for the `FileAgent`: + +- **Inputs**: + - `task`: (str) The task that needs to be performed by the agent. + - `base_path`: (str, optional) The base path for file operations. + - `prompt_value`: (Dict[str, Any]) Dictionary used to format the task. + - `max_llm_calls`: (int) The maximum number of API calls to the language model. 
+ - `anthropic_api_key`: (str) API key for using Anthropic's services. + +- **Outputs**: + - `request_tokens`: (int) Number of tokens in the request. + - `response_tokens`: (int) Number of tokens in the response. + +### 2. `FileAgent.py` + +The core class `FileAgent` inherits from `Step` and uses the defined input and output structures to perform tasks through `AgenticStrategyV2`. It uses various tools to perform file operations and relies on an AI model to fulfill the requested tasks: + +- **Inputs**: + - Configuration options such as model selection, API clients, and system prompts. + +- **Outputs**: + - A dictionary containing results from executing the strategy and usage statistics. + +- **Key Features**: + - Converts various tabular file formats to CSV using the `in2csv_tool`. + - Allows executing additional tools via agent configuration. + - Performs file operations in temporary directories. + +### 3. `__init__.py` + +An empty module initializer file that marks the directory as a Python package. + +--- + +## How to Use the Module + +1. **Instantiate `FileAgent`**: + Create an instance of `FileAgent` with the required inputs that specify the task, file paths, and necessary configurations. + +2. **Run the Agent**: + - Call the `run()` method to execute the agent's strategy. It processes the files according to the provided task, using the defined tools and configurations. + - Retrieve the results along with token usage statistics. + +This module is intended to be integrated as part of a larger system that requires automated file processing and interaction with language models for task execution.