Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ jobs:

- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --only main --extras security
run: poetry install --no-interaction --all-extras

- name: AutoFix Patchwork
run: |
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:

- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --only main
run: poetry install --no-interaction --all-extras

- name: PR Review
run: |
Expand Down
20 changes: 10 additions & 10 deletions patchwork/common/multiturn_strategy/agentic_strategy_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ def model_post_init(self, __context: Any) -> None:

class AgenticStrategyV2:
def __init__(
self,
model: str,
llm_client: LlmClient,
template_data: dict[str, str],
system_prompt_template: str,
user_prompt_template: str,
agent_configs: list[AgentConfig],
example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
limit: Optional[int] = None,
self,
model: str,
llm_client: LlmClient,
template_data: dict[str, str],
system_prompt_template: str,
user_prompt_template: str,
agent_configs: list[AgentConfig],
example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
limit: Optional[int] = None,
):
self.__limit = limit
self.__template_data = template_data
Expand Down Expand Up @@ -153,7 +153,7 @@ def execute(self, limit: Optional[int] = None) -> dict:
self.__summariser.run(
"Please give me the result from the following summary of what the assistants have done."
+ agent_summary_list,
)
)
)
self.__request_tokens += final_result.usage().request_tokens or 0
self.__response_tokens += final_result.usage().response_tokens or 0
Expand Down
2 changes: 1 addition & 1 deletion patchwork/common/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from patchwork.common.tools.api_tool import APIRequestTool
from patchwork.common.tools.bash_tool import BashTool
from patchwork.common.tools.code_edit_tools import CodeEditTool, FileViewTool
from patchwork.common.tools.grep_tool import FindTextTool, FindTool
from patchwork.common.tools.api_tool import APIRequestTool
from patchwork.common.tools.tool import Tool

__all__ = [
Expand Down
4 changes: 1 addition & 3 deletions patchwork/common/tools/api_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,7 @@ def execute(
status_code = response.status_code
headers = response.headers

header_string = "\n".join(
f"{key}: {value}" for key, value in headers.items()
)
header_string = "\n".join(f"{key}: {value}" for key, value in headers.items())

return (
f"HTTP/{response.raw.version / 10:.1f} {status_code} {response.reason}\n"
Expand Down
21 changes: 13 additions & 8 deletions patchwork/common/tools/code_edit_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def json_schema(self) -> dict:
}

def __get_abs_path(self, path: str):
wanted_path = Path(path).resolve()
wanted_path = Path(path)
if not Path(path).is_absolute():
wanted_path = self.repo_path / path
if wanted_path.is_relative_to(self.repo_path):
return wanted_path
else:
Expand All @@ -57,13 +59,16 @@ def execute(self, path: str, view_range: Optional[list[int]] = None) -> str:
return f"Error: Path {abs_path} does not exist"

if abs_path.is_file():
with open(abs_path, "r") as f:
content = f.read()

if view_range:
lines = content.splitlines()
start, end = view_range
content = "\n".join(lines[start - 1 : end])
try:
with open(abs_path, "r") as f:
content = f.read()

if view_range:
lines = content.splitlines()
start, end = view_range
content = "\n".join(lines[start - 1 : end])
except Exception as e:
content = "Error: " + str(e)

if len(content) > self.__VIEW_LIMIT:
content = content[: self.__VIEW_LIMIT] + self.__TRUNCATION_TOKEN
Expand Down
142 changes: 142 additions & 0 deletions patchwork/common/tools/csvkit_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from __future__ import annotations

import sqlite3
import subprocess
from pathlib import Path

import pandas
from sqlalchemy import URL
from typing_extensions import Optional

from patchwork.common.tools.tool import Tool


class In2CSVTool(Tool, tool_name="in2csv_tool", auto_register=False):
    """Wrapper around csvkit's ``in2csv`` CLI.

    Converts common tabular formats (xls, xlsx, ...) found under ``path``
    into CSV files (one per sheet, via ``--write-sheets``) and reports which
    CSV files were newly created by the invocation.
    """

    def __init__(self, path: str):
        super().__init__()
        # Directory used as the working directory for `in2csv`; newly
        # created CSV files appear here.
        self.path = path

    @property
    def json_schema(self) -> dict:
        # Tool schema advertised to the LLM agent.
        return {
            "name": "in2csv_tool",
            "description": """\
Convert common tabular data formats to CSV.

optional arguments:
--reset-dimensions Ignore the sheet dimensions provided by the XLSX file.
--encoding-xls ENCODING_XLS
Specify the encoding of the input XLS file.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format, --no-leading-zeroes) when parsing
CSV input.
""",
            "input_schema": {
                "type": "object",
                "properties": {
                    "files": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "The CSV file(s) to operate on",
                    },
                    "args": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "The args to run with",
                    },
                },
                "required": ["files"],
            },
        }

    def execute(self, files: list[str], args: Optional[list[str]] = None) -> str:
        """Run ``in2csv`` on *files* and list the CSV files it created.

        Returns ``"ERROR:\\n" + stderr`` when the CLI exits non-zero.
        """
        extra_args = args or []

        base_dir = Path(self.path)
        # Snapshot the CSV names that already exist so only files created by
        # this invocation are reported. (Fixes the original's reuse of a
        # single variable `p` for the path iterator AND the process handle.)
        existing_csvs = {entry.name for entry in base_dir.iterdir() if entry.suffix == ".csv"}

        proc = subprocess.run(
            ["in2csv", *files, *extra_args, "--write-sheets", "-", "--use-sheet-names"],
            capture_output=True,
            text=True,
            cwd=self.path,
        )
        if proc.returncode != 0:
            return "ERROR:\n" + proc.stderr

        rv = "Files converted to CSV:"
        for entry in base_dir.iterdir():
            if entry.suffix == ".csv" and entry.name not in existing_csvs:
                rv += f"\n* {entry}"

        return rv


class CSVSQLTool(Tool, tool_name="csvsql_tool", auto_register=False):
    """Wrapper around csvkit's ``csvsql`` CLI plus a scratch SQLite database.

    CSV files are loaded into ``tmp_path/tmp.db`` at most once (one table per
    file, named after the file without its ``.csv`` suffix), after which
    arbitrary SQL queries can be run against them.
    """

    def __init__(self, path: str, tmp_path: str):
        super().__init__()
        # Directory containing the CSV files (working directory for `csvsql`).
        self.path = path
        # Directory holding the throwaway SQLite database.
        self.tmp_path = tmp_path

    @property
    def json_schema(self) -> dict:
        # Tool schema advertised to the LLM agent.
        return {
            "name": "csvsql_tool",
            "description": """\
Execute SQL query directly on csv files. The name of the csv files can be referenced as table in the SQL query

If the output is larger than 5000 characters, the remaining characters are replaced with <TRUNCATED>.
""",
            "input_schema": {
                "type": "object",
                "properties": {
                    "files": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "The CSV file(s) to operate on",
                    },
                    "query": {
                        "type": "string",
                        "description": "SQL query to execute",
                    },
                },
                "required": ["files", "query"],
            },
        }

    def execute(self, files: list[str], query: str) -> str:
        """Load *files* into the scratch database if needed, then run *query*.

        Returns the query result rendered as CSV, truncated to 5000
        characters, or an ``"ERROR:"``-prefixed message when `csvsql` fails.
        """
        db_path = (Path(self.tmp_path) / "tmp.db").resolve()
        # NOTE(review): building the sqlite URL through `host` relies on how
        # sqlalchemy renders it — confirm `database=` isn't the intended
        # parameter for the file path.
        db_url = URL.create(drivername="sqlite", host="/" + str(db_path)).render_as_string()

        files_to_insert: list[str] = []
        if db_path.is_file():
            conn = sqlite3.connect(str(db_path))
            try:
                for file in files:
                    # Check sqlite_master for the table instead of SELECTing
                    # from it: selecting from a missing table raises an
                    # uncaught OperationalError, and an existing-but-empty
                    # table would wrongly be re-inserted (fetchone() is None
                    # for an empty table). Parameter binding also avoids
                    # interpolating the name into the SQL string.
                    res = conn.execute(
                        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
                        (file.removesuffix(".csv"),),
                    )
                    if res.fetchone() is None:
                        files_to_insert.append(file)
            finally:
                # `with sqlite3.connect(...)` only manages the transaction,
                # not the connection — close explicitly.
                conn.close()
        else:
            files_to_insert = files

        if len(files_to_insert) > 0:
            p = subprocess.run(
                ["csvsql", *files_to_insert, "--db", db_url, "--insert"], capture_output=True, text=True, cwd=self.path
            )
            if p.returncode != 0:
                return "ERROR:\n" + p.stderr

        conn = sqlite3.connect(str(db_path))
        try:
            pandas_df = pandas.read_sql_query(query, conn)
        finally:
            conn.close()
        rv = pandas_df.to_csv()

        if len(rv) > 5000:
            return rv[:5000] + "<TRUNCATED>"
        return rv
29 changes: 16 additions & 13 deletions patchwork/common/tools/grep_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ def json_schema(self) -> dict:
}

def execute(
self,
pattern: Optional[str] = None,
path: Optional[Path] = None,
is_case_sensitive: bool = False,
self,
pattern: Optional[str] = None,
path: Optional[Path] = None,
is_case_sensitive: bool = False,
) -> str:
if pattern is None:
raise ValueError("pattern argument is required!")
Expand All @@ -183,18 +183,22 @@ def execute(
paths = [p for p in path.iterdir() if p.is_file()]

from collections import defaultdict

file_matches = defaultdict(list)
for path in paths:
with path.open("r") as f:
for i, line in enumerate(f.readlines()):
if not matcher(line, pattern):
continue
try:
with path.open("r") as f:
for i, line in enumerate(f.readlines()):
if not matcher(line, pattern):
continue

content = f"Line {i + 1}: {line}"
if len(line) > self.__CHAR_LIMIT:
content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"
content = f"Line {i + 1}: {line}"
if len(line) > self.__CHAR_LIMIT:
content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"

file_matches[str(path)].append(content)
file_matches[str(path)].append(content)
except Exception as e:
pass

total_file_matches = ""
for path_str, matches in file_matches.items():
Expand All @@ -207,4 +211,3 @@ def execute(
for path_str, matches in file_matches.items():
total_file_matches += f"\n {len(matches)} Pattern matches found in '{path}': <TRUNCATED>\n"
return total_file_matches

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import unittest
from pathlib import Path

from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import ExtractPackageManagerFile
from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import (
ExtractPackageManagerFile,
)


class TestExtractPackageManagerFile(unittest.TestCase):
Expand Down
63 changes: 63 additions & 0 deletions patchwork/steps/FileAgent/FileAgent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import tempfile
from pathlib import Path

from patchwork.common.client.llm.aio import AioLlmClient
from patchwork.common.multiturn_strategy.agentic_strategy_v2 import (
AgentConfig,
AgenticStrategyV2,
)
from patchwork.common.tools import FileViewTool, FindTextTool
from patchwork.common.tools.csvkit_tool import CSVSQLTool, In2CSVTool
from patchwork.common.utils.utils import mustache_render
from patchwork.step import Step
from patchwork.steps.FileAgent.typed import FileAgentInputs, FileAgentOutputs


class FileAgent(Step, input_class=FileAgentInputs, output_class=FileAgentOutputs):
    """Agentic step that performs a task over local (commonly tabular) files.

    The inner agent is equipped with file-discovery/viewing tools plus
    csvkit-based tools so xls/xlsx inputs are first converted to CSV and can
    then be queried with SQL.
    """

    def __init__(self, inputs):
        super().__init__(inputs)
        # Root directory the tools are allowed to operate in.
        self.base_path = inputs.get("base_path", str(Path.cwd()))
        data = inputs.get("prompt_value", {})
        task = mustache_render(inputs["task"], data)

        self.strat_kwargs = dict(
            model="claude-3-5-sonnet-latest",
            llm_client=AioLlmClient.create_aio_client(inputs),
            template_data=dict(),
            # No placeholders here, so a plain string (not an f-string).
            system_prompt_template="""\
Please summarise the conversation given and provide the result in the structure that is asked of you.
""",
            user_prompt_template=f"""\
Please help me with this task:

{task}
""",
            agent_configs=[
                AgentConfig(
                    name="Assistant",
                    model="claude-3-7-sonnet-latest",
                    # Filled in per-run (see run()): the SQL tool needs a
                    # fresh temporary directory for its scratch database.
                    tool_set=dict(),
                    system_prompt="""\
You are a assistant that is supposed to help me with a set of files. These files are commonly tabular formatted like csv, xls or xlsx.
If you find a tabular formatted file you should use the `in2csv_tool` tool to convert the files into CSV.

After that is done, then run other tools to assist me.
""",
                )
            ],
        )
        # Only forward example_json when the caller provided one: passing
        # None explicitly would override AgenticStrategyV2's built-in default.
        example_json = inputs.get("example_json")
        if example_json is not None:
            self.strat_kwargs["example_json"] = example_json

    def run(self) -> dict:
        """Run the agentic strategy and return its result merged with token usage."""
        kwargs = self.strat_kwargs
        with tempfile.TemporaryDirectory() as tmpdir:
            agent_config = next(iter(kwargs.get("agent_configs", [])), None)
            # Guard against an empty agent_configs list instead of raising
            # AttributeError on None.
            if agent_config is not None:
                agent_config.tool_set = dict(
                    find_text=FindTextTool(self.base_path),
                    file_view=FileViewTool(self.base_path),
                    in2csv_tool=In2CSVTool(self.base_path),
                    csvsql_tool=CSVSQLTool(self.base_path, tmpdir),
                )
            agentic_strategy = AgenticStrategyV2(**kwargs)
            result = agentic_strategy.execute(limit=10)
            return {**result, **agentic_strategy.usage()}
Empty file.
Loading