Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ jobs:

- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --only main --extras security
run: poetry install --no-interaction --all-extras

- name: AutoFix Patchwork
run: |
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:

- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --only main
run: poetry install --no-interaction --all-extras

- name: PR Review
run: |
Expand Down
20 changes: 10 additions & 10 deletions patchwork/common/multiturn_strategy/agentic_strategy_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ def model_post_init(self, __context: Any) -> None:

class AgenticStrategyV2:
def __init__(
self,
model: str,
llm_client: LlmClient,
template_data: dict[str, str],
system_prompt_template: str,
user_prompt_template: str,
agent_configs: list[AgentConfig],
example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
limit: Optional[int] = None,
self,
model: str,
llm_client: LlmClient,
template_data: dict[str, str],
system_prompt_template: str,
user_prompt_template: str,
agent_configs: list[AgentConfig],
example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
limit: Optional[int] = None,
):
self.__limit = limit
self.__template_data = template_data
Expand Down Expand Up @@ -153,7 +153,7 @@ def execute(self, limit: Optional[int] = None) -> dict:
self.__summariser.run(
"Please give me the result from the following summary of what the assistants have done."
+ agent_summary_list,
)
)
)
self.__request_tokens += final_result.usage().request_tokens or 0
self.__response_tokens += final_result.usage().response_tokens or 0
Expand Down
2 changes: 1 addition & 1 deletion patchwork/common/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from patchwork.common.tools.api_tool import APIRequestTool
from patchwork.common.tools.bash_tool import BashTool
from patchwork.common.tools.code_edit_tools import CodeEditTool, FileViewTool
from patchwork.common.tools.grep_tool import FindTextTool, FindTool
from patchwork.common.tools.api_tool import APIRequestTool
from patchwork.common.tools.tool import Tool

__all__ = [
Expand Down
4 changes: 1 addition & 3 deletions patchwork/common/tools/api_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,7 @@ def execute(
status_code = response.status_code
headers = response.headers

header_string = "\n".join(
f"{key}: {value}" for key, value in headers.items()
)
header_string = "\n".join(f"{key}: {value}" for key, value in headers.items())

return (
f"HTTP/{response.raw.version / 10:.1f} {status_code} {response.reason}\n"
Expand Down
21 changes: 13 additions & 8 deletions patchwork/common/tools/code_edit_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def json_schema(self) -> dict:
}

def __get_abs_path(self, path: str):
wanted_path = Path(path).resolve()
wanted_path = Path(path)
if not Path(path).is_absolute():
wanted_path = self.repo_path / path
if wanted_path.is_relative_to(self.repo_path):
return wanted_path
else:
Expand All @@ -57,13 +59,16 @@ def execute(self, path: str, view_range: Optional[list[int]] = None) -> str:
return f"Error: Path {abs_path} does not exist"

if abs_path.is_file():
with open(abs_path, "r") as f:
content = f.read()

if view_range:
lines = content.splitlines()
start, end = view_range
content = "\n".join(lines[start - 1 : end])
try:
with open(abs_path, "r") as f:
content = f.read()

if view_range:
lines = content.splitlines()
start, end = view_range
content = "\n".join(lines[start - 1 : end])
except Exception as e:
content = "Error: " + str(e)

if len(content) > self.__VIEW_LIMIT:
content = content[: self.__VIEW_LIMIT] + self.__TRUNCATION_TOKEN
Expand Down
142 changes: 142 additions & 0 deletions patchwork/common/tools/csvkit_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from __future__ import annotations

import sqlite3
import subprocess
from pathlib import Path

import pandas
from sqlalchemy import URL
from typing_extensions import Optional

from patchwork.common.tools.tool import Tool


class In2CSVTool(Tool, tool_name="in2csv_tool", auto_register=False):
    """Wrapper around csvkit's ``in2csv`` CLI.

    Converts common tabular formats (xls, xlsx, ...) found under ``path``
    into CSV files (one per sheet, via ``--write-sheets``) and reports which
    CSV files were newly created by the invocation.
    """

    def __init__(self, path: str):
        super().__init__()
        # Directory used as the working directory for `in2csv`; newly
        # created CSV files appear here.
        self.path = path

    @property
    def json_schema(self) -> dict:
        # Tool schema advertised to the LLM agent.
        return {
            "name": "in2csv_tool",
            "description": """\
Convert common tabular data formats to CSV.

optional arguments:
--reset-dimensions Ignore the sheet dimensions provided by the XLSX file.
--encoding-xls ENCODING_XLS
Specify the encoding of the input XLS file.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format, --no-leading-zeroes) when parsing
CSV input.
""",
            "input_schema": {
                "type": "object",
                "properties": {
                    "files": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "The CSV file(s) to operate on",
                    },
                    "args": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "The args to run with",
                    },
                },
                "required": ["files"],
            },
        }

    def execute(self, files: list[str], args: Optional[list[str]] = None) -> str:
        """Run ``in2csv`` on *files* and list the CSV files it created.

        Returns ``"ERROR:\\n" + stderr`` when the CLI exits non-zero.
        """
        extra_args = args or []

        base_dir = Path(self.path)
        # Snapshot the CSV names that already exist so only files created by
        # this invocation are reported. (Fixes the original's reuse of a
        # single variable `p` for the path iterator AND the process handle.)
        existing_csvs = {entry.name for entry in base_dir.iterdir() if entry.suffix == ".csv"}

        proc = subprocess.run(
            ["in2csv", *files, *extra_args, "--write-sheets", "-", "--use-sheet-names"],
            capture_output=True,
            text=True,
            cwd=self.path,
        )
        if proc.returncode != 0:
            return "ERROR:\n" + proc.stderr

        rv = "Files converted to CSV:"
        for entry in base_dir.iterdir():
            if entry.suffix == ".csv" and entry.name not in existing_csvs:
                rv += f"\n* {entry}"

        return rv


class CSVSQLTool(Tool, tool_name="csvsql_tool", auto_register=False):
    """Wrapper around csvkit's ``csvsql`` CLI plus a scratch SQLite database.

    CSV files are loaded into ``tmp_path/tmp.db`` at most once (one table per
    file, named after the file without its ``.csv`` suffix), after which
    arbitrary SQL queries can be run against them.
    """

    def __init__(self, path: str, tmp_path: str):
        super().__init__()
        # Directory containing the CSV files (working directory for `csvsql`).
        self.path = path
        # Directory holding the throwaway SQLite database.
        self.tmp_path = tmp_path

    @property
    def json_schema(self) -> dict:
        # Tool schema advertised to the LLM agent.
        return {
            "name": "csvsql_tool",
            "description": """\
Execute SQL query directly on csv files. The name of the csv files can be referenced as table in the SQL query

If the output is larger than 5000 characters, the remaining characters are replaced with <TRUNCATED>.
""",
            "input_schema": {
                "type": "object",
                "properties": {
                    "files": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "The CSV file(s) to operate on",
                    },
                    "query": {
                        "type": "string",
                        "description": "SQL query to execute",
                    },
                },
                "required": ["files", "query"],
            },
        }

    def execute(self, files: list[str], query: str) -> str:
        """Load *files* into the scratch database if needed, then run *query*.

        Returns the query result rendered as CSV, truncated to 5000
        characters, or an ``"ERROR:"``-prefixed message when `csvsql` fails.
        """
        db_path = (Path(self.tmp_path) / "tmp.db").resolve()
        # NOTE(review): building the sqlite URL through `host` relies on how
        # sqlalchemy renders it — confirm `database=` isn't the intended
        # parameter for the file path.
        db_url = URL.create(drivername="sqlite", host="/" + str(db_path)).render_as_string()

        files_to_insert: list[str] = []
        if db_path.is_file():
            conn = sqlite3.connect(str(db_path))
            try:
                for file in files:
                    # Check sqlite_master for the table instead of SELECTing
                    # from it: selecting from a missing table raises an
                    # uncaught OperationalError, and an existing-but-empty
                    # table would wrongly be re-inserted (fetchone() is None
                    # for an empty table). Parameter binding also avoids
                    # interpolating the name into the SQL string.
                    res = conn.execute(
                        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
                        (file.removesuffix(".csv"),),
                    )
                    if res.fetchone() is None:
                        files_to_insert.append(file)
            finally:
                # `with sqlite3.connect(...)` only manages the transaction,
                # not the connection — close explicitly.
                conn.close()
        else:
            files_to_insert = files

        if len(files_to_insert) > 0:
            p = subprocess.run(
                ["csvsql", *files_to_insert, "--db", db_url, "--insert"], capture_output=True, text=True, cwd=self.path
            )
            if p.returncode != 0:
                return "ERROR:\n" + p.stderr

        conn = sqlite3.connect(str(db_path))
        try:
            pandas_df = pandas.read_sql_query(query, conn)
        finally:
            conn.close()
        rv = pandas_df.to_csv()

        if len(rv) > 5000:
            return rv[:5000] + "<TRUNCATED>"
        return rv
29 changes: 16 additions & 13 deletions patchwork/common/tools/grep_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ def json_schema(self) -> dict:
}

def execute(
self,
pattern: Optional[str] = None,
path: Optional[Path] = None,
is_case_sensitive: bool = False,
self,
pattern: Optional[str] = None,
path: Optional[Path] = None,
is_case_sensitive: bool = False,
) -> str:
if pattern is None:
raise ValueError("pattern argument is required!")
Expand All @@ -183,18 +183,22 @@ def execute(
paths = [p for p in path.iterdir() if p.is_file()]

from collections import defaultdict

file_matches = defaultdict(list)
for path in paths:
with path.open("r") as f:
for i, line in enumerate(f.readlines()):
if not matcher(line, pattern):
continue
try:
with path.open("r") as f:
for i, line in enumerate(f.readlines()):
if not matcher(line, pattern):
continue

content = f"Line {i + 1}: {line}"
if len(line) > self.__CHAR_LIMIT:
content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"
content = f"Line {i + 1}: {line}"
if len(line) > self.__CHAR_LIMIT:
content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"

file_matches[str(path)].append(content)
file_matches[str(path)].append(content)
except Exception as e:
pass

total_file_matches = ""
for path_str, matches in file_matches.items():
Expand All @@ -207,4 +211,3 @@ def execute(
for path_str, matches in file_matches.items():
total_file_matches += f"\n {len(matches)} Pattern matches found in '{path}': <TRUNCATED>\n"
return total_file_matches

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import unittest
from pathlib import Path

from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import ExtractPackageManagerFile
from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import (
ExtractPackageManagerFile,
)


class TestExtractPackageManagerFile(unittest.TestCase):
Expand Down
63 changes: 63 additions & 0 deletions patchwork/steps/FileAgent/FileAgent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import tempfile
from pathlib import Path

from patchwork.common.client.llm.aio import AioLlmClient
from patchwork.common.multiturn_strategy.agentic_strategy_v2 import (
AgentConfig,
AgenticStrategyV2,
)
from patchwork.common.tools import FileViewTool, FindTextTool
from patchwork.common.tools.csvkit_tool import CSVSQLTool, In2CSVTool
from patchwork.common.utils.utils import mustache_render
from patchwork.step import Step
from patchwork.steps.FileAgent.typed import FileAgentInputs, FileAgentOutputs


class FileAgent(Step, input_class=FileAgentInputs, output_class=FileAgentOutputs):
    """Agentic step that performs a task over local (commonly tabular) files.

    The inner agent is equipped with file-discovery/viewing tools plus
    csvkit-based tools so xls/xlsx inputs are first converted to CSV and can
    then be queried with SQL.
    """

    def __init__(self, inputs):
        super().__init__(inputs)
        # Root directory the tools are allowed to operate in.
        self.base_path = inputs.get("base_path", str(Path.cwd()))
        data = inputs.get("prompt_value", {})
        task = mustache_render(inputs["task"], data)

        self.strat_kwargs = dict(
            model="claude-3-5-sonnet-latest",
            llm_client=AioLlmClient.create_aio_client(inputs),
            template_data=dict(),
            # No placeholders here, so a plain string (not an f-string).
            system_prompt_template="""\
Please summarise the conversation given and provide the result in the structure that is asked of you.
""",
            user_prompt_template=f"""\
Please help me with this task:

{task}
""",
            agent_configs=[
                AgentConfig(
                    name="Assistant",
                    model="claude-3-7-sonnet-latest",
                    # Filled in per-run (see run()): the SQL tool needs a
                    # fresh temporary directory for its scratch database.
                    tool_set=dict(),
                    system_prompt="""\
You are a assistant that is supposed to help me with a set of files. These files are commonly tabular formatted like csv, xls or xlsx.
If you find a tabular formatted file you should use the `in2csv_tool` tool to convert the files into CSV.

After that is done, then run other tools to assist me.
""",
                )
            ],
        )
        # Only forward example_json when the caller provided one: passing
        # None explicitly would override AgenticStrategyV2's built-in default.
        example_json = inputs.get("example_json")
        if example_json is not None:
            self.strat_kwargs["example_json"] = example_json

    def run(self) -> dict:
        """Run the agentic strategy and return its result merged with token usage."""
        kwargs = self.strat_kwargs
        with tempfile.TemporaryDirectory() as tmpdir:
            agent_config = next(iter(kwargs.get("agent_configs", [])), None)
            # Guard against an empty agent_configs list instead of raising
            # AttributeError on None.
            if agent_config is not None:
                agent_config.tool_set = dict(
                    find_text=FindTextTool(self.base_path),
                    file_view=FileViewTool(self.base_path),
                    in2csv_tool=In2CSVTool(self.base_path),
                    csvsql_tool=CSVSQLTool(self.base_path, tmpdir),
                )
            agentic_strategy = AgenticStrategyV2(**kwargs)
            result = agentic_strategy.execute(limit=10)
            return {**result, **agentic_strategy.usage()}
Empty file.
Loading