patched-codes · patched-admin · Mar 10, 2025 · Mar 10, 2025 · Mar 10, 2025 · Mar 10, 2025
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -96,7 +96,7 @@ jobs:
 
       - name: Install dependencies
         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction --only main --extras security
+        run: poetry install --no-interaction --all-extras
 
       - name: AutoFix Patchwork
         run: |
@@ -151,7 +151,7 @@ jobs:
 
       - name: Install dependencies
         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction --only main
+        run: poetry install --no-interaction --all-extras
 
       - name: PR Review
         run: |

diff --git a/patchwork/app.py b/patchwork/app.py
@@ -59,6 +59,7 @@ def list_option_callback(ctx: click.Context, param: click.Parameter, value: str
 
 
 def find_patchflow(possible_module_paths: Iterable[str], patchflow: str) -> Any | None:
+    allowed_modules = {'trusted.module1', 'trusted.module2', 'trusted.module3'}
     for module_path in possible_module_paths:
         try:
             spec = importlib.util.spec_from_file_location("custom_module", module_path)
@@ -72,9 +73,12 @@ def find_patchflow(possible_module_paths: Iterable[str], patchflow: str) -> Any
             logger.debug(f"Patchflow {patchflow} not found as a file/directory in {module_path}")
 
         try:
-            module = importlib.import_module(module_path)
-            logger.info(f"Patchflow {patchflow} loaded from {module_path}")
-            return getattr(module, patchflow)
+            if module_path in allowed_modules:
+                module = importlib.import_module(module_path)
+                logger.info(f"Patchflow {patchflow} loaded from {module_path}")
+                return getattr(module, patchflow)
+            else:
+                logger.debug(f"Module {module_path} not in allowed list")
         except ModuleNotFoundError:
             logger.debug(f"Patchflow {patchflow} not found as a module in {module_path}")
         except AttributeError:

diff --git a/patchwork/common/multiturn_strategy/agentic_strategy_v2.py b/patchwork/common/multiturn_strategy/agentic_strategy_v2.py
@@ -42,15 +42,15 @@ def model_post_init(self, __context: Any) -> None:
 
 class AgenticStrategyV2:
     def __init__(
-            self,
-            model: str,
-            llm_client: LlmClient,
-            template_data: dict[str, str],
-            system_prompt_template: str,
-            user_prompt_template: str,
-            agent_configs: list[AgentConfig],
-            example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
-            limit: Optional[int] = None,
+        self,
+        model: str,
+        llm_client: LlmClient,
+        template_data: dict[str, str],
+        system_prompt_template: str,
+        user_prompt_template: str,
+        agent_configs: list[AgentConfig],
+        example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
+        limit: Optional[int] = None,
     ):
         self.__limit = limit
         self.__template_data = template_data
@@ -153,7 +153,7 @@ def execute(self, limit: Optional[int] = None) -> dict:
                 self.__summariser.run(
                     "Please give me the result from the following summary of what the assistants have done."
                     + agent_summary_list,
-                    )
+                )
             )
         self.__request_tokens += final_result.usage().request_tokens or 0
         self.__response_tokens += final_result.usage().response_tokens or 0

diff --git a/patchwork/common/tools/__init__.py b/patchwork/common/tools/__init__.py
@@ -1,7 +1,7 @@
+from patchwork.common.tools.api_tool import APIRequestTool
 from patchwork.common.tools.bash_tool import BashTool
 from patchwork.common.tools.code_edit_tools import CodeEditTool, FileViewTool
 from patchwork.common.tools.grep_tool import FindTextTool, FindTool
-from patchwork.common.tools.api_tool import APIRequestTool
 from patchwork.common.tools.tool import Tool
 
 __all__ = [

diff --git a/patchwork/common/tools/api_tool.py b/patchwork/common/tools/api_tool.py
@@ -91,9 +91,7 @@ def execute(
             status_code = response.status_code
             headers = response.headers
 
-            header_string = "\n".join(
-                f"{key}: {value}" for key, value in headers.items()
-            )
+            header_string = "\n".join(f"{key}: {value}" for key, value in headers.items())
 
             return (
                 f"HTTP/{response.raw.version / 10:.1f} {status_code} {response.reason}\n"

diff --git a/patchwork/common/tools/bash_tool.py b/patchwork/common/tools/bash_tool.py
@@ -45,7 +45,7 @@ def execute(
 
         try:
             result = subprocess.run(
-                command, shell=True, cwd=self.path, capture_output=True, text=True, timeout=60  # Add timeout for safety
+                command.split(), cwd=self.path, capture_output=True, text=True, timeout=60  # Add timeout for safety
             )
             return result.stdout if result.returncode == 0 else f"Error: {result.stderr}"
         except subprocess.TimeoutExpired:

diff --git a/patchwork/common/tools/code_edit_tools.py b/patchwork/common/tools/code_edit_tools.py
@@ -45,7 +45,9 @@ def json_schema(self) -> dict:
         }
 
     def __get_abs_path(self, path: str):
-        wanted_path = Path(path).resolve()
+        wanted_path = Path(path)
+        if not Path(path).is_absolute():
+            wanted_path = self.repo_path / path
         if wanted_path.is_relative_to(self.repo_path):
             return wanted_path
         else:
@@ -57,13 +59,16 @@ def execute(self, path: str, view_range: Optional[list[int]] = None) -> str:
             return f"Error: Path {abs_path} does not exist"
 
         if abs_path.is_file():
-            with open(abs_path, "r") as f:
-                content = f.read()
-
-            if view_range:
-                lines = content.splitlines()
-                start, end = view_range
-                content = "\n".join(lines[start - 1 : end])
+            try:
+                with open(abs_path, "r") as f:
+                    content = f.read()
+
+                if view_range:
+                    lines = content.splitlines()
+                    start, end = view_range
+                    content = "\n".join(lines[start - 1 : end])
+            except Exception as e:
+                content = "Error: " + str(e)
 
             if len(content) > self.__VIEW_LIMIT:
                 content = content[: self.__VIEW_LIMIT] + self.__TRUNCATION_TOKEN

diff --git a/patchwork/common/tools/csvkit_tool.py b/patchwork/common/tools/csvkit_tool.py
@@ -0,0 +1,142 @@
+from __future__ import annotations
+
+import sqlite3
+import subprocess
+from pathlib import Path
+
+import pandas
+from sqlalchemy import URL
+from typing_extensions import Optional
+
+from patchwork.common.tools.tool import Tool
+
+
+class In2CSVTool(Tool, tool_name="in2csv_tool", auto_register=False):
+    """Tool that runs the ``in2csv`` command with ``path`` as working directory."""
+
+    def __init__(self, path: str):
+        super().__init__()
+        self.path = path
+
+    @property
+    def json_schema(self) -> dict:
+        """Describe the tool: its name, help text and the inputs it accepts."""
+        def string_list(description: str) -> dict:
+            return {"type": "array", "items": {"type": "string"}, "description": description}
+
+        return {
+            "name": "in2csv_tool",
+            "description": """\
+Convert common tabular data formats to CSV.
+
+optional arguments:
+  --reset-dimensions    Ignore the sheet dimensions provided by the XLSX file.
+  --encoding-xls ENCODING_XLS
+                        Specify the encoding of the input XLS file.
+  -y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
+                        Limit CSV dialect sniffing to the specified number of
+                        bytes. Specify "0" to disable sniffing entirely, or
+                        "-1" to sniff the entire file.
+  -I, --no-inference    Disable type inference (and --locale, --date-format,
+                        --datetime-format, --no-leading-zeroes) when parsing
+                        CSV input.
+""",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "files": string_list("The CSV file(s) to operate on"),
+                    "args": string_list("The args to run with"),
+                },
+                "required": ["files"],
+            },
+        }
+
+    def execute(self, files: list[str], args: Optional[list[str]] = None) -> str:
+        """Run ``in2csv`` on ``files`` and report the CSV files it created.
+
+        Returns the command's stderr prefixed with ``ERROR:`` on a non-zero
+        exit code; otherwise a bullet list of the ``.csv`` entries in
+        ``self.path`` whose names were not there before the run.
+        """
+        extra_args = [] if not args else args
+        workdir = Path(self.path)
+
+        # Snapshot the CSV names up front so new outputs can be told apart.
+        csv_names_before = {entry.name for entry in workdir.iterdir() if entry.suffix == ".csv"}
+
+        command = ["in2csv", *files, *extra_args, "--write-sheets", "-", "--use-sheet-names"]
+        completed = subprocess.run(command, capture_output=True, text=True, cwd=self.path)
+        if completed.returncode != 0:
+            return "ERROR:\n" + completed.stderr
+
+        # Second scan: anything ending in .csv that was not in the snapshot.
+        created = [
+            f"\n* {entry}"
+            for entry in workdir.iterdir()
+            if entry.suffix == ".csv" and entry.name not in csv_names_before
+        ]
+        return "Files converted to CSV:" + "".join(created)
+
+
+class CSVSQLTool(Tool, tool_name="csvsql_tool", auto_register=False):
+    """Tool that loads CSV files into a scratch SQLite database and queries it."""
+
+    def __init__(self, path: str, tmp_path: str):
+        super().__init__()
+        self.path = path  # working directory for the csvsql subprocess
+        self.tmp_path = tmp_path  # directory that holds the scratch tmp.db
+
+    @property
+    def json_schema(self) -> dict:
+        """Return the tool's name, description and input schema."""
+        return {
+            "name": "csvsql_tool",
+            "description": """\
+Execute SQL query directly on csv files. The name of the csv files can be referenced as table in the SQL query
+
+If the output is larger than 5000 characters, the remaining characters are replaced with <TRUNCATED>.
+""",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "files": {
+                        "type": "array",
+                        "items": {"type": "string"},
+                        "description": "The CSV file(s) to operate on",
+                    },
+                    "query": {"type": "string", "description": "SQL query to execute"},
+                },
+                "required": ["files", "query"],
+            },
+        }
+
+    def execute(self, files: list[str], query: str) -> str:
+        """Insert ``files`` into the scratch database if needed, then run ``query``.
+
+        Returns the result as CSV text cut off after 5000 characters, or the
+        stderr of ``csvsql`` prefixed with ``ERROR:`` when the insert fails.
+        """
+        from contextlib import closing  # sqlite3's own "with" commits but does not close
+
+        db_path = (Path(self.tmp_path) / "tmp.db").resolve()
+        db_url = URL.create(drivername="sqlite", host="/" + str(db_path)).render_as_string()
+
+        files_to_insert = files
+        if db_path.is_file():
+            # "?" cannot bind a table name, so query sqlite_master; assumes csvsql uses the file stem (confirm).
+            lookup = "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?"
+            with closing(sqlite3.connect(str(db_path))) as conn:
+                files_to_insert = [f for f in files if conn.execute(lookup, (Path(f).stem,)).fetchone() is None]
+
+        if files_to_insert:
+            p = subprocess.run(
+                ["csvsql", *files_to_insert, "--db", db_url, "--insert"], capture_output=True, text=True, cwd=self.path
+            )
+            if p.returncode != 0:
+                return "ERROR:\n" + p.stderr
+
+        with closing(sqlite3.connect(str(db_path))) as conn, conn:
+            rv = pandas.read_sql_query(query, conn).to_csv()
+
+        return rv[:5000] + "<TRUNCATED>" if len(rv) > 5000 else rv
+
diff --git a/patchwork/common/tools/grep_tool.py b/patchwork/common/tools/grep_tool.py
@@ -158,10 +158,10 @@ def json_schema(self) -> dict:
         }
 
     def execute(
-            self,
-            pattern: Optional[str] = None,
-            path: Optional[Path] = None,
-            is_case_sensitive: bool = False,
+        self,
+        pattern: Optional[str] = None,
+        path: Optional[Path] = None,
+        is_case_sensitive: bool = False,
     ) -> str:
         if pattern is None:
             raise ValueError("pattern argument is required!")
@@ -183,18 +183,22 @@ def execute(
             paths = [p for p in path.iterdir() if p.is_file()]
 
         from collections import defaultdict
+
         file_matches = defaultdict(list)
         for path in paths:
-            with path.open("r") as f:
-                for i, line in enumerate(f.readlines()):
-                    if not matcher(line, pattern):
-                        continue
+            try:
+                with path.open("r") as f:
+                    for i, line in enumerate(f.readlines()):
+                        if not matcher(line, pattern):
+                            continue
 
-                    content = f"Line {i + 1}: {line}"
-                    if len(line) > self.__CHAR_LIMIT:
-                        content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"
+                        content = f"Line {i + 1}: {line}"
+                        if len(line) > self.__CHAR_LIMIT:
+                            content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"
 
-                    file_matches[str(path)].append(content)
+                        file_matches[str(path)].append(content)
+            except Exception as e:
+                pass
 
         total_file_matches = ""
         for path_str, matches in file_matches.items():
@@ -207,4 +211,3 @@ def execute(
         for path_str, matches in file_matches.items():
             total_file_matches += f"\n {len(matches)} Pattern matches found in '{path}': <TRUNCATED>\n"
         return total_file_matches
-
diff --git a/patchwork/common/utils/dependency.py b/patchwork/common/utils/dependency.py
@@ -9,6 +9,9 @@
 
 @lru_cache(maxsize=None)
 def import_with_dependency_group(name):
+    if not any(name in dependencies for dependencies in __DEPENDENCY_GROUPS.values()):
+        raise ImportError(f"Unauthorized import attempt for {name}")
+
     try:
         return importlib.import_module(name)
     except ImportError:

diff --git a/patchwork/common/utils/step_typing.py b/patchwork/common/utils/step_typing.py
@@ -108,6 +108,13 @@ def validate_step_type_config_with_inputs(
 def validate_step_with_inputs(input_keys: Set[str], step: Type[Step]) -> Tuple[Set[str], Dict[str, str]]:
     module_path, _, _ = step.__module__.rpartition(".")
     step_name = step.__name__
+
+    # Whitelist of trusted module paths
+    trusted_modules = {"valid_module1.typed", "valid_module2.typed"}
+
+    if f"{module_path}.typed" not in trusted_modules:
+        raise ValueError(f"Untrusted module import attempt: {module_path}.typed")
+
     type_module = importlib.import_module(f"{module_path}.typed")
     step_input_model = getattr(type_module, f"{step_name}Inputs", __NOT_GIVEN)
     step_output_model = getattr(type_module, f"{step_name}Outputs", __NOT_GIVEN)

diff --git a/patchwork/steps/CallShell/CallShell.py b/patchwork/steps/CallShell/CallShell.py
@@ -46,7 +46,8 @@ def __parse_env_text(env_text: str) -> dict[str, str]:
         return env
 
     def run(self) -> dict:
-        p = subprocess.run(self.script, shell=True, capture_output=True, text=True, cwd=self.working_dir, env=self.env)
+        command_list = shlex.split(self.script)
+        p = subprocess.run(command_list, shell=False, capture_output=True, text=True, cwd=self.working_dir, env=self.env)
         try:
             p.check_returncode()
         except subprocess.CalledProcessError as e:
@@ -57,3 +58,4 @@ def run(self) -> dict:
         logger.info(f"stdout: \n{p.stdout}")
         logger.info(f"stderr:\n{p.stderr}")
         return dict(stdout_output=p.stdout, stderr_output=p.stderr)
+
diff --git a/patchwork/steps/ExtractPackageManagerFile/TestExtractPackageManagerFile.py b/patchwork/steps/ExtractPackageManagerFile/TestExtractPackageManagerFile.py
@@ -2,7 +2,9 @@
 import unittest
 from pathlib import Path
 
-from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import ExtractPackageManagerFile
+from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import (
+    ExtractPackageManagerFile,
+)
 
 
 class TestExtractPackageManagerFile(unittest.TestCase):