Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ jobs:

- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --only main --extras security
run: poetry install --no-interaction --all-extras

- name: AutoFix Patchwork
run: |
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:

- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --only main
run: poetry install --no-interaction --all-extras

- name: PR Review
run: |
Expand Down
10 changes: 7 additions & 3 deletions patchwork/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def list_option_callback(ctx: click.Context, param: click.Parameter, value: str


def find_patchflow(possible_module_paths: Iterable[str], patchflow: str) -> Any | None:
allowed_modules = {'trusted.module1', 'trusted.module2', 'trusted.module3'}
for module_path in possible_module_paths:
try:
spec = importlib.util.spec_from_file_location("custom_module", module_path)
Expand All @@ -72,9 +73,12 @@ def find_patchflow(possible_module_paths: Iterable[str], patchflow: str) -> Any
logger.debug(f"Patchflow {patchflow} not found as a file/directory in {module_path}")

try:
module = importlib.import_module(module_path)
logger.info(f"Patchflow {patchflow} loaded from {module_path}")
return getattr(module, patchflow)
if module_path in allowed_modules:
module = importlib.import_module(module_path)
logger.info(f"Patchflow {patchflow} loaded from {module_path}")
return getattr(module, patchflow)
else:
logger.debug(f"Module {module_path} not in allowed list")
except ModuleNotFoundError:
logger.debug(f"Patchflow {patchflow} not found as a module in {module_path}")
except AttributeError:
Expand Down
20 changes: 10 additions & 10 deletions patchwork/common/multiturn_strategy/agentic_strategy_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ def model_post_init(self, __context: Any) -> None:

class AgenticStrategyV2:
def __init__(
self,
model: str,
llm_client: LlmClient,
template_data: dict[str, str],
system_prompt_template: str,
user_prompt_template: str,
agent_configs: list[AgentConfig],
example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
limit: Optional[int] = None,
self,
model: str,
llm_client: LlmClient,
template_data: dict[str, str],
system_prompt_template: str,
user_prompt_template: str,
agent_configs: list[AgentConfig],
example_json: Union[str, dict[str, Any]] = '{"output":"output text"}',
limit: Optional[int] = None,
):
self.__limit = limit
self.__template_data = template_data
Expand Down Expand Up @@ -153,7 +153,7 @@ def execute(self, limit: Optional[int] = None) -> dict:
self.__summariser.run(
"Please give me the result from the following summary of what the assistants have done."
+ agent_summary_list,
)
)
)
self.__request_tokens += final_result.usage().request_tokens or 0
self.__response_tokens += final_result.usage().response_tokens or 0
Expand Down
2 changes: 1 addition & 1 deletion patchwork/common/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from patchwork.common.tools.api_tool import APIRequestTool
from patchwork.common.tools.bash_tool import BashTool
from patchwork.common.tools.code_edit_tools import CodeEditTool, FileViewTool
from patchwork.common.tools.grep_tool import FindTextTool, FindTool
from patchwork.common.tools.api_tool import APIRequestTool
from patchwork.common.tools.tool import Tool

__all__ = [
Expand Down
4 changes: 1 addition & 3 deletions patchwork/common/tools/api_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,7 @@ def execute(
status_code = response.status_code
headers = response.headers

header_string = "\n".join(
f"{key}: {value}" for key, value in headers.items()
)
header_string = "\n".join(f"{key}: {value}" for key, value in headers.items())

return (
f"HTTP/{response.raw.version / 10:.1f} {status_code} {response.reason}\n"
Expand Down
2 changes: 1 addition & 1 deletion patchwork/common/tools/bash_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def execute(

try:
result = subprocess.run(
command, shell=True, cwd=self.path, capture_output=True, text=True, timeout=60 # Add timeout for safety
command.split(), cwd=self.path, capture_output=True, text=True, timeout=60 # Add timeout for safety
)
return result.stdout if result.returncode == 0 else f"Error: {result.stderr}"
except subprocess.TimeoutExpired:
Expand Down
21 changes: 13 additions & 8 deletions patchwork/common/tools/code_edit_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def json_schema(self) -> dict:
}

def __get_abs_path(self, path: str):
wanted_path = Path(path).resolve()
wanted_path = Path(path)
if not Path(path).is_absolute():
wanted_path = self.repo_path / path
if wanted_path.is_relative_to(self.repo_path):
return wanted_path
else:
Expand All @@ -57,13 +59,16 @@ def execute(self, path: str, view_range: Optional[list[int]] = None) -> str:
return f"Error: Path {abs_path} does not exist"

if abs_path.is_file():
with open(abs_path, "r") as f:
content = f.read()

if view_range:
lines = content.splitlines()
start, end = view_range
content = "\n".join(lines[start - 1 : end])
try:
with open(abs_path, "r") as f:
content = f.read()

if view_range:
lines = content.splitlines()
start, end = view_range
content = "\n".join(lines[start - 1 : end])
except Exception as e:
content = "Error: " + str(e)

if len(content) > self.__VIEW_LIMIT:
content = content[: self.__VIEW_LIMIT] + self.__TRUNCATION_TOKEN
Expand Down
142 changes: 142 additions & 0 deletions patchwork/common/tools/csvkit_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from __future__ import annotations

import sqlite3
import subprocess
from pathlib import Path

import pandas
from sqlalchemy import URL
from typing_extensions import Optional

from patchwork.common.tools.tool import Tool


class In2CSVTool(Tool, tool_name="in2csv_tool", auto_register=False):
    """Tool that runs the ``in2csv`` command inside a fixed working directory.

    After a successful run it reports which ``.csv`` files appeared in that
    directory compared with the listing taken just before the command ran.
    """

    def __init__(self, path: str):
        super().__init__()
        # Directory used as the subprocess cwd and scanned for CSV files.
        self.path = path

    @property
    def json_schema(self) -> dict:
        """Return the schema dict (name, description, input_schema) for this tool."""
        usage_text = """\
Convert common tabular data formats to CSV.

optional arguments:
--reset-dimensions Ignore the sheet dimensions provided by the XLSX file.
--encoding-xls ENCODING_XLS
Specify the encoding of the input XLS file.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing entirely, or
"-1" to sniff the entire file.
-I, --no-inference Disable type inference (and --locale, --date-format,
--datetime-format, --no-leading-zeroes) when parsing
CSV input.
"""
        files_property = {
            "type": "array",
            "items": {"type": "string"},
            "description": "The CSV file(s) to operate on",
        }
        args_property = {
            "type": "array",
            "items": {"type": "string"},
            "description": "The args to run with",
        }
        input_schema = {
            "type": "object",
            "properties": {"files": files_property, "args": args_property},
            "required": ["files"],
        }
        return {
            "name": "in2csv_tool",
            "description": usage_text,
            "input_schema": input_schema,
        }

    def execute(self, files: list[str], args: Optional[list[str]] = None) -> str:
        """Run ``in2csv`` on ``files`` and list the CSV files it created.

        Args:
            files: File arguments passed to ``in2csv`` ahead of any extra args.
            args: Optional extra command-line arguments; ``None`` means none.

        Returns:
            ``"ERROR:\\n"`` followed by stderr when the command exits non-zero,
            otherwise a header line followed by one bullet per new ``.csv``
            path found in the working directory.
        """
        extra_args = args or []
        workdir = Path(self.path)

        # Remember which CSV names already exist so only new ones are reported.
        preexisting = {entry.name for entry in workdir.iterdir() if entry.suffix == ".csv"}

        command = ["in2csv", *files, *extra_args, "--write-sheets", "-", "--use-sheet-names"]
        completed = subprocess.run(command, capture_output=True, text=True, cwd=self.path)
        if completed.returncode != 0:
            return "ERROR:\n" + completed.stderr

        report_lines = ["Files converted to CSV:"]
        report_lines.extend(
            f"* {entry}"
            for entry in workdir.iterdir()
            if entry.suffix == ".csv" and entry.name not in preexisting
        )
        return "\n".join(report_lines)


class CSVSQLTool(Tool, tool_name="csvsql_tool", auto_register=False):
    """Tool that answers SQL queries over CSV files.

    CSV files are loaded with the ``csvsql`` command into a SQLite database
    file (``tmp.db`` under ``tmp_path``); the query is then executed against
    that database and the result is returned as CSV text.
    """

    def __init__(self, path: str, tmp_path: str):
        super().__init__()
        # Working directory for csvsql (relative file names resolve against it).
        self.path = path
        # Directory that holds the SQLite database file.
        self.tmp_path = tmp_path

    @property
    def json_schema(self) -> dict:
        """Return the schema dict (name, description, input_schema) for this tool."""
        return {
            "name": "csvsql_tool",
            "description": """\
Execute SQL query directly on csv files. The name of the csv files can be referenced as table in the SQL query

If the output is larger than 5000 characters, the remaining characters are replaced with <TRUNCATED>.
""",
            "input_schema": {
                "type": "object",
                "properties": {
                    "files": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "The CSV file(s) to operate on",
                    },
                    "query": {
                        "type": "string",
                        "description": "SQL query to execute",
                    },
                },
                "required": ["files", "query"],
            },
        }

    def execute(self, files: list[str], query: str) -> str:
        """Load any not-yet-imported CSV files, run ``query``, return CSV text.

        Args:
            files: CSV files the query refers to.
            query: SQL text executed against the SQLite database.

        Returns:
            ``"ERROR:\\n"`` followed by csvsql's stderr when the import fails,
            otherwise the query result rendered by ``DataFrame.to_csv()``,
            cut to 5000 characters plus ``<TRUNCATED>`` when longer.
        """
        db_path = (Path(self.tmp_path) / "tmp.db").resolve()
        db_url = URL.create(drivername="sqlite", host="/" + str(db_path)).render_as_string()

        if db_path.is_file():
            files_to_insert = []
            conn = sqlite3.connect(str(db_path))
            try:
                for file in files:
                    # A table identifier cannot be supplied as a bound
                    # parameter ("SELECT 1 from ?" is a syntax error), so look
                    # the name up as a *value* in the schema table instead.
                    # csvsql names tables after the file's base name without
                    # extension (see csvkit docs), hence Path(file).stem.
                    table_name = Path(file).stem
                    found = conn.execute(
                        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
                        (table_name,),
                    ).fetchone()
                    if found is None:
                        files_to_insert.append(file)
            finally:
                # The sqlite3 connection context manager does not close the
                # connection, so close it explicitly.
                conn.close()
        else:
            # No database yet: every requested file has to be imported.
            files_to_insert = files

        if files_to_insert:
            p = subprocess.run(
                ["csvsql", *files_to_insert, "--db", db_url, "--insert"],
                capture_output=True,
                text=True,
                cwd=self.path,
            )
            if p.returncode != 0:
                return "ERROR:\n" + p.stderr

        conn = sqlite3.connect(str(db_path))
        try:
            # "with conn" keeps the commit-on-success / rollback-on-error
            # behaviour; the finally clause adds the missing close().
            with conn:
                pandas_df = pandas.read_sql_query(query, conn)
        finally:
            conn.close()
        rv = pandas_df.to_csv()

        if len(rv) > 5000:
            return rv[:5000] + "<TRUNCATED>"
        return rv

29 changes: 16 additions & 13 deletions patchwork/common/tools/grep_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ def json_schema(self) -> dict:
}

def execute(
self,
pattern: Optional[str] = None,
path: Optional[Path] = None,
is_case_sensitive: bool = False,
self,
pattern: Optional[str] = None,
path: Optional[Path] = None,
is_case_sensitive: bool = False,
) -> str:
if pattern is None:
raise ValueError("pattern argument is required!")
Expand All @@ -183,18 +183,22 @@ def execute(
paths = [p for p in path.iterdir() if p.is_file()]

from collections import defaultdict

file_matches = defaultdict(list)
for path in paths:
with path.open("r") as f:
for i, line in enumerate(f.readlines()):
if not matcher(line, pattern):
continue
try:
with path.open("r") as f:
for i, line in enumerate(f.readlines()):
if not matcher(line, pattern):
continue

content = f"Line {i + 1}: {line}"
if len(line) > self.__CHAR_LIMIT:
content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"
content = f"Line {i + 1}: {line}"
if len(line) > self.__CHAR_LIMIT:
content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}"

file_matches[str(path)].append(content)
file_matches[str(path)].append(content)
except Exception as e:
pass

total_file_matches = ""
for path_str, matches in file_matches.items():
Expand All @@ -207,4 +211,3 @@ def execute(
for path_str, matches in file_matches.items():
total_file_matches += f"\n {len(matches)} Pattern matches found in '{path}': <TRUNCATED>\n"
return total_file_matches

3 changes: 3 additions & 0 deletions patchwork/common/utils/dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

@lru_cache(maxsize=None)
def import_with_dependency_group(name):
if not any(name in dependencies for dependencies in __DEPENDENCY_GROUPS.values()):
raise ImportError(f"Unauthorized import attempt for {name}")

try:
return importlib.import_module(name)
except ImportError:
Expand Down
7 changes: 7 additions & 0 deletions patchwork/common/utils/step_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ def validate_step_type_config_with_inputs(
def validate_step_with_inputs(input_keys: Set[str], step: Type[Step]) -> Tuple[Set[str], Dict[str, str]]:
module_path, _, _ = step.__module__.rpartition(".")
step_name = step.__name__

# Whitelist of trusted module paths
trusted_modules = {"valid_module1.typed", "valid_module2.typed"}

if f"{module_path}.typed" not in trusted_modules:
raise ValueError(f"Untrusted module import attempt: {module_path}.typed")

type_module = importlib.import_module(f"{module_path}.typed")
step_input_model = getattr(type_module, f"{step_name}Inputs", __NOT_GIVEN)
step_output_model = getattr(type_module, f"{step_name}Outputs", __NOT_GIVEN)
Expand Down
4 changes: 3 additions & 1 deletion patchwork/steps/CallShell/CallShell.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def __parse_env_text(env_text: str) -> dict[str, str]:
return env

def run(self) -> dict:
p = subprocess.run(self.script, shell=True, capture_output=True, text=True, cwd=self.working_dir, env=self.env)
command_list = shlex.split(self.script)
p = subprocess.run(command_list, shell=False, capture_output=True, text=True, cwd=self.working_dir, env=self.env)
try:
p.check_returncode()
except subprocess.CalledProcessError as e:
Expand All @@ -57,3 +58,4 @@ def run(self) -> dict:
logger.info(f"stdout: \n{p.stdout}")
logger.info(f"stderr:\n{p.stderr}")
return dict(stdout_output=p.stdout, stderr_output=p.stderr)

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import unittest
from pathlib import Path

from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import ExtractPackageManagerFile
from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import (
ExtractPackageManagerFile,
)


class TestExtractPackageManagerFile(unittest.TestCase):
Expand Down
Loading