From 116f2dfae5968f1de7467e0f972c3168830dc762 Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 10 Mar 2025 09:34:35 +0800 Subject: [PATCH 01/11] final --- .../multiturn_strategy/agentic_strategy_v2.py | 20 +- patchwork/common/tools/__init__.py | 2 +- patchwork/common/tools/api_tool.py | 4 +- patchwork/common/tools/code_edit_tools.py | 21 +- patchwork/common/tools/csvkit_tool.py | 142 ++++++++ patchwork/common/tools/grep_tool.py | 29 +- patchwork/steps/FileAgent/FileAgent.py | 65 ++++ patchwork/steps/FileAgent/__init__.py | 0 patchwork/steps/FileAgent/typed.py | 27 ++ patchwork/steps/GitHubAgent/typed.py | 5 +- patchwork/steps/__init__.py | 2 + poetry.lock | 341 +++++++++++++++--- pyproject.toml | 3 +- 13 files changed, 576 insertions(+), 85 deletions(-) create mode 100644 patchwork/common/tools/csvkit_tool.py create mode 100644 patchwork/steps/FileAgent/FileAgent.py create mode 100644 patchwork/steps/FileAgent/__init__.py create mode 100644 patchwork/steps/FileAgent/typed.py diff --git a/patchwork/common/multiturn_strategy/agentic_strategy_v2.py b/patchwork/common/multiturn_strategy/agentic_strategy_v2.py index c5f265322..085266975 100644 --- a/patchwork/common/multiturn_strategy/agentic_strategy_v2.py +++ b/patchwork/common/multiturn_strategy/agentic_strategy_v2.py @@ -42,15 +42,15 @@ def model_post_init(self, __context: Any) -> None: class AgenticStrategyV2: def __init__( - self, - model: str, - llm_client: LlmClient, - template_data: dict[str, str], - system_prompt_template: str, - user_prompt_template: str, - agent_configs: list[AgentConfig], - example_json: Union[str, dict[str, Any]] = '{"output":"output text"}', - limit: Optional[int] = None, + self, + model: str, + llm_client: LlmClient, + template_data: dict[str, str], + system_prompt_template: str, + user_prompt_template: str, + agent_configs: list[AgentConfig], + example_json: Union[str, dict[str, Any]] = '{"output":"output text"}', + limit: Optional[int] = None, ): self.__limit = limit self.__template_data = template_data @@ -153,7 +153,7 @@ def execute(self, limit: Optional[int] = None) -> dict: self.__summariser.run( "Please give me the result from the following summary of what the assistants have done." + agent_summary_list, - ) + ) ) self.__request_tokens += final_result.usage().request_tokens or 0 self.__response_tokens += final_result.usage().response_tokens or 0 diff --git a/patchwork/common/tools/__init__.py b/patchwork/common/tools/__init__.py index 9e37152cb..aa4d3660c 100644 --- a/patchwork/common/tools/__init__.py +++ b/patchwork/common/tools/__init__.py @@ -1,7 +1,7 @@ +from patchwork.common.tools.api_tool import APIRequestTool from patchwork.common.tools.bash_tool import BashTool from patchwork.common.tools.code_edit_tools import CodeEditTool, FileViewTool from patchwork.common.tools.grep_tool import FindTextTool, FindTool -from patchwork.common.tools.api_tool import APIRequestTool from patchwork.common.tools.tool import Tool __all__ = [ diff --git a/patchwork/common/tools/api_tool.py b/patchwork/common/tools/api_tool.py index 294bd4e29..cc91a7b11 100644 --- a/patchwork/common/tools/api_tool.py +++ b/patchwork/common/tools/api_tool.py @@ -91,9 +91,7 @@ def execute( status_code = response.status_code headers = response.headers - header_string = "\n".join( - f"{key}: {value}" for key, value in headers.items() - ) + header_string = "\n".join(f"{key}: {value}" for key, value in headers.items()) return ( f"HTTP/{response.raw.version / 10:.1f} {status_code} {response.reason}\n" diff --git a/patchwork/common/tools/code_edit_tools.py b/patchwork/common/tools/code_edit_tools.py index bb576960a..6f965b2f6 100644 --- a/patchwork/common/tools/code_edit_tools.py +++ b/patchwork/common/tools/code_edit_tools.py @@ -45,7 +45,9 @@ def json_schema(self) -> dict: } def __get_abs_path(self, path: str): - wanted_path = Path(path).resolve() + wanted_path = Path(path) + if not Path(path).is_absolute(): + wanted_path = self.repo_path / path if wanted_path.is_relative_to(self.repo_path): return wanted_path else: @@ -57,13 +59,16 @@ def execute(self, path: str, view_range: Optional[list[int]] = None) -> str: return f"Error: Path {abs_path} does not exist" if abs_path.is_file(): - with open(abs_path, "r") as f: - content = f.read() - - if view_range: - lines = content.splitlines() - start, end = view_range - content = "\n".join(lines[start - 1 : end]) + try: + with open(abs_path, "r") as f: + content = f.read() + + if view_range: + lines = content.splitlines() + start, end = view_range + content = "\n".join(lines[start - 1 : end]) + except Exception as e: + content = "Error: " + str(e) if len(content) > self.__VIEW_LIMIT: content = content[: self.__VIEW_LIMIT] + self.__TRUNCATION_TOKEN diff --git a/patchwork/common/tools/csvkit_tool.py b/patchwork/common/tools/csvkit_tool.py new file mode 100644 index 000000000..a1ef8dc59 --- /dev/null +++ b/patchwork/common/tools/csvkit_tool.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import sqlite3 +import subprocess +from pathlib import Path + +import pandas +from sqlalchemy import URL +from typing_extensions import Optional + +from patchwork.common.tools.tool import Tool + + +class In2CSVTool(Tool, tool_name="in2csv_tool", auto_register=False): + def __init__(self, path: str): + super().__init__() + self.path = path + + @property + def json_schema(self) -> dict: + return { + "name": "in2csv_tool", + "description": """\ +Convert common tabular data formats to CSV. + +optional arguments: + --reset-dimensions Ignore the sheet dimensions provided by the XLSX file. + --encoding-xls ENCODING_XLS + Specify the encoding of the input XLS file. + -y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT + Limit CSV dialect sniffing to the specified number of + bytes. Specify "0" to disable sniffing entirely, or + "-1" to sniff the entire file. + -I, --no-inference Disable type inference (and --locale, --date-format, + --datetime-format, --no-leading-zeroes) when parsing + CSV input. +""", + "input_schema": { + "type": "object", + "properties": { + "files": { + "type": "array", + "items": {"type": "string"}, + "description": "The CSV file(s) to operate on", + }, + "args": { + "type": "array", + "items": {"type": "string"}, + "description": "The args to run with", + }, + }, + "required": ["files"], + }, + } + + def execute(self, files: list[str], args: Optional[list[str]] = None) -> str: + args = args or [] + + original_csvs = set() + for p in Path(self.path).iterdir(): + if p.suffix == ".csv": + original_csvs.add(p.name) + + p = subprocess.run( + ["in2csv", *files, *args, "--write-sheets", "-", "--use-sheet-names"], + capture_output=True, + text=True, + cwd=self.path, + ) + if p.returncode != 0: + return "ERROR:\n" + p.stderr + + rv = "Files converted to CSV:" + for p in Path(self.path).iterdir(): + if p.suffix == ".csv" and p.name not in original_csvs: + rv += f"\n* {p}" + + return rv + + +class CSVSQLTool(Tool, tool_name="csvsql_tool", auto_register=False): + def __init__(self, path: str, tmp_path: str): + super().__init__() + self.path = path + self.tmp_path = tmp_path + + @property + def json_schema(self) -> dict: + return { + "name": "csvsql_tool", + "description": """\ +Execute SQL query directly on csv files. The name of the csv files can be referenced as table in the SQL query + +If the output is larger than 5000 characters, the remaining characters are replaced with . +""", + "input_schema": { + "type": "object", + "properties": { + "files": { + "type": "array", + "items": {"type": "string"}, + "description": "The CSV file(s) to operate on", + }, + "query": { + "type": "string", + "description": "SQL query to execute", + }, + }, + "required": ["files", "query"], + }, + } + + def execute(self, files: list[str], query: str) -> str: + db_path = (Path(self.tmp_path) / "tmp.db").resolve() + db_url = URL.create(drivername="sqlite", host="/" + str(db_path)).render_as_string() + + files_to_insert = [] + if db_path.is_file(): + with sqlite3.connect(str(db_path)) as conn: + for file in files: + res = conn.execute( + f"SELECT 1 from {file.removesuffix('.csv')}", + ) + if res.fetchone() is None: + files_to_insert.append(file) + else: + files_to_insert = files + + if len(files_to_insert) > 0: + p = subprocess.run( + ["csvsql", *files_to_insert, "--db", db_url, "--insert"], capture_output=True, text=True, cwd=self.path + ) + if p.returncode != 0: + return "ERROR:\n" + p.stderr + + with sqlite3.connect(str(db_path)) as conn: + pandas_df = pandas.read_sql_query(query, conn) + rv = pandas_df.to_csv() + + if len(rv) > 5000: + return rv[:5000] + "" + return rv diff --git a/patchwork/common/tools/grep_tool.py b/patchwork/common/tools/grep_tool.py index 4358d8e05..0361ceb6f 100644 --- a/patchwork/common/tools/grep_tool.py +++ b/patchwork/common/tools/grep_tool.py @@ -158,10 +158,10 @@ def json_schema(self) -> dict: } def execute( - self, - pattern: Optional[str] = None, - path: Optional[Path] = None, - is_case_sensitive: bool = False, + self, + pattern: Optional[str] = None, + path: Optional[Path] = None, + is_case_sensitive: bool = False, ) -> str: if pattern is None: raise ValueError("pattern argument is required!") @@ -183,18 +183,22 @@ def execute( paths = [p for p in path.iterdir() if p.is_file()] from collections import defaultdict + file_matches = defaultdict(list) for path in paths: - with path.open("r") as f: - for i, line in enumerate(f.readlines()): - if not matcher(line, pattern): - continue + try: + with path.open("r") as f: + for i, line in enumerate(f.readlines()): + if not matcher(line, pattern): + continue - content = f"Line {i + 1}: {line}" - if len(line) > self.__CHAR_LIMIT: - content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}" + content = f"Line {i + 1}: {line}" + if len(line) > self.__CHAR_LIMIT: + content = f"Line {i + 1}: {self.__CHAR_LIMIT_TEXT}" - file_matches[str(path)].append(content) + file_matches[str(path)].append(content) + except Exception as e: + pass total_file_matches = "" for path_str, matches in file_matches.items(): @@ -207,4 +211,3 @@ def execute( for path_str, matches in file_matches.items(): total_file_matches += f"\n {len(matches)} Pattern matches found in '{path}': \n" return total_file_matches - diff --git a/patchwork/steps/FileAgent/FileAgent.py b/patchwork/steps/FileAgent/FileAgent.py new file mode 100644 index 000000000..27291b939 --- /dev/null +++ b/patchwork/steps/FileAgent/FileAgent.py @@ -0,0 +1,65 @@ +import tempfile +from pathlib import Path + +from patchwork.common.client.llm.aio import AioLlmClient +from patchwork.common.multiturn_strategy.agentic_strategy_v2 import ( + AgentConfig, + AgenticStrategyV2, +) +from patchwork.common.tools import FileViewTool, FindTextTool +from patchwork.common.tools.csvkit_tool import CSVSQLTool, In2CSVTool +from patchwork.common.utils.utils import mustache_render +from patchwork.step import Step +from patchwork.steps.FileAgent.typed import FileAgentInputs, FileAgentOutputs + + +class FileAgent(Step, input_class=FileAgentInputs, output_class=FileAgentOutputs): + def __init__(self, inputs): + super().__init__(inputs) + self.base_path = inputs.get("base_path", str(Path.cwd())) + data = inputs.get("prompt_value", {}) + task = mustache_render(inputs["task"], data) + + self.strat_kwargs = dict( + model="claude-3-5-sonnet-latest", + llm_client=AioLlmClient.create_aio_client(inputs), + template_data=dict(), + system_prompt_template=f"""\ +Please summarise the conversation given and provide the result in the structure that is asked of you. +""", + user_prompt_template=f"""\ +Please help me with this task: + +{task} +""", + agent_configs=[ + AgentConfig( + name="Assistant", + model="claude-3-7-sonnet-latest", + tool_set=dict(), + system_prompt="""\ +You are a assistant that is supposed to help me with a set of files. These files are commonly tabular formatted like csv, xls or xlsx. +If you find a tabular formatted file you should use the `in2csv_tool` tool to convert the files into CSV. + +After that is done, then run other tools to assist me. +""", + ) + ], + example_json=inputs.get("example_json"), + ) + + def run(self) -> dict: + kwargs = self.strat_kwargs + with tempfile.TemporaryDirectory() as tmpdir: + agent_config = next(iter(kwargs.get("agent_configs", [])), None) + agent_config.tool_set = dict( + find_text=FindTextTool(self.base_path), + file_view=FileViewTool(self.base_path), + in2csv_tool=In2CSVTool(self.base_path), + csvsql_tool=CSVSQLTool(self.base_path, tmpdir), + ) + agentic_strategy = AgenticStrategyV2(**kwargs) + result = agentic_strategy.execute(limit=10) + return {**result, **agentic_strategy.usage()} + + diff --git a/patchwork/steps/FileAgent/__init__.py b/patchwork/steps/FileAgent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/patchwork/steps/FileAgent/typed.py b/patchwork/steps/FileAgent/typed.py new file mode 100644 index 000000000..aa6e13033 --- /dev/null +++ b/patchwork/steps/FileAgent/typed.py @@ -0,0 +1,27 @@ +from typing_extensions import Annotated, Any, Dict, TypedDict + +from patchwork.common.utils.step_typing import StepTypeConfig + + +class __ReconcilationAgentRequiredInputs(TypedDict): + task: str + + +class FileAgentInputs(__ReconcilationAgentRequiredInputs, total=False): + base_path: str + prompt_value: Dict[str, Any] + max_llm_calls: Annotated[int, StepTypeConfig(is_config=True)] + openai_api_key: Annotated[ + str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "anthropic_api_key"]) + ] + anthropic_api_key: Annotated[ + str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "google_api_key", "openai_api_key"]) + ] + google_api_key: Annotated[ + str, StepTypeConfig(is_config=True, or_op=["patched_api_key", "openai_api_key", "anthropic_api_key"]) + ] + + +class FileAgentOutputs(TypedDict): + request_tokens: int + response_tokens: int diff --git a/patchwork/steps/GitHubAgent/typed.py b/patchwork/steps/GitHubAgent/typed.py index a80c8aa75..9533258e6 100644 --- a/patchwork/steps/GitHubAgent/typed.py +++ b/patchwork/steps/GitHubAgent/typed.py @@ -1,4 +1,4 @@ -from typing_extensions import Annotated, Any, Dict, List, TypedDict +from typing_extensions import Annotated, Any, Dict, TypedDict from patchwork.common.utils.step_typing import StepTypeConfig @@ -6,6 +6,7 @@ class __GitHubAgentRequiredInputs(TypedDict): task: str + class GitHubAgentInputs(__GitHubAgentRequiredInputs, total=False): base_path: str prompt_value: Dict[str, Any] @@ -22,7 +23,5 @@ class GitHubAgentInputs(__GitHubAgentRequiredInputs, total=False): class GitHubAgentOutputs(TypedDict): - conversation_history: List[Dict] - tool_records: List[Dict] request_tokens: int response_tokens: int diff --git a/patchwork/steps/__init__.py b/patchwork/steps/__init__.py index 513d4523e..bd1c82d3e 100644 --- a/patchwork/steps/__init__.py +++ b/patchwork/steps/__init__.py @@ -25,6 +25,7 @@ from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import ( ExtractPackageManagerFile, ) +from patchwork.steps.FileAgent.FileAgent import FileAgent from patchwork.steps.FilterBySimilarity.FilterBySimilarity import FilterBySimilarity from patchwork.steps.FixIssue.FixIssue import FixIssue from patchwork.steps.GetTypescriptTypeInfo.GetTypescriptTypeInfo import ( @@ -82,6 +83,7 @@ "ExtractPackageManagerFile", "FilterBySimilarity", "FixIssue", + "FileAgent", "LLM", "ModifyCode", "ModifyCodePB", diff --git a/poetry.lock b/poetry.lock index cd98f32b7..19e8d37f6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,84 @@ # This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +[[package]] +name = "agate" +version = "1.13.0" +description = "A data analysis library that is optimized for humans instead of machines." +optional = false +python-versions = "*" +files = [ + {file = "agate-1.13.0-py2.py3-none-any.whl", hash = "sha256:0726e3883e157da2b6bb58cc40bd9b810a5df4a11dea3bcd3de0f422a3819294"}, + {file = "agate-1.13.0.tar.gz", hash = "sha256:24bc3d3cbd165aa3ab0ef9e798dd4c53ad703012d450fe89b9c26b239505c445"}, +] + +[package.dependencies] +Babel = ">=2.0" +isodate = ">=0.5.4" +leather = ">=0.3.2" +parsedatetime = ">=2.1,<2.5 || >2.5" +python-slugify = ">=1.2.1" +pytimeparse = ">=1.1.5" +tzdata = {version = ">=2023.3", markers = "platform_system == \"Windows\""} + +[package.extras] +test = ["PyICU (>=2.4.2)", "backports.zoneinfo", "coverage (>=3.7.1)", "cssselect (>=0.9.1)", "lxml (>=3.6.0)", "pytest", "pytest-cov"] + +[[package]] +name = "agate-dbf" +version = "0.2.3" +description = "agate-dbf adds read support for dbf files to agate." +optional = false +python-versions = "*" +files = [ + {file = "agate-dbf-0.2.3.tar.gz", hash = "sha256:98a2b53757136cc74dc297e59e2101d34f6d48f41f74156bb6c0de26bba2aa3f"}, + {file = "agate_dbf-0.2.3-py2.py3-none-any.whl", hash = "sha256:b36e5a8321f9c42f812750256cadd600f779a19e334a80fee6e032395c0b9aa0"}, +] + +[package.dependencies] +agate = ">=1.5.0" +dbfread = ">=2.0.5" + +[package.extras] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "agate-excel" +version = "0.4.1" +description = "agate-excel adds read support for Excel files (xls and xlsx) to agate." +optional = false +python-versions = "*" +files = [ + {file = "agate-excel-0.4.1.tar.gz", hash = "sha256:28426618c90747111e6d566e983d838f1e2fae641ea6970d7acb0e9d4b384091"}, + {file = "agate_excel-0.4.1-py2.py3-none-any.whl", hash = "sha256:398f214cb24e3debcb6cc75f52dc558dee84d594f5f4b5e73997299c6c786158"}, +] + +[package.dependencies] +agate = ">=1.5.0" +olefile = "*" +openpyxl = ">=2.3.0" +xlrd = ">=0.9.4" + +[package.extras] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "agate-sql" +version = "0.7.2" +description = "agate-sql adds SQL read/write support to agate." +optional = false +python-versions = "*" +files = [ + {file = "agate-sql-0.7.2.tar.gz", hash = "sha256:9b1b30284a573fd416759437273dcc5c81022bdf2facb24b4aa029a62afd53b0"}, + {file = "agate_sql-0.7.2-py2.py3-none-any.whl", hash = "sha256:be1cb9a99b3e4ec7f6106278dfb7b534be9629c8a983abb168c3effacc79dd10"}, +] + +[package.dependencies] +agate = ">=1.5.0" +sqlalchemy = ">=1.4" + +[package.extras] +test = ["crate", "geojson", "pytest", "pytest-cov"] + [[package]] name = "aiofiles" version = "24.1.0" @@ -13,13 +92,13 @@ files = [ [[package]] name = "aiohappyeyeballs" -version = "2.4.8" +version = "2.5.0" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.9" files = [ - {file = "aiohappyeyeballs-2.4.8-py3-none-any.whl", hash = "sha256:6cac4f5dd6e34a9644e69cf9021ef679e4394f54e58a183056d12009e42ea9e3"}, - {file = "aiohappyeyeballs-2.4.8.tar.gz", hash = "sha256:19728772cb12263077982d2f55453babd8bec6a052a926cd5c0c42796da8bf62"}, + {file = "aiohappyeyeballs-2.5.0-py3-none-any.whl", hash = "sha256:0850b580748c7071db98bffff6d4c94028d0d3035acc20fd721a0ce7e8cac35d"}, + {file = "aiohappyeyeballs-2.5.0.tar.gz", hash = "sha256:18fde6204a76deeabc97c48bdd01d5801cfda5d6b9c8bbeb1aaaee9d648ca191"}, ] [[package]] @@ -309,6 +388,20 @@ files = [ [package.dependencies] msrest = ">=0.7.1,<0.8.0" +[[package]] +name = "babel" +version = "2.17.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, + {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, +] + +[package.extras] +dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"] + [[package]] name = "backoff" version = "2.2.1" @@ -877,6 +970,31 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "csvkit" +version = "2.1.0" +description = "A suite of command-line tools for working with CSV, the king of tabular file formats." +optional = false +python-versions = "*" +files = [ + {file = "csvkit-2.1.0-py2.py3-none-any.whl", hash = "sha256:3c0495a519be4c9d8a8ebdf852197a80fc324cddacbaa9fec82fc397d52bac9d"}, + {file = "csvkit-2.1.0.tar.gz", hash = "sha256:b91e8f5a485888c3c515b15cc2525ce4be5cfcd4f4766ead83113e787b5fd536"}, +] + +[package.dependencies] +agate = ">=1.12.0" +agate-dbf = ">=0.2.3" +agate-excel = ">=0.4.0" +agate-sql = ">=0.7.0" +importlib_metadata = {version = "*", markers = "python_version < \"3.10\""} +openpyxl = "*" +sqlalchemy = "*" +xlrd = "*" + +[package.extras] +test = ["coverage (>=4.4.2)", "pytest", "pytest-cov"] +zstandard = ["zstandard"] + [[package]] name = "cvss" version = "3.4" @@ -928,6 +1046,17 @@ tests = ["Pillow (>=6.2.1)", "Werkzeug (>=1.0.1)", "absl-py", "aiobotocore", "ap torch = ["torch"] vision = ["Pillow (>=6.2.1)"] +[[package]] +name = "dbfread" +version = "2.0.7" +description = "Read DBF Files with Python" +optional = false +python-versions = "*" +files = [ + {file = "dbfread-2.0.7-py2.py3-none-any.whl", hash = "sha256:f604def58c59694fa0160d7be5d0b8d594467278d2bb6a47d46daf7162c84cec"}, + {file = "dbfread-2.0.7.tar.gz", hash = "sha256:07c8a9af06ffad3f6f03e8fe91ad7d2733e31a26d2b72c4dd4cfbae07ee3b73d"}, +] + [[package]] name = "defusedxml" version = "0.7.1" @@ -1103,6 +1232,17 @@ filemagic = ["file-magic (>=0.4.0)"] regex = ["regex"] test = ["coverage", "pytest", "pytest-sugar"] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, + {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, +] + [[package]] name = "eval-type-backport" version = "0.2.2" @@ -1358,13 +1498,13 @@ files = [ [[package]] name = "fsspec" -version = "2025.2.0" +version = "2025.3.0" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2025.2.0-py3-none-any.whl", hash = "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b"}, - {file = "fsspec-2025.2.0.tar.gz", hash = "sha256:1c24b16eaa0a1798afa0337aa0db9b256718ab2a89c425371f5628d22c3b6afd"}, + {file = "fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"}, + {file = "fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972"}, ] [package.dependencies] @@ -1541,16 +1681,17 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] name = "google-genai" -version = "1.4.0" +version = "1.5.0" description = "GenAI Python SDK" optional = false python-versions = ">=3.9" files = [ - {file = "google_genai-1.4.0-py3-none-any.whl", hash = "sha256:e2d2943a2ebb17fd442d539f7719975af3de07db41e2c72a04b24be0df3dadd9"}, - {file = "google_genai-1.4.0.tar.gz", hash = "sha256:808eb5b73fc81d8da92b734b5ca24fc084ebf714a4c42cc42d7dcfa47b718a18"}, + {file = "google_genai-1.5.0-py3-none-any.whl", hash = "sha256:0ad433836a402957a967ccd57cbab7768325d28966a8556771974ae1c018be59"}, + {file = "google_genai-1.5.0.tar.gz", hash = "sha256:83fcfc4956ad32ecea1fda37d8f3f7cbadbdeebd2310f2a55bc7564a2f1d459f"}, ] [package.dependencies] +anyio = ">=4.8.0,<5.0.0dev" google-auth = ">=2.14.1,<3.0.0dev" httpx = ">=0.28.1,<1.0.0dev" pydantic = ">=2.0.0,<3.0.0dev" @@ -1560,13 +1701,13 @@ websockets = ">=13.0,<15.0dev" [[package]] name = "googleapis-common-protos" -version = "1.69.0" +version = "1.69.1" description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" files = [ - {file = "googleapis_common_protos-1.69.0-py2.py3-none-any.whl", hash = "sha256:17835fdc4fa8da1d61cfe2d4d5d57becf7c61d4112f8d81c67eaa9d7ce43042d"}, - {file = "googleapis_common_protos-1.69.0.tar.gz", hash = "sha256:5a46d58af72846f59009b9c4710425b9af2139555c71837081706b213b298187"}, + {file = "googleapis_common_protos-1.69.1-py2.py3-none-any.whl", hash = "sha256:4077f27a6900d5946ee5a369fab9c8ded4c0ef1c6e880458ea2f70c14f7b70d5"}, + {file = "googleapis_common_protos-1.69.1.tar.gz", hash = "sha256:e20d2d8dda87da6fe7340afbbdf4f0bcb4c8fae7e6cadf55926c31f946b0b9b1"}, ] [package.dependencies] @@ -1873,13 +2014,13 @@ files = [ [[package]] name = "huggingface-hub" -version = "0.29.1" +version = "0.29.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.29.1-py3-none-any.whl", hash = "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5"}, - {file = "huggingface_hub-0.29.1.tar.gz", hash = "sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250"}, + {file = "huggingface_hub-0.29.2-py3-none-any.whl", hash = "sha256:c56f20fca09ef19da84dcde2b76379ecdaddf390b083f59f166715584953307d"}, + {file = "huggingface_hub-0.29.2.tar.gz", hash = "sha256:590b29c0dcbd0ee4b7b023714dc1ad8563fe4a68a91463438b74e980d28afaf3"}, ] [package.dependencies] @@ -2073,13 +2214,13 @@ trio = ["trio"] [[package]] name = "jinja2" -version = "3.1.5" +version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" files = [ - {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, - {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, + {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, + {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, ] [package.dependencies] @@ -2308,13 +2449,13 @@ pydantic = ">=2.7.4,<3.0.0" [[package]] name = "langchain-core" -version = "0.3.41" +version = "0.3.43" description = "Building applications with LLMs through composability" optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_core-0.3.41-py3-none-any.whl", hash = "sha256:1a27cca5333bae7597de4004fb634b5f3e71667a3da6493b94ce83bcf15a23bd"}, - {file = "langchain_core-0.3.41.tar.gz", hash = "sha256:d3ee9f3616ebbe7943470ade23d4a04e1729b1512c0ec55a4a07bd2ac64dedb4"}, + {file = "langchain_core-0.3.43-py3-none-any.whl", hash = "sha256:caa6bc1f4c6ab71d3c2e400f8b62e1cd6dc5ac2c37e03f12f3e2c60befd5b273"}, + {file = "langchain_core-0.3.43.tar.gz", hash = "sha256:bec60f4f5665b536434ff747b8f23375a812e82cfa529f519b54cc1e7a94a875"}, ] [package.dependencies] @@ -2379,13 +2520,13 @@ tiktoken = ">=0.7,<1" [[package]] name = "langsmith" -version = "0.3.11" +version = "0.3.13" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = true python-versions = "<4.0,>=3.9" files = [ - {file = "langsmith-0.3.11-py3-none-any.whl", hash = "sha256:0cca22737ef07d3b038a437c141deda37e00add56022582680188b681bec095e"}, - {file = "langsmith-0.3.11.tar.gz", hash = "sha256:ddf29d24352e99de79c9618aaf95679214324e146c5d3d9475a7ddd2870018b1"}, + {file = "langsmith-0.3.13-py3-none-any.whl", hash = "sha256:73aaf52bbc293b9415fff4f6dad68df40658081eb26c9cb2c7bd1ff57cedd695"}, + {file = "langsmith-0.3.13.tar.gz", hash = "sha256:14014058cff408772acb93344e03cb64174837292d5f1ae09b2c8c1d8df45e92"}, ] [package.dependencies] @@ -2404,6 +2545,20 @@ zstandard = ">=0.23.0,<0.24.0" langsmith-pyo3 = ["langsmith-pyo3 (>=0.1.0rc2,<0.2.0)"] pytest = ["pytest (>=7.0.0)", "rich (>=13.9.4,<14.0.0)"] +[[package]] +name = "leather" +version = "0.4.0" +description = "Python charting for 80% of humans." +optional = false +python-versions = "*" +files = [ + {file = "leather-0.4.0-py2.py3-none-any.whl", hash = "sha256:18290bc93749ae39039af5e31e871fcfad74d26c4c3ea28ea4f681f4571b3a2b"}, + {file = "leather-0.4.0.tar.gz", hash = "sha256:f964bec2086f3153a6c16e707f20cb718f811f57af116075f4c0f4805c608b95"}, +] + +[package.extras] +test = ["cssselect (>=0.9.1)", "lxml (>=3.6.0)", "pytest", "pytest-cov"] + [[package]] name = "libcst" version = "1.1.0" @@ -2454,13 +2609,13 @@ dev = ["Sphinx (>=5.1.1)", "black (==23.9.1)", "build (>=0.10.0)", "coverage (>= [[package]] name = "logfire-api" -version = "3.7.0" +version = "3.7.1" description = "Shim for the Logfire SDK which does nothing unless Logfire is installed" optional = false python-versions = ">=3.8" files = [ - {file = "logfire_api-3.7.0-py3-none-any.whl", hash = "sha256:9f3def5f808b2af6d4ed15124b636054d55f2d29aa8ae871a6b87dfe48420bd0"}, - {file = "logfire_api-3.7.0.tar.gz", hash = "sha256:72116b99086ac269cb6268cef71471b350bc9a34e519696cf812778f54217507"}, + {file = "logfire_api-3.7.1-py3-none-any.whl", hash = "sha256:10112c1956a173bb277558a9b17e14f60b4cc320dceb5b6f5b6ae5018af97125"}, + {file = "logfire_api-3.7.1.tar.gz", hash = "sha256:59479437a2ab197282f8a05eaedd29068cdee965bdf367a3e0a13d034c7c67a1"}, ] [[package]] @@ -2585,13 +2740,13 @@ files = [ [[package]] name = "mistralai" -version = "1.5.0" +version = "1.5.1" description = "Python Client SDK for the Mistral AI API." optional = false python-versions = ">=3.8" files = [ - {file = "mistralai-1.5.0-py3-none-any.whl", hash = "sha256:9372537719f87bd6f9feef4747d0bf1f4fbe971f8c02945ca4b4bf3c94571c97"}, - {file = "mistralai-1.5.0.tar.gz", hash = "sha256:fd94bc93bc25aad9c6dd8005b1a0bc4ba1250c6b3fbf855a49936989cc6e5c0d"}, + {file = "mistralai-1.5.1-py3-none-any.whl", hash = "sha256:881f8a1b9a7966d15bd1eb4ed05df09483c261f826c1b9d153ceeca605dc79ac"}, + {file = "mistralai-1.5.1.tar.gz", hash = "sha256:ce4b8c7aa587521c46dbc45d42e27575f6197c0229f47242e5b331875104707b"}, ] [package.dependencies] @@ -3031,6 +3186,20 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "olefile" +version = "0.47" +description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f"}, + {file = "olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + [[package]] name = "ollama" version = "0.4.7" @@ -3048,13 +3217,13 @@ pydantic = ">=2.9.0,<3.0.0" [[package]] name = "openai" -version = "1.65.3" +version = "1.65.5" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.65.3-py3-none-any.whl", hash = "sha256:a155fa5d60eccda516384d3d60d923e083909cc126f383fe4a350f79185c232a"}, - {file = "openai-1.65.3.tar.gz", hash = "sha256:9b7cd8f79140d03d77f4ed8aeec6009be5dcd79bbc02f03b0e8cd83356004f71"}, + {file = "openai-1.65.5-py3-none-any.whl", hash = "sha256:5948a504e7b4003d921cfab81273813793a31c25b1d7b605797c01757e0141f1"}, + {file = "openai-1.65.5.tar.gz", hash = "sha256:17d39096bbcaf6c86580244b493a59e16613460147f0ba5ab6e608cdb6628149"}, ] [package.dependencies] @@ -3071,6 +3240,20 @@ typing-extensions = ">=4.11,<5" datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] realtime = ["websockets (>=13,<15)"] +[[package]] +name = "openpyxl" +version = "3.1.5" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + +[package.dependencies] +et-xmlfile = "*" + [[package]] name = "opentelemetry-api" version = "1.30.0" @@ -3336,6 +3519,17 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "parsedatetime" +version = "2.6" +description = "Parse human-readable date/time text." +optional = false +python-versions = "*" +files = [ + {file = "parsedatetime-2.6-py3-none-any.whl", hash = "sha256:cb96edd7016872f58479e35879294258c71437195760746faffedb692aef000b"}, + {file = "parsedatetime-2.6.tar.gz", hash = "sha256:4cb368fbb18a0b7231f4d76119165451c8d2e35951455dfee97c62a87b04d455"}, +] + [[package]] name = "pastel" version = "0.2.1" @@ -3664,13 +3858,13 @@ poetry-core = ">=1.7.0,<3.0.0" [[package]] name = "posthog" -version = "3.18.1" +version = "3.19.0" description = "Integrate PostHog into any python application." optional = true python-versions = "*" files = [ - {file = "posthog-3.18.1-py2.py3-none-any.whl", hash = "sha256:6865104b7cf3a5b13949e2bc2aab9b37b5fbf5f9e045fa55b9eabe21b3850200"}, - {file = "posthog-3.18.1.tar.gz", hash = "sha256:ce115b8422f26c57cd4143499115b741f5683c93d0b5b87bab391579aaef084b"}, + {file = "posthog-3.19.0-py2.py3-none-any.whl", hash = "sha256:c294bc0a939e21ecf88d625496f8073cc566c28ec2a917a47d5d32ba33e90a7f"}, + {file = "posthog-3.19.0.tar.gz", hash = "sha256:7fe5c9e494fc2cca9baa2bd8074c0844d572df46a54378101bc20eec2776027e"}, ] [package.dependencies] @@ -3869,13 +4063,13 @@ files = [ [[package]] name = "publicsuffixlist" -version = "1.0.2.20250302" +version = "1.0.2.20250307" description = "publicsuffixlist implement" optional = false python-versions = ">=3.5" files = [ - {file = "publicsuffixlist-1.0.2.20250302-py2.py3-none-any.whl", hash = "sha256:92fa2e194d6eb08a64a3d980b4ed75bf1c4ab112e08053fe142c6c1d202b8cec"}, - {file = "publicsuffixlist-1.0.2.20250302.tar.gz", hash = "sha256:d8dba9f597ddba22bbbce4c6a4c1978890ee16cc80d6de5370beeaa42bf35dca"}, + {file = "publicsuffixlist-1.0.2.20250307-py2.py3-none-any.whl", hash = "sha256:15d6dda9e2cb573cd6367a55532c483831349dde46eae687d89e8ba466da78b0"}, + {file = "publicsuffixlist-1.0.2.20250307.tar.gz", hash = "sha256:2e00bc29b04279a842237583a3b0e47832d59f5ff9ddbcbf1052cd9c8e0162c4"}, ] [package.extras] @@ -4420,6 +4614,34 @@ files = [ {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, ] +[[package]] +name = "python-slugify" +version = "8.0.4" +description = "A Python slugify application that also handles Unicode" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python-slugify-8.0.4.tar.gz", hash = "sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856"}, + {file = "python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8"}, +] + +[package.dependencies] +text-unidecode = ">=1.3" + +[package.extras] +unidecode = ["Unidecode (>=1.1.1)"] + +[[package]] +name = "pytimeparse" +version = "1.1.8" +description = "Time expression parser" +optional = false +python-versions = "*" +files = [ + {file = "pytimeparse-1.1.8-py2.py3-none-any.whl", hash = "sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd"}, + {file = "pytimeparse-1.1.8.tar.gz", hash = "sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a"}, +] + [[package]] name = "pytz" version = "2025.1" @@ -5260,13 +5482,13 @@ files = [ [[package]] name = "setuptools" -version = "75.8.2" +version = "76.0.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" files = [ - {file = "setuptools-75.8.2-py3-none-any.whl", hash = "sha256:558e47c15f1811c1fa7adbd0096669bf76c1d3f433f58324df69f3f5ecac4e8f"}, - {file = "setuptools-75.8.2.tar.gz", hash = "sha256:4880473a969e5f23f2a2be3646b2dfd84af9028716d398e46192f84bc36900d2"}, + {file = "setuptools-76.0.0-py3-none-any.whl", hash = "sha256:199466a166ff664970d0ee145839f5582cb9bca7a0a3a2e795b6a9cb2308e9c6"}, + {file = "setuptools-76.0.0.tar.gz", hash = "sha256:43b4ee60e10b0d0ee98ad11918e114c70701bc6051662a9a675a0496c1a158f4"}, ] [package.extras] @@ -5486,6 +5708,17 @@ files = [ doc = ["reno", "sphinx"] test = ["pytest", "tornado (>=4.5)", "typeguard"] +[[package]] +name = "text-unidecode" +version = "1.3" +description = "The most basic Text::Unidecode port" +optional = false +python-versions = "*" +files = [ + {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, + {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, +] + [[package]] name = "threadpoolctl" version = "3.5.0" @@ -5827,13 +6060,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.29.2" +version = "20.29.3" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.29.2-py3-none-any.whl", hash = "sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a"}, - {file = "virtualenv-20.29.2.tar.gz", hash = "sha256:fdaabebf6d03b5ba83ae0a02cfe96f48a716f4fae556461d180825866f75b728"}, + {file = "virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170"}, + {file = "virtualenv-20.29.3.tar.gz", hash = "sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac"}, ] [package.dependencies] @@ -6152,6 +6385,22 @@ cffi = ">=1.16.0" [package.extras] test = ["pytest"] +[[package]] +name = "xlrd" +version = "2.0.1" +description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd"}, + {file = "xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88"}, +] + +[package.extras] +build = ["twine", "wheel"] +docs = ["sphinx"] +test = ["pytest", "pytest-cov"] + [[package]] name = "xxhash" version = "3.5.0" @@ -6520,4 +6769,4 @@ security = ["owasp-depscan", "semgrep"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "0b688301339718225826ec90e93d366b17af26054c910580c6f0ac6970e0f457" +content-hash = "f0fb503850ca8c55b9109bfb65b8881363d4ab92171db2796553a6bdd3764889" diff --git a/pyproject.toml b/pyproject.toml index 00e5c0ee3..c467389be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "patchwork-cli" -version = "0.0.109" +version = "0.0.110" description = "" authors = ["patched.codes"] license = "AGPL" @@ -49,6 +49,7 @@ rich = "~13.7.1" chevron = "~0.14.0" giturlparse = "~0.12.0" eml_parser = "^2.0.0" +csvkit = "^2.1.0" python-magic = "^0.4.27" scikit-learn = "^1.3.2" json-repair = "~0.30.0" From 93c9d637f0c4d7a8f958653c53b434330fafbe15 Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 10 Mar 2025 09:59:38 +0800 Subject: [PATCH 02/11] retrigger ci --- patchwork/steps/FileAgent/FileAgent.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/patchwork/steps/FileAgent/FileAgent.py b/patchwork/steps/FileAgent/FileAgent.py index 27291b939..043e6ccb9 100644 --- a/patchwork/steps/FileAgent/FileAgent.py +++ b/patchwork/steps/FileAgent/FileAgent.py @@ -61,5 +61,3 @@ def run(self) -> dict: agentic_strategy = AgenticStrategyV2(**kwargs) result = agentic_strategy.execute(limit=10) return {**result, **agentic_strategy.usage()} - - From 8c2dc8b160d9ad13bdaf14f51ba1fa544ed13a51 Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 10 Mar 2025 10:03:06 +0800 Subject: [PATCH 03/11] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c467389be..bbc2ea921 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "patchwork-cli" -version = "0.0.110" +version = "0.0.111" description = "" authors = ["patched.codes"] license = "AGPL" From b71858a45534fceb23fc393e283b2613883e4f31 Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 10 Mar 2025 10:07:28 +0800 Subject: [PATCH 04/11] lint --- .../TestExtractPackageManagerFile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/patchwork/steps/ExtractPackageManagerFile/TestExtractPackageManagerFile.py b/patchwork/steps/ExtractPackageManagerFile/TestExtractPackageManagerFile.py index c7bb136db..894b5ec2e 100644 --- a/patchwork/steps/ExtractPackageManagerFile/TestExtractPackageManagerFile.py +++ b/patchwork/steps/ExtractPackageManagerFile/TestExtractPackageManagerFile.py @@ -2,7 +2,9 @@ import unittest from pathlib import Path -from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import ExtractPackageManagerFile +from patchwork.steps.ExtractPackageManagerFile.ExtractPackageManagerFile import ( + ExtractPackageManagerFile, +) class TestExtractPackageManagerFile(unittest.TestCase): From 85902d31adce23658bb3d4e4725d8568aee6ff19 Mon Sep 17 00:00:00 2001 From: TIANYOU CHEN <42710806+CTY-git@users.noreply.github.com> Date: Mon, 10 Mar 2025 10:10:44 +0800 Subject: [PATCH 05/11] try all extras --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4ef31ac6c..b35b6aac4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -96,7 +96,7 @@ jobs: - name: Install dependencies if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --only main --extras security + run: poetry install --no-interaction --all-extras - name: AutoFix Patchwork run: | @@ -151,7 +151,7 @@ jobs: - name: Install dependencies if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --only main + run: poetry install --no-interaction --all-extras - name: PR Review run: | From 7c0b9fd0ec7f6431d3fb9da49557670a9aee42c8 Mon Sep 17 00:00:00 2001 From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 02:13:53 +0000 Subject: [PATCH 06/11] Patched patchwork/steps/CallShell/CallShell.py --- patchwork/steps/CallShell/CallShell.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/patchwork/steps/CallShell/CallShell.py b/patchwork/steps/CallShell/CallShell.py index 98ee55a74..4228c2b21 100644 --- a/patchwork/steps/CallShell/CallShell.py +++ b/patchwork/steps/CallShell/CallShell.py @@ -46,7 +46,8 @@ def __parse_env_text(env_text: str) -> dict[str, str]: return env def run(self) -> dict: - p = subprocess.run(self.script, shell=True, capture_output=True, text=True, cwd=self.working_dir, env=self.env) + command_list = shlex.split(self.script) + p = subprocess.run(command_list, shell=False, capture_output=True, text=True, cwd=self.working_dir, env=self.env) try: p.check_returncode() except subprocess.CalledProcessError as e: @@ -57,3 +58,4 @@ def run(self) -> dict: logger.info(f"stdout: \n{p.stdout}") logger.info(f"stderr:\n{p.stderr}") return dict(stdout_output=p.stdout, stderr_output=p.stderr) + From bed2661cddf1399bec546857d271721237313799 Mon Sep 17 00:00:00 2001 From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 02:13:53 +0000 Subject: [PATCH 07/11] Patched patchwork/common/utils/dependency.py --- patchwork/common/utils/dependency.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/patchwork/common/utils/dependency.py b/patchwork/common/utils/dependency.py index 27b89bfed..541d34ff3 100644 --- a/patchwork/common/utils/dependency.py +++ b/patchwork/common/utils/dependency.py @@ -9,6 +9,9 @@ @lru_cache(maxsize=None) def import_with_dependency_group(name): + if not any(name in dependencies for dependencies in __DEPENDENCY_GROUPS.values()): + raise ImportError(f"Unauthorized import attempt for {name}") + try: return importlib.import_module(name) except ImportError: From 34a951c7142b47e8b4e071b7a94c451b14f1efc8 Mon Sep 17 00:00:00 2001 From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 02:13:53 +0000 Subject: [PATCH 08/11] Patched patchwork/common/utils/step_typing.py --- patchwork/common/utils/step_typing.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/patchwork/common/utils/step_typing.py b/patchwork/common/utils/step_typing.py index d349f7fc1..8a3a134de 100644 --- a/patchwork/common/utils/step_typing.py +++ b/patchwork/common/utils/step_typing.py @@ -108,6 +108,13 @@ def validate_step_type_config_with_inputs( def validate_step_with_inputs(input_keys: Set[str], step: Type[Step]) -> Tuple[Set[str], Dict[str, str]]: module_path, _, _ = step.__module__.rpartition(".") step_name = step.__name__ + + # Whitelist of trusted module paths + trusted_modules = {"valid_module1.typed", "valid_module2.typed"} + + if f"{module_path}.typed" not in trusted_modules: + raise ValueError(f"Untrusted module import attempt: {module_path}.typed") + type_module = importlib.import_module(f"{module_path}.typed") step_input_model = getattr(type_module, f"{step_name}Inputs", __NOT_GIVEN) step_output_model = getattr(type_module, f"{step_name}Outputs", __NOT_GIVEN) From a62d3a65c8dd92d67937d562c41cbaa7c35029e1 Mon Sep 17 00:00:00 2001 From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 02:13:53 +0000 Subject: [PATCH 09/11] Patched patchwork/common/tools/bash_tool.py --- patchwork/common/tools/bash_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patchwork/common/tools/bash_tool.py b/patchwork/common/tools/bash_tool.py index 8440f179a..143480098 100644 --- a/patchwork/common/tools/bash_tool.py +++ b/patchwork/common/tools/bash_tool.py @@ -45,7 +45,7 @@ def execute( try: result = subprocess.run( - command, shell=True, cwd=self.path, capture_output=True, text=True, timeout=60 # Add timeout for safety + command.split(), cwd=self.path, capture_output=True, text=True, timeout=60 # Add timeout for safety ) return result.stdout if result.returncode == 0 else f"Error: {result.stderr}" except subprocess.TimeoutExpired: From 7c7cb4f50075a5be6d9aed7af76f86ece85c2157 Mon Sep 17 00:00:00 2001 From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 02:13:53 +0000 Subject: [PATCH 10/11] Patched patchwork/common/tools/csvkit_tool.py --- patchwork/common/tools/csvkit_tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/patchwork/common/tools/csvkit_tool.py b/patchwork/common/tools/csvkit_tool.py index a1ef8dc59..a3a3a78aa 100644 --- a/patchwork/common/tools/csvkit_tool.py +++ b/patchwork/common/tools/csvkit_tool.py @@ -118,9 +118,8 @@ def execute(self, files: list[str], query: str) -> str: if db_path.is_file(): with sqlite3.connect(str(db_path)) as conn: for file in files: - res = conn.execute( - f"SELECT 1 from {file.removesuffix('.csv')}", - ) + table_name = file.removesuffix('.csv') + res = conn.execute("SELECT 1 from ?", (table_name,)) if res.fetchone() is None: files_to_insert.append(file) else: @@ -140,3 +139,4 @@ def execute(self, files: list[str], query: str) -> str: if len(rv) > 5000: return rv[:5000] + "" return rv + From 87b58416cc9e1aa9d3f0f668abb6505a78791bb0 Mon Sep 17 00:00:00 2001 From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 02:13:53 +0000 Subject: [PATCH 11/11] Patched patchwork/app.py --- patchwork/app.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/patchwork/app.py b/patchwork/app.py index 4e33609e7..6e6a977bc 100644 --- a/patchwork/app.py +++ b/patchwork/app.py @@ -59,6 +59,7 @@ def list_option_callback(ctx: click.Context, param: click.Parameter, value: str def find_patchflow(possible_module_paths: Iterable[str], patchflow: str) -> Any | None: + allowed_modules = {'trusted.module1', 'trusted.module2', 'trusted.module3'} for module_path in possible_module_paths: try: spec = importlib.util.spec_from_file_location("custom_module", module_path) @@ -72,9 +73,12 @@ def find_patchflow(possible_module_paths: Iterable[str], patchflow: str) -> Any logger.debug(f"Patchflow {patchflow} not found as a file/directory in {module_path}") try: - module = importlib.import_module(module_path) - logger.info(f"Patchflow {patchflow} loaded from {module_path}") - return getattr(module, patchflow) + if module_path in allowed_modules: + module = importlib.import_module(module_path) + logger.info(f"Patchflow {patchflow} loaded from {module_path}") + return getattr(module, patchflow) + else: + logger.debug(f"Module {module_path} not in allowed list") except ModuleNotFoundError: logger.debug(f"Patchflow {patchflow} not found as a module in {module_path}") except AttributeError: