Skip to content

Commit

Permalink
stricter access to builting in pandas query engine (#12278)
Browse files Browse the repository at this point in the history
  • Loading branch information
logan-markewich authored Mar 26, 2024
1 parent 6882171 commit 2c92e88
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 10 deletions.
32 changes: 25 additions & 7 deletions llama-index-core/llama_index/core/exec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,16 @@ def _restricted_import(
"float": float,
"format": format,
"frozenset": frozenset,
"getattr": getattr,
"hasattr": hasattr,
"hash": hash,
"hex": hex,
"int": int,
"isinstance": isinstance,
"issubclass": issubclass,
"iter": iter,
"len": len,
"list": list,
"map": map,
"max": max,
"min": min,
"next": next,
"oct": oct,
"ord": ord,
"pow": pow,
Expand All @@ -68,7 +64,6 @@ def _restricted_import(
"reversed": reversed,
"round": round,
"set": set,
"setattr": setattr,
"slice": slice,
"sorted": sorted,
"str": str,
Expand All @@ -94,23 +89,45 @@ def _get_restricted_globals(__globals: Union[dict, None]) -> Any:
class DunderVisitor(ast.NodeVisitor):
def __init__(self) -> None:
self.has_access_to_private_entity = False
self.has_access_to_disallowed_builtin = False

builtins = globals()["__builtins__"].keys()
self._builtins = builtins

def visit_Name(self, node: ast.Name) -> None:
if node.id.startswith("_"):
self.has_access_to_private_entity = True
if node.id not in ALLOWED_BUILTINS and node.id in self._builtins:
self.has_access_to_disallowed_builtin = True
self.generic_visit(node)

def visit_Attribute(self, node: ast.Attribute) -> None:
if node.attr.startswith("_"):
self.has_access_to_private_entity = True
if node.attr not in ALLOWED_BUILTINS and node.attr in self._builtins:
self.has_access_to_disallowed_builtin = True
self.generic_visit(node)


def _contains_protected_access(code: str) -> bool:
# do not allow imports
imports_modules = False
tree = ast.parse(code)
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.Import):
imports_modules = True
elif isinstance(node, ast.ImportFrom):
imports_modules = True
else:
continue

dunder_visitor = DunderVisitor()
dunder_visitor.visit(tree)
return dunder_visitor.has_access_to_private_entity
return (
dunder_visitor.has_access_to_private_entity
or dunder_visitor.has_access_to_disallowed_builtin
or imports_modules
)


def _verify_source_safety(__source: Union[str, bytes, CodeType]) -> None:
Expand All @@ -124,7 +141,8 @@ def _verify_source_safety(__source: Union[str, bytes, CodeType]) -> None:
__source = __source.decode()
if _contains_protected_access(__source):
raise RuntimeError(
"Execution of code containing references to private or dunder methods is forbidden!"
"Execution of code containing references to private or dunder methods, "
"disallowed builtins, or any imports, is forbidden!"
)


Expand Down
5 changes: 2 additions & 3 deletions llama-index-core/tests/query_engine/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,7 @@ def test_default_output_processor_rce2() -> None:
output = parser.parse(injected_code)

assert (
"Execution of code containing references to private or dunder methods is forbidden!"
in output
"Execution of code containing references to private or dunder methods" in output
), "Injected code executed successfully!"


Expand Down Expand Up @@ -152,7 +151,7 @@ def test_default_output_processor_e2e(tmp_path: Path) -> None:
assert isinstance(response, Response)
# raw df should be equal to slice of dataframe that's just population at location 2
rmetadata = cast(Dict[str, Any], response.metadata)
assert rmetadata["raw_pandas_output"] == str(df["population"].iloc[2:3])
assert rmetadata["raw_pandas_output"] == str(df["population"].iloc[2])

# attack 1: fail!
print("[+] Attack 1 starts, it should fail!")
Expand Down

0 comments on commit 2c92e88

Please sign in to comment.