From 2e2c87282f155ee40e44d5e1b8e7a50b98630ad5 Mon Sep 17 00:00:00 2001 From: Yiheng Tao Date: Thu, 24 Jul 2025 08:56:25 -0700 Subject: [PATCH] feat: Introduce PyEval functionality for secure Python expression evaluation - Added a new endpoint for evaluating Python expressions via the PyEval API. - Implemented a dedicated page in the web interface for users to input and evaluate expressions safely. - Integrated the RestrictedPythonEvaluator to ensure secure execution of user-provided code. - Updated the main application routes and templates to include navigation to the new PyEval feature. - Enhanced the evaluator with additional safe built-in functions and improved error handling. This update significantly expands the capabilities of the application, allowing users to safely evaluate Python expressions in a controlled environment. --- server/api/__init__.py | 4 +- server/api/pyeval.py | 141 ++++++++++++ server/main.py | 6 + server/templates/base.html | 1 + server/templates/pyeval.html | 433 +++++++++++++++++++++++++++++++++++ utils/pyeval/evaluator.py | 40 +++- 6 files changed, 623 insertions(+), 2 deletions(-) create mode 100644 server/api/pyeval.py create mode 100644 server/templates/pyeval.html diff --git a/server/api/__init__.py b/server/api/__init__.py index cc90ad5..b2df8f3 100644 --- a/server/api/__init__.py +++ b/server/api/__init__.py @@ -67,6 +67,8 @@ api_export_dataframe, ) +from .pyeval import pyeval_routes + # Aggregate all routes into a single list api_routes = [ # Knowledge management endpoints @@ -127,4 +129,4 @@ Route("/api/dataframes/{df_id}/summary", endpoint=api_get_dataframe_summary, methods=["GET"]), Route("/api/dataframes/{df_id}/execute", endpoint=api_execute_dataframe_operation, methods=["POST"]), Route("/api/dataframes/{df_id}/export", endpoint=api_export_dataframe, methods=["POST"]), -] +] + pyeval_routes diff --git a/server/api/pyeval.py b/server/api/pyeval.py new file mode 100644 index 0000000..71053c6 --- /dev/null +++ 
b/server/api/pyeval.py @@ -0,0 +1,141 @@ +"""API endpoints for Python expression evaluation using the pyeval utility.""" + +import logging +from typing import Any, Dict, Optional + +from starlette.requests import Request +from starlette.responses import JSONResponse +from starlette.routing import Route + +from utils.pyeval.evaluator import RestrictedPythonEvaluator, EvaluationError + +logger = logging.getLogger(__name__) + + +async def evaluate_expression(request: Request) -> JSONResponse: + """Evaluate a Python expression using the RestrictedPythonEvaluator. + + POST /api/pyeval/evaluate + + Request body: + { + "expression": "python_expression_to_evaluate", + "context": { + "variable_name": "variable_value", + ... + } + } + + Response: + { + "success": true/false, + "result": "evaluation_result" | null, + "execution_time_ms": 123.45, + "error_message": "error_description" | null + } + """ + try: + # Parse request body + body = await request.json() + expression = body.get("expression", "").strip() + context = body.get("context", {}) + + # Validate input + if not expression: + return JSONResponse({ + "success": False, + "result": None, + "execution_time_ms": 0.0, + "error_message": "Expression cannot be empty" + }, status_code=400) + + if not isinstance(context, dict): + return JSONResponse({ + "success": False, + "result": None, + "execution_time_ms": 0.0, + "error_message": "Context must be a dictionary" + }, status_code=400) + + logger.info(f"Evaluating expression: {expression[:100]}{'...' 
if len(expression) > 100 else ''}") + logger.debug(f"Context variables: {list(context.keys())}") + + # Create evaluator and evaluate expression + evaluator = RestrictedPythonEvaluator() + result = evaluator.evaluate_expression(expression, context) + + # Format result for JSON response + if result.success: + # Convert result to string representation for JSON serialization + result_str = _format_result_for_json(result.result) + logger.info(f"Expression evaluated successfully in {result.execution_time_ms:.2f}ms") + + return JSONResponse({ + "success": True, + "result": result_str, + "execution_time_ms": result.execution_time_ms, + "error_message": None + }) + else: + logger.warning(f"Expression evaluation failed: {result.error_message}") + return JSONResponse({ + "success": False, + "result": None, + "execution_time_ms": result.execution_time_ms, + "error_message": result.error_message + }) + + except Exception as e: + logger.error(f"Error in evaluate_expression endpoint: {e}", exc_info=True) + return JSONResponse({ + "success": False, + "result": None, + "execution_time_ms": 0.0, + "error_message": f"Server error: {str(e)}" + }, status_code=500) + + +def _format_result_for_json(result: Any) -> str: + """Format evaluation result for JSON serialization. + + Args: + result: The result from expression evaluation + + Returns: + String representation of the result suitable for JSON response + """ + try: + # Handle pandas DataFrames specially + if hasattr(result, 'to_string'): + # This covers pandas DataFrames and Series + return result.to_string() + + # Handle numpy arrays + elif hasattr(result, 'tolist'): + return str(result) + + # Handle other iterables (but not strings) + elif hasattr(result, '__iter__') and not isinstance(result, (str, bytes)): + # Convert to string representation, but limit length for very large iterables + str_result = str(result) + if len(str_result) > 10000: + return str_result[:10000] + "... 
(truncated)" + return str_result + + # Handle basic types + else: + str_result = str(result) + # Limit very long string results + if len(str_result) > 10000: + return str_result[:10000] + "... (truncated)" + return str_result + + except Exception as e: + logger.warning(f"Error formatting result for JSON: {e}") + return f"" + + +# API routes for pyeval functionality +pyeval_routes = [ + Route("/api/pyeval/evaluate", endpoint=evaluate_expression, methods=["POST"]), +] diff --git a/server/main.py b/server/main.py index 05b81da..1a90de9 100644 --- a/server/main.py +++ b/server/main.py @@ -391,6 +391,11 @@ async def dataframe_detail_page(request: Request): "dataframe_detail.html", {"request": request, "current_page": "dataframes", "df_id": df_id} ) +async def pyeval_page(request: Request): + return templates.TemplateResponse( + "pyeval.html", {"request": request, "current_page": "pyeval"} + ) + # --- Add routes --- routes = [ @@ -402,6 +407,7 @@ async def dataframe_detail_page(request: Request): Route("/visualizations", endpoint=visualizations_page, methods=["GET"]), Route("/dataframes", endpoint=dataframes_page, methods=["GET"]), Route("/dataframes/{df_id}", endpoint=dataframe_detail_page, methods=["GET"]), + Route("/pyeval", endpoint=pyeval_page, methods=["GET"]), Route("/config", endpoint=config_page, methods=["GET"]), Route("/sse", endpoint=handle_sse), Mount("/messages/", app=sse.handle_post_message), diff --git a/server/templates/base.html b/server/templates/base.html index 4a626ee..40a2740 100644 --- a/server/templates/base.html +++ b/server/templates/base.html @@ -370,6 +370,7 @@

MCP Knowledge Server

Knowledge Background Jobs DataFrames + PyEval Tools Tool History diff --git a/server/templates/pyeval.html b/server/templates/pyeval.html new file mode 100644 index 0000000..bcc7b2d --- /dev/null +++ b/server/templates/pyeval.html @@ -0,0 +1,433 @@ +{% extends "base.html" %} + +{% block title %}PyEval - MCP Knowledge Server{% endblock %} + +{% block extra_styles %} + + + +{% endblock %} + +{% block content %} +
+
+

Python Expression Evaluator

+

Safely evaluate Python expressions using RestrictedPython. Perfect for DataFrame operations and data analysis.

+
+ +
+

Examples

+
+
Basic Operations
+
DataFrame Operations
+
Mathematical
+
+ +
+

Basic Python Operations

+

Simple arithmetic, list, and string operations:

+
# Basic arithmetic +sum([1, 2, 3, 4, 5]) + +# List comprehension +[x**2 for x in range(5)] + +# String operations +"hello world".title()
+ +
+ +
+

DataFrame Operations

+

Working with pandas DataFrames:

+
# DataFrame operations (requires DataFrame in context) +df.head(10) + +# Column statistics +df['column_name'].describe() + +# Filtering +df[df['age'] > 25]
+

Context needed:

+
{"df": "your_dataframe_variable", "pd": "pandas"}
+ +
+ +
+

Mathematical Operations

+

Statistical and mathematical computations:

+
# Statistical operations (statistics module is available) +statistics.mean([1, 2, 3, 4, 5]) + +# Math operations +math.sqrt(16) + math.sin(math.pi / 2) + +# Complex calculations with generators +sum(x**2 for x in range(10) if x % 2 == 0)
+ +
+
+ +
+
+

Python Expression

+ + +
+

Context Variables (JSON)

+ +
+ +
+ + +
+
+
+ +
+

Result

+ +
Ready to evaluate your Python expression...
+
+
+
+{% endblock %} + +{% block extra_scripts %} + + + +{% endblock %} diff --git a/utils/pyeval/evaluator.py b/utils/pyeval/evaluator.py index deeb406..d67f8c9 100644 --- a/utils/pyeval/evaluator.py +++ b/utils/pyeval/evaluator.py @@ -12,7 +12,7 @@ import pandas as pd from RestrictedPython import compile_restricted_exec, safe_globals, limited_builtins -from RestrictedPython.Guards import safer_getattr, guarded_setattr +from RestrictedPython.Guards import safer_getattr, guarded_setattr, guarded_iter_unpack_sequence logger = logging.getLogger(__name__) @@ -52,6 +52,9 @@ def _create_safe_builtins(self) -> Dict[str, Any]: Returns: Dictionary containing safe built-in functions. """ + import statistics + import math + safe_builtins = limited_builtins.copy() safe_builtins.update({ # Mathematical functions @@ -61,12 +64,15 @@ def _create_safe_builtins(self) -> Dict[str, Any]: 'sum': sum, 'abs': abs, 'round': round, + 'pow': pow, # Collection functions 'sorted': sorted, 'enumerate': enumerate, 'zip': zip, 'range': range, + 'filter': filter, + 'map': map, # Type constructors 'list': list, @@ -77,9 +83,39 @@ def _create_safe_builtins(self) -> Dict[str, Any]: 'int': int, 'float': float, 'bool': bool, + + # Safe modules + 'statistics': statistics, + 'math': math, }) return safe_builtins + def _create_safe_getiter(self) -> callable: + """Create a safe iterator function for safe iteration. + + Returns: + Safe iterator function for use in restricted environment. + """ + def safe_getiter(obj): + """Safe iterator access for various objects. + + Args: + obj: Object to iterate over + + Returns: + Iterator for the object + + Raises: + TypeError: If object is not iterable + """ + try: + return iter(obj) + except TypeError as e: + self._logger.warning(f"Safe getiter failed for object: {e}") + raise + + return safe_getiter + def _create_safe_getitem(self) -> callable: """Create a safe getitem function for accessing DataFrame columns and Series values. 
@@ -124,6 +160,8 @@ def _create_restricted_globals(self, context: Dict[str, Any]) -> Dict[str, Any]: '_getattr_': safer_getattr, '_setattr_': guarded_setattr, '_getitem_': self._create_safe_getitem(), + '_getiter_': self._create_safe_getiter(), + '_iter_unpack_sequence_': guarded_iter_unpack_sequence, }) # Add user-provided context