In [1]:
import sys
sys.path.append("..")  # Add the project root to Python path

import requests
from datetime import datetime, timedelta
from typing import List, Dict, Optional


from notebooks.optimize_plan import get_task_answer, update_plan, execute_task_using_new_plan, evaulate_task_answer, stackvm_host

def get_evaluation_pending_tasks(
    start_time: Optional[datetime] = None,
    end_time: Optional[datetime] = None,
    evaluation_statuses: Optional[List[str]] = None,
    timeout: int = 60
) -> List[Dict]:
    """
    Fetches the list of tasks pending evaluation from the API.

    Sends a GET request to `{stackvm_host}/api/tasks/evaluation` with the
    filters below encoded as query parameters.

    Args:
        start_time (Optional[datetime]): The start time to filter tasks. Sent in ISO 8601 form; omitted when None.
        end_time (Optional[datetime]): The end time to filter tasks. Sent in ISO 8601 form; omitted when None.
        evaluation_statuses (Optional[List[str]]): List of evaluation statuses to filter by, sent as one
            comma-separated value. Defaults to 'NOT_EVALUATED' when None or empty.
        timeout (int): Timeout in seconds for the API request, matching the other helpers in this notebook.

    Returns:
        List[Dict]: A list of tasks pending evaluation.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out, or returns a 4XX/5XX status.
        ValueError: If the response cannot be decoded or is not a JSON list.
    """
    endpoint = f"{stackvm_host}/api/tasks/evaluation"
    params = {}

    if start_time is not None:
        params['start_time'] = start_time.isoformat()
    if end_time is not None:
        params['end_time'] = end_time.isoformat()
    if evaluation_statuses:
        # Join multiple statuses with commas
        params['evaluation_status'] = ','.join(evaluation_statuses)
    else:
        # Default to NOT_EVALUATED if no statuses are provided
        params['evaluation_status'] = 'NOT_EVALUATED'

    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(endpoint, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4XX or 5XX)
        data = response.json()

        if not isinstance(data, list):
            raise ValueError("Unexpected response format: Expected a list of tasks.")

        return data
    except requests.exceptions.RequestException as e:
        # Handle network-related errors
        print(f"An error occurred while making the request: {e}")
        raise
    except ValueError as ve:
        # Handle JSON decoding errors or unexpected data formats
        print(f"An error occurred while processing the response: {ve}")
        raise

def record_evaluation(
    task_id: str,
    evaluation_status: str,
    evaluation_reason: Optional[str] = "",
    timeout: int = 60
) -> Dict:
    """
    Posts an evaluation verdict for one task to the evaluation endpoint.

    The request goes to `{stackvm_host}/api/tasks/{task_id}/evaluation` with a
    JSON body holding `evaluation_status` and `evaluation_reason`.

    Args:
        task_id (str): The ID of the task being evaluated.
        evaluation_status (str): The evaluation status (e.g., "APPROVED", "REJECTED").
        evaluation_reason (Optional[str]): The reason for the evaluation decision.
        timeout (int): Timeout in seconds for the API request.

    Returns:
        Dict: The JSON object returned by the API; only returned when its "success" entry is truthy.

    Raises:
        requests.exceptions.RequestException: If the request fails or returns a 4XX/5XX status.
        ValueError: If the response is not a JSON object, or its "success" entry is missing or falsy.
    """
    url = f"{stackvm_host}/api/tasks/{task_id}/evaluation"
    body = {
        "evaluation_status": evaluation_status,
        "evaluation_reason": evaluation_reason,
    }

    try:
        reply = requests.post(
            url,
            json=body,
            headers={"Content-Type": "application/json"},
            timeout=timeout,
        )
        reply.raise_for_status()
        result = reply.json()

        if not isinstance(result, dict):
            raise ValueError("Unexpected response format: Expected a JSON object.")

        # Success path first; anything else is reported using the API's own error text.
        if result.get("success", False):
            return result

        reason = result.get("error", "Unknown error occurred.")
        raise ValueError(f"API Error: {reason}")

    except requests.exceptions.RequestException as request_error:
        # Log and re-raise so the caller still sees the original exception.
        print(f"An error occurred while making the request: {request_error}")
        raise
    except ValueError as value_error:
        print(f"An error occurred while processing the response: {value_error}")
        raise

def record_human_evaluation(
    task_id: str,
    evaluation_status: str,
    feedback: Optional[str] = "",
    timeout: int = 60
) -> Dict:
    """
    Posts a human-evaluation status and feedback for one task.

    The request goes to `{stackvm_host}/api/tasks/{task_id}/human_evaluation`
    with a JSON body holding `evaluation_status` and `feedback`.

    Args:
        task_id (str): The ID of the task the status applies to.
        evaluation_status (str): The human-evaluation status to record.
        feedback (Optional[str]): Free-form feedback stored with the status.
        timeout (int): Timeout in seconds for the API request.

    Returns:
        Dict: The JSON object returned by the API; only returned when its "success" entry is truthy.

    Raises:
        requests.exceptions.RequestException: If the request fails or returns a 4XX/5XX status.
        ValueError: If the response is not a JSON object, or its "success" entry is missing or falsy.
    """
    target_url = f"{stackvm_host}/api/tasks/{task_id}/human_evaluation"
    request_body = {"evaluation_status": evaluation_status, "feedback": feedback}
    request_headers = {"Content-Type": "application/json"}

    try:
        http_response = requests.post(
            target_url, json=request_body, headers=request_headers, timeout=timeout
        )
        http_response.raise_for_status()
        parsed = http_response.json()

        if not isinstance(parsed, dict):
            raise ValueError("Unexpected response format: Expected a JSON object.")

        succeeded = parsed.get("success", False)
        if not succeeded:
            # Surface the API's own error text when it supplies one.
            api_error = parsed.get("error", "Unknown error occurred.")
            raise ValueError(f"API Error: {api_error}")

        return parsed

    except requests.exceptions.RequestException as exc:
        # Log and re-raise so the caller still sees the original exception.
        print(f"An error occurred while making the request: {exc}")
        raise
    except ValueError as exc:
        print(f"An error occurred while processing the response: {exc}")
        raise


In [2]:
import json
from app.utils.json import extract_json

def optimize_plan(task_id:str, branch_name:Optional[str]="main", max_iteration=2):
    """
    Evaluates a task's answer and, while it is rejected, revises and re-runs its plan.

    Each round fetches the task detail for the current branch, asks the evaluator
    for a verdict, and records that verdict with record_evaluation. An accepted
    answer ends the function. A rejected answer triggers a plan revision and a
    re-execution, and the next round evaluates the branch produced by that
    re-execution. When the revision budget is exhausted or any step after the
    verdict fails, the task is handed to human review via record_human_evaluation.

    Args:
        task_id (str): The ID of the task to evaluate.
        branch_name (Optional[str]): The branch whose answer is evaluated first.
        max_iteration (int): Maximum number of plan revisions to attempt; the answer
            is therefore evaluated at most max_iteration + 1 times.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException, ValueError: Propagated from
            record_evaluation / record_human_evaluation.
    """
    current_branch_name = branch_name
    error_message = None
    iteration_round = 0

    while True:
        print(f"Start to evaluate plan for task(id={task_id},branch={current_branch_name})")
        detail = get_task_answer(task_id, current_branch_name)

        if detail is None:
            # With no detail there is nothing to evaluate. Retrying unchanged would
            # spin this loop forever, so escalate to human review instead.
            error_message = f"Failed to get task answer for branch {current_branch_name}"
            break

        goal = detail.get("goal")
        final_answer = detail.get("final_answer")
        plan = detail.get("plan")
        metadata = detail.get("metadata")

        response = evaulate_task_answer(goal, metadata, final_answer, plan)
        try:
            eval_res_str = extract_json(response)
            eval_res = json.loads(eval_res_str)
        except Exception as e:
            # NOTE(review): unlike the later failure paths, this one returns without
            # flagging the task for human evaluation - confirm that is intended.
            print(f"Failed to decode evaluation result {e}: {response}")
            return

        # One strict flag drives both the recorded status and the exit decision, so
        # a truthy non-boolean "accept" can no longer be recorded as APPROVED while
        # the loop still goes on to revise the plan.
        accepted = eval_res.get("accept", False) is True
        eval_status = "APPROVED" if accepted else "REJECTED"
        eval_reason = json.dumps(eval_res, indent=2)

        record_evaluation(task_id, eval_status, eval_reason)

        if accepted:
            print(f"Goal Pass! {goal}, evaluation result:{eval_reason}")
            return

        print(f"Goal Not Pass! {goal}, the evaluation result:{eval_reason}")

        if iteration_round >= max_iteration:
            # Revision budget exhausted; error_message stays None on this path.
            break

        revised_plan_response = update_plan(goal, metadata, plan, eval_reason)

        try:
            revised_plan_str = extract_json(revised_plan_response)
            revised_plan = json.loads(revised_plan_str)
        except Exception as e:
            error_message = f"Failed to decode revised plan {e}: {revised_plan_response}"
            break

        print("revised plan:", revised_plan)

        try:
            updated_result = execute_task_using_new_plan(task_id, revised_plan)
            print(f"Revised plan execution result {updated_result}")
        except Exception as e:
            error_message = f"Failed to execute task using new plan {e}"
            break

        # Treat a None/empty result like one with missing fields, so it takes the
        # "empty answer" path below instead of raising AttributeError on .get().
        updated_result = updated_result or {}
        current_branch_name = updated_result.get("branch_name", None)
        current_final_answer = updated_result.get("final_answer", None)
        if current_branch_name is None or current_final_answer is None:
            error_message = "Failed to execut task using new plan, get empty answer"
            break

        iteration_round += 1

    if error_message is None:
        # Only the budget-exhausted break leaves error_message unset; by then the
        # answer has been evaluated iteration_round + 1 times.
        error_message = f"Still failed after {iteration_round + 1} evaluation round(s)."
    print(f"Failed to evaluate plan for task(id={task_id}): {error_message}")
    # NOTE(review): "WAITTING_FOR_EVALUATION" is kept byte-for-byte; it presumably
    # matches the status value the server expects - confirm before correcting the spelling.
    record_human_evaluation(task_id, "WAITTING_FOR_EVALUATION", error_message)


In [4]:
from app.controller.label_classifier import LabelClassifier


# Run the evaluate/revise loop for a single hard-coded task, starting from the "main" branch.
optimize_plan("267c8250-9431-490f-8c2d-e503ee6f98bf", "main")

# The bare string literal below is a disabled batch driver: it would fetch the tasks
# pending evaluation from the last two hours and run optimize_plan on the first one only.
# It is never executed; as the cell's last expression the notebook simply echoes it as output.
"""
classifier = LabelClassifier()

end_time = datetime.utcnow()
start_time = end_time - timedelta(hours=2)

pending_tasks = get_evaluation_pending_tasks(
    start_time=start_time
)

for task in pending_tasks:
    task_id = task["id"]
    optimize_plan(task_id, "main")
    break
"""

Start to evaluate plan for task(id=267c8250-9431-490f-8c2d-e503ee6f98bf,branch=main)


2025-01-13 20:53:15,085 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Goal Not Pass! How can I convert a timestamp to a TSO value in TiDB?, the evaluation result:{
  "accept": false,
  "answer_quality_assessment_explanation": "The final answer does not fully resolve the goal. While it provides a general understanding of the TSO structure and mentions the lack of a direct SQL function for conversion, it fails to offer a clear, actionable solution or workaround for converting a timestamp to a TSO value in TiDB. The answer also lacks a detailed explanation of why such a conversion might not be feasible or necessary, which could help the user understand the limitations. This falls short of the 'Direct Problem Resolution' guideline, as it does not provide a concrete method or alternative approach to achieve the user's goal.",
  "plan_adjustment_suggestion": "The plan should be adjusted to include a more thorough investigation into potential workarounds or alternative methods for achieving the conversion, even if indirect. This could involve exploring TiDB's i

2025-01-13 20:53:43,276 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


revised plan: [{'seq_no': 0, 'type': 'reasoning', 'parameters': {'chain_of_thoughts': 'To convert a timestamp to a TSO value in TiDB, we need to understand the structure of a TSO and how TiDB generates it. A TSO is composed of a physical timestamp and a logical counter. The physical timestamp is derived from the current time in milliseconds, and the logical counter is used to ensure uniqueness. Our approach will involve understanding the TSO generation process, exploring any available documentation or tools that can assist in this conversion, and determining if a direct method or workaround exists. Additionally, we will explore potential architectural reasons why a direct conversion might not be feasible.', 'dependency_analysis': 'The plan involves understanding the TSO structure, exploring TiDB documentation, and determining if a conversion method exists. Each step builds on the previous one to ensure a comprehensive understanding of the process. We will also consider consulting with 

'\nclassifier = LabelClassifier()\n\nend_time = datetime.utcnow()\nstart_time = end_time - timedelta(hours=2)\n\npending_tasks = get_evaluation_pending_tasks(\n    start_time=start_time\n)\n\nfor task in pending_tasks:\n    task_id = task["id"]\n    optimize_plan(task_id, "main")\n    break\n'