In [1]:
import sys
# Must run before the `app` import below so the package can be resolved.
# NOTE(review): assumes the notebook lives one directory below the project root — confirm.
sys.path.append("..")  # Add the project root to Python path
from app.controller.label_classifier import LabelClassifier, get_labels_tree_with_task_goals

# Instantiate the label classifier for this notebook session.
# NOTE(review): the name is misspelled ("classifer") and no cell visible here reads it —
# check for other users before renaming or removing.
classifer = LabelClassifier()



In [2]:
# Load the label tree and keep it in a notebook-level variable; later cells pass it to
# find_longest_matching_node. The bare expression on the last line displays the tree
# (nodes with 'name', 'description', 'children' and 'tasks' entries) as the cell output.
labels_tree_with_task = get_labels_tree_with_task_goals()
labels_tree_with_task

[{'name': 'Basic Knowledge',
  'description': 'Queries about simple facts or common knowledge, such as Concept Explanation, Feature Support.',
  'children': [{'name': 'Deployment',
    'description': 'Inquiries about the deployment guide and high-level deployment strategies of a specific product.',
    'children': [],
    'tasks': [{'id': 'cb14f66f-d91a-4bbb-8016-5bca76cd445a',
      'goal': 'What deployment methods does TiDB support?'}]},
   {'name': 'Feature Support',
    'description': 'Information about the features supported by TiDB, including capabilities and limitations.',
    'children': [{'name': 'RU',
      'description': 'A unit of measurement used to quantify the resource consumption of queries or transactions. It helps in evaluating system performance and optimizing resource allocation.',
      'children': [],
      'tasks': [{'id': '175ab194-0516-44de-8255-5d5025669720',
        'goal': 'What are the key indicators or metrics to consider when adjusting RU allocation in Ti

In [4]:
import requests

def save_best_plan_from_url(url, timeout=30):
    """
    Marks the commit referenced by a task URL as that task's best plan.

    The URL must contain the path segments ``tasks/<task_id>/<segment>/<commit_hash>``.
    The segment between the task id and the commit hash is ignored. The task id and
    commit hash are used to build the ``save_best_plan`` API endpoint, which is then
    called with an HTTP POST.

    Parameters:
    - url (str): The task URL to parse.
    - timeout (float, optional): Seconds to wait for the API response. Defaults to 30.

    Returns:
    - bool: True only if the API answered with HTTP 200. False for a malformed URL,
      a request failure or any other status code; the error is printed, not raised.
    """
    try:
        # Split URL by '/' and extract components
        parts = url.split('/')

        # A URL without a 'tasks' segment cannot be resolved; report failure
        # instead of returning True without having called the API.
        if 'tasks' not in parts:
            raise ValueError("Invalid URL format")

        tasks_index = parts.index('tasks')
        if len(parts) < tasks_index + 4:  # Ensure we have enough parts after 'tasks'
            raise ValueError("Invalid URL format")

        task_id = parts[tasks_index + 1]
        commit_hash = parts[tasks_index + 3]
        # Empty segments (e.g. '.../tasks//x/') would build a meaningless endpoint.
        if not task_id or not commit_hash:
            raise ValueError("Invalid URL format")

        api_url = f"https://stackvm.tidb.ai/api/tasks/{task_id}/commits/{commit_hash}/save_best_plan"
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.post(api_url, timeout=timeout)
        if response.status_code != 200:
            raise ValueError("Failed to save best plan")

    except Exception as e:
        # Best-effort helper: report the problem and signal failure via the return value.
        print(e)
        return False

    return True

# Example task URL of the form .../tasks/<task_id>/<segment>/<commit_hash>; the helper
# reads the task id and the commit hash and ignores the segment in between.
# Running this cell sends a POST request to the StackVM API, so re-running it repeats the request.
url = "https://stackvm-ui.vercel.app/tasks/d30be387-f053-4b3b-9445-c1afdc6fbb4f/plan_update_20241218_060107/43546c31dceb6f2f4757d3e3a4a7c2657992053f"
save_best_plan_from_url(url)


True

In [None]:
from app.controller.label_classifier import find_longest_matching_node, get_all_tasks_under_node, get_task_plans

# Label path to look up; here it holds a single label.
# NOTE(review): presumably ordered from root to leaf (later cells join the entries with
# ' -> ' and use the last entry as the label to update) — confirm.
labels = [
    {
        "label": "Basic Knowledge",
        "description": "Queries about simple facts or common knowledge, such as Concept Explanation, Feature Support."
    }
]

# Requires `labels_tree_with_task` from the earlier cell.
# Resolve the label path to a tree node, collect the tasks under that node, then fetch
# the plans for those task ids. The last line displays the fetched plans.
# NOTE(review): not visible here whether find_longest_matching_node can return None
# when nothing matches — confirm before relying on `matching_node`.
matching_node = find_longest_matching_node(labels_tree_with_task, labels)
tasks = get_all_tasks_under_node(matching_node)
tasks_plan = get_task_plans([task["id"] for task in tasks])
tasks_plan

In [4]:
from typing import Dict, Any, List

def format_tasks_pans(
    node: Dict[str, Any],
    task_plans: Dict[str, Dict[str, Any]],
    current_path: List[str] = None
) -> str:
    """
    Renders every task at or below `node` that has a best plan as a block of text.

    The node's own tasks come first, followed by the output of each child subtree
    in order. A task is skipped when `task_plans` holds no "best_plan" value for
    its id.

    Parameters:
    - node (Dict[str, Any]): Tree node; its "name", "tasks" and "children" keys are read.
    - task_plans (Dict[str, Dict[str, Any]]): Maps a task id to a dict that may hold "best_plan".
    - current_path (List[str], optional): Label names from the root down to the parent of `node`.

    Returns:
    - str: The concatenated text blocks, or an empty string when nothing qualifies.
    """
    trail = [] if current_path is None else current_path
    label = node.get("name", "")
    if label:
        # Concatenate into a new list so the caller's path is never mutated.
        trail = trail + [label]

    chunks = []

    # Tasks attached directly to this node
    for entry in node.get("tasks", []):
        entry_id = entry.get("id")
        goal = entry.get("goal")
        plan = task_plans.get(entry_id, {}).get("best_plan", None)
        if plan is not None:
            chunks.append(
                f"task: {goal}\n"
                f"label_path: {' -> '.join(trail)}\n"
                f"best_plan: {plan}\n"
                "-------\n"
            )

    # Descend into each child subtree, passing the extended label path
    chunks.extend(
        format_tasks_pans(subtree, task_plans, trail)
        for subtree in node.get("children", [])
    )

    return ''.join(chunks)

In [None]:
from app.services.llm_interface import LLMInterface
from app.config.settings import LLM_PROVIDER, FAST_LLM_MODEL
from app.services import get_best_pratices_prompt


# Requires `matching_node`, `tasks_plan` and `labels` from the earlier lookup cell.
# Render the plans under the matched node as text and join the label names into a
# ' -> ' separated path; both are handed to the prompt builder.
formatted_task_plan =  format_tasks_pans(matching_node, tasks_plan)
label_path = ' -> '.join([label["label"] for label in labels])
prompt = get_best_pratices_prompt(label_path, formatted_task_plan)

# Send the prompt to the configured provider/model and print the generated text.
# Note: a later cell reassigns `best_pratices_str` to a hard-coded string.
best_pratices_str = LLMInterface(LLM_PROVIDER, FAST_LLM_MODEL).generate(prompt)
print(best_pratices_str)

In [13]:
# Hard-coded best-practices text for the <Basic Knowledge> category. Assigning it here
# replaces any value `best_pratices_str` received from the LLM call in an earlier cell;
# this is the value that gets written to `label.best_practices` further down.
best_pratices_str = """ To solve tasks in the <Basic Knowledge> category, please follow this approach:
1.	Breakdown the goal into small question (if Necessary):
  - Purpose: Break down complex questions into smaller sub-questions to ensure all aspects are thoroughly covered.
  - Note: For straightforward questions, you can skip this step and proceed directly.

2. Simultaneously Use Multiple Search Tools to Retrieve Information:
  - Knowledge Graph Search: Obtain structured, high-level information.
  - Vector Search: Retrieve detailed, unstructured documents and examples.
  - Strategy: Use both search tools simultaneously for each (sub) question to ensure comprehensive information gathering.

3. Use LLM to Generate the Final Answer:
  - Integrate Information: Compile all the retrieved data.
  - Generate Answer: Utilize the LLM to summarize the information, producing an accurate and coherent response.

Essence Summary:

- Comprehensive Retrieval Enhances Information Coverage: By simultaneously using knowledge graph search and vector search, you gather rich and comprehensive information.
- Efficient Process with Simplified Steps: If needed, split the question, then proceed directly to information retrieval and answer generation, avoiding unnecessary procedures.
- Accurate Generation Ensures Answer Quality: Use the LLM to integrate the retrieved information, providing precise and valuable answers."""


In [17]:
from app.models.label import Label
from app.database import SessionLocal

# Store the best-practices text on the label named by the last entry of `labels`.
# Requires `labels` and `best_pratices_str` from earlier cells.
target_label_name = labels[-1].get("label")

with SessionLocal() as session:
    label = session.query(Label).filter(Label.name == target_label_name).first()
    # .first() yields None when no row matches; fail with a clear message instead of
    # an AttributeError on the assignment below.
    if label is None:
        raise ValueError(f"Label not found: {target_label_name!r}")
    label.best_practices = best_pratices_str
    session.add(label)
    session.commit()