In [1]:
import sys
sys.path.append("..")  # Add the project root to Python path
from app.core.labels.classifier import LabelClassifier
from app.llm.interface import LLMInterface
from app.config.settings import REASON_LLM_MODEL, REASON_LLM_PROVIDER

# Build the label classifier on top of the LLM selected by
# REASON_LLM_PROVIDER / REASON_LLM_MODEL.
# NOTE(review): `classifer` is misspelled, but later cells reference this exact
# name, so a rename has to touch every cell at once.
classifer = LabelClassifier(
    LLMInterface(REASON_LLM_PROVIDER, REASON_LLM_MODEL)
)



In [2]:
# Display the classifier's `light_trees`: the captured output is a dict keyed by
# tree name ('Default') holding nested label/description/tasks/children nodes.
classifer.label_tree.light_trees

{'Default': [{'label': 'Basic Knowledge',
   'description': 'Queries about simple facts or common knowledge, such as Concept Explanation, Feature Support.',
   'tasks': [],
   'children': [{'label': 'Deployment',
     'description': 'Inquiries about the deployment guide and high-level deployment strategies of a specific product.',
     'tasks': ['What deployment methods does TiDB support?'],
     'children': []},
    {'label': 'Feature Support',
     'description': 'Information about the features supported by TiDB, including capabilities and limitations.',
     'tasks': ['Does TiDB support FOREIGN KEY constraints?',
      'Is there a maximum value limit for auto_increment in TiDB?',
      'PD 在 v7 版本以后有没有一个功能会导致 region leader 处理请求即将超载时候，马上会先切换 leader 保证 leader 可用的功能？',
      'What are the primary key constraints in TiDB?',
      'What is the default size of the instance plan cache size in TiDB 8.4?',
      'TiDB 是否支持平滑升级？请提供详细信息。',
      'Is it possible to adjust the TiDB version in Ti

In [3]:
# The task being classified: its identifier and its natural-language goal.
task_id = "a0b45c26-50e0-487c-8235-493b171ea93e"
task_goal = (
    "在 TiDB 中,如果某个节点发生故障 (down机), 并且该节点的实例一直存在, "
    "那么在故障节点的实例副本全部迁移完成后, down-peer 的数量会减少吗？"
    "请详细说明 TiDB 的副本迁移机制和 down-peer 数量变化的过程。"
)

# Ask the classifier for a label path for this goal, using the "Default" key
# (the same key that appears in `light_trees`), then display the result.
label_path = classifer.generate_label_description("Default", task_goal)
label_path

API request failed. Retrying in 1.00 seconds...


[{'label': 'Basic Knowledge',
  'description': 'Queries about simple facts or common knowledge, such as Concept Explanation, Feature Support.'},
 {'label': 'Feature Support',
  'description': 'Information about the features supported by TiDB, including capabilities and limitations.'}]

In [None]:
# Hand-written label path, one dict per level (parent first, then child).
label_path = [
    dict(
        label="Basic Knowledge",
        description="Queries about simple facts or common knowledge, such as Concept Explanation, Feature Support.",
    ),
    dict(
        label="Feature Support",
        description="Information about the features supported by TiDB, including capabilities and limitations.",
    ),
]

label_path

In [5]:
# Insert the label path for `task_id` through the classifier, using the "Default" key.
# NOTE(review): depends on `task_id` / `label_path` from earlier cells; a later
# cell rebinds `label_path` to a plain string, so execution order matters here.
classifer.insert_label_path("Default", task_id, label_path)

In [None]:
import requests

def save_best_plan_from_url(url, api_base="https://stackvm.tidb.ai", timeout=30):
    """
    Ask the StackVM API to save the commit referenced by a task URL as the best plan.

    The URL is split on '/' and must contain a 'tasks' segment followed by at
    least three more segments: the first one is used as the task id and the
    third one as the commit hash (the segment in between is ignored). A POST is
    then sent to
    ``{api_base}/api/tasks/{task_id}/commits/{commit_hash}/save_best_plan``.

    Parameters:
    - url (str): URL containing ``/tasks/<task_id>/<segment>/<commit_hash>/...``.
    - api_base (str, optional): Base URL of the API that receives the POST.
    - timeout (float, optional): Seconds to wait for the API before giving up,
      so an unresponsive server cannot block the notebook forever.

    Returns:
    - bool: True only if the POST was sent and answered with HTTP 200.
      False for a malformed URL, a non-200 answer, or any request error;
      the reason is printed in every failure case.
    """
    try:
        parts = url.split('/')

        # A URL without a 'tasks' segment used to fall through and report
        # success without sending anything; treat it as malformed instead.
        if 'tasks' not in parts:
            raise ValueError("Invalid URL format")

        tasks_index = parts.index('tasks')
        if len(parts) < tasks_index + 4:  # need task id, middle segment and commit hash
            raise ValueError("Invalid URL format")

        task_id = parts[tasks_index + 1]
        commit_hash = parts[tasks_index + 3]
        if not task_id or not commit_hash:  # e.g. ".../tasks//x/" yields empty segments
            raise ValueError("Invalid URL format")

        endpoint = f"{api_base.rstrip('/')}/api/tasks/{task_id}/commits/{commit_hash}/save_best_plan"
        response = requests.post(endpoint, timeout=timeout)
        if response.status_code != 200:
            raise ValueError("Failed to save best plan")

    except Exception as e:
        # Best-effort helper: report the problem and signal failure to the caller.
        print(e)
        return False

    return True

# Example: a task URL whose commit should be saved as the best plan.
url = (
    "https://stackvm-ui.vercel.app/tasks/a0b45c26-50e0-487c-8235-493b171ea93e"
    "/mcts_optimized_20250208_175143/5c89c8cc9a2b4c24a6fd4d09565692a0/final-answer"
)
save_best_plan_from_url(url)


In [None]:
# Label path to look up in the tree (a single level here).
labels = [
    dict(
        label="Troubleshooting",
        description="Diagnostic guidance and problem-solving approaches for system issues, error conditions, or unexpected behaviors.",
    )
]

# Find the node matching the labels, then collect and display every task under it.
matching_node = classifer.label_tree.find_longest_matching_label(labels)
tasks = classifer.label_tree.get_all_tasks_under_label(matching_node)
tasks

#tasks_plan = get_task_plans([task["id"] for task in tasks])
#tasks_plan



In [6]:
from typing import Dict, Any, List

def format_tasks_plans(
    node: Dict[str, Any],
    current_path: List[str] = None
) -> str:
    """
    Traverses the tree structure and formats each task that has a best plan
    into a fixed text format.

    Tasks of a node are emitted before the tasks of its children; children are
    visited in order, depth-first. Tasks without a "best_plan" (missing or
    None) are skipped. A missing or None "tasks" / "children" entry is treated
    as empty.

    Parameters:
    - node (Dict[str, Any]): The current node in the tree. Reads the keys
      "name", "tasks" (dicts with "goal" and "best_plan") and "children".
    - current_path (List[str], optional): The path of node names from the root
      to the parent of the current node. It is never mutated.

    Returns:
    - str: The formatted text containing all tasks, or "" when there are none.
    """
    if current_path is None:
        current_path = []

    # Extend the path with this node's name; unnamed nodes leave it unchanged.
    node_name = node.get("name", "")
    new_path = current_path + [node_name] if node_name else current_path
    label_path = ' -> '.join(new_path)

    output = []

    # Process tasks in the current node (`or []` tolerates an explicit None).
    for task in node.get("tasks") or []:
        best_plan = task.get("best_plan")
        if best_plan is None:
            continue

        output.append(
            f"task: {task.get('goal')}\n"
            f"label_path: {label_path}\n"
            f"best_plan: {best_plan}\n"
            "-------\n"
        )

    # Recursively process child nodes (`or []` tolerates an explicit None).
    for child in node.get("children") or []:
        output.append(format_tasks_plans(child, new_path))

    return ''.join(output)

In [None]:
from app.llm.interface import LLMInterface
from app.config.settings import LLM_PROVIDER, LLM_MODEL
from app.core.plan.prompts import get_best_pratices_prompt


# Flatten the tasks under the matched node into the text block used in the prompt.
formatted_task_plan = format_tasks_plans(matching_node)

# NOTE(review): this rebinds `label_path` from a list of dicts to a ' -> ' joined string.
label_path = ' -> '.join(entry["label"] for entry in labels)
prompt = get_best_pratices_prompt(label_path, formatted_task_plan)

# Generate with the LLM selected by LLM_PROVIDER / LLM_MODEL and print the raw text.
# NOTE(review): the result is stored as `best_pratices_str`, while later cells
# read `best_practices_str` (different spelling).
best_pratices_str = LLMInterface(LLM_PROVIDER, LLM_MODEL).generate(prompt)
print(best_pratices_str)

In [10]:
# Curated best-practices text for the Troubleshooting label; a later cell writes
# this string to the matching Label row, so its wording is stored verbatim.
best_practices_str = """To solve tasks in the <Troubleshooting> category, please follow this approach:
1. Problem Analysis and Hypothesis Generation
  - Clearly define the scope and impact of the issue during the initial reasoning phase (Problem Framing).
  - Generate multiple hypotheses or potential causes (Hypothesis Generation) to avoid narrowing the investigation too early.
  - This step ensures a thorough understanding of the problem and provides a roadmap for subsequent research and analysis.

  The example of chain_of_thoughts: "The main issue stems from the concentrated write operations due to a date prefix index, resulting in an index hotspot. Optimizing the index by replacing the date prefix could help scatter writes more evenly, potentially using attributes like user ID for better distribution. Moreover, partitioning tables can be a robust method to further distribute writes across multiple regions, spreading out the load and addressing the hotspot problem effectively. This approach involves both adjusting the index structure and implementing partitioned tables as a combined strategy."


2. Multi-Channel Information Gathering and Cross-Verification
  - Leverage a Knowledge Graph for structured insights and connections related to the issue.
  - Use Vector Search to locate similar cases or documentation based on content similarity.
  - Tailor your queries/prompts to address specific error points rather than general issues/topics. This ensures that both the data collected and the subsequent analysis are closely aligned with the problem at hand.
  - Employ LLM-based summarization and inference to synthesize findings from various sources.
  - By integrating different sources of information, you gain a more comprehensive understanding of the root causes and possible solutions.

3. Reasoning and Solution Generation (With Irrelevant Solution Filtering)

  - During the reasoning phase, evaluate the collected information against your initial hypotheses.
  - Filter out any solutions or suggestions that do not directly address the identified problem to ensure relevancy.
  - Consolidate valid insights into clear, actionable recommendations.
  - This ensures that the final solutions are both targeted and feasible for resolving the specific issue at hand.

By following these steps—starting with thorough problem analysis, then gathering and verifying data from multiple sources, and finally synthesizing a well-founded solution while filtering out irrelevant approaches—you can enhance the accuracy and efficiency of your troubleshooting efforts.
"""


In [11]:
from app.storage.models import Label
from app.config.database import SessionLocal

# Store the curated best practices on the Label row whose name matches the last
# entry of `labels`.
with SessionLocal() as session:
    target_label_name = labels[-1].get("label")
    label = session.query(Label).filter(Label.name == target_label_name).first()
    if label is None:
        # .first() gives None when no row matches; fail with a clear message
        # instead of an AttributeError on `None.best_practices`.
        raise ValueError(f"Label not found: {target_label_name!r}")
    label.best_practices = best_practices_str
    session.add(label)
    session.commit()