In [1]:
from app.core.db import Scoped_Session

# Single database session shared by the rest of the notebook: it is handed to
# the graph store, the vector store and ChatService.chat in later cells.
session = Scoped_Session()

In [2]:
import dspy
import os
from dotenv import load_dotenv

# Load environment variables (presumably from a local .env file) before the
# API key is read with os.getenv below.
load_dotenv()

# dspy language-model client: registered as dspy's default LM on the next line
# and also passed explicitly to the graph store / graph index in a later cell.
# NOTE(review): 'gpt-o1' does not match the OpenAI model ids I know of
# (e.g. 'o1-preview', 'gpt-4o') — confirm the intended model name.
# NOTE(review): os.getenv returns None when OPENAI_API_KEY is unset, so a
# missing key is not detected here — confirm that is acceptable.
turbo = dspy.OpenAI(model='gpt-o1', api_key=os.getenv("OPENAI_API_KEY"), max_tokens=4096)
dspy.settings.configure(lm=turbo)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import warnings
from sqlalchemy.exc import SAWarning

# Suppress every warning of SQLAlchemy's SAWarning category for the rest of the
# kernel session so they do not clutter cell output.
warnings.filterwarnings("ignore", category=SAWarning)

In [4]:
from app.rag.knowledge_graph.graph_store import TiDBGraphStore
from app.rag.knowledge_graph import KnowledgeGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType

# Embedding model shared by the graph store here and by the vector index built
# in a later cell.
_embed_model = OpenAIEmbedding(
    model=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL
)

# Graph store bound to the notebook-wide DB session and the dspy LM.
graph_store = TiDBGraphStore(
    dspy_lm=turbo,
    session=session,
    embed_model=_embed_model,
)

# Annotated assignment (not a chained `a = B = ...`), so the imported
# KnowledgeGraphIndex class name is NOT rebound to the instance and stays
# usable as a class in later cells.
graph_index: KnowledgeGraphIndex = KnowledgeGraphIndex.from_existing(
    dspy_lm=turbo,
    kg_store=graph_store,
)

In [5]:
def retrieve_knowledge_graph(query, depth=1):
    """Look up knowledge-graph data for ``query`` via the module-level ``graph_index``.

    Args:
        query: Free-text query, passed through unchanged to
            ``graph_index.retrieve_with_weight``.
        depth: Forwarded as the ``depth`` keyword argument. Defaults to 1, the
            value that used to be hard-coded, so existing callers are unaffected.
            NOTE(review): presumably the traversal depth — confirm against
            KnowledgeGraphIndex.

    Returns:
        Whatever ``graph_index.retrieve_with_weight`` returns, unmodified.
    """
    # The second positional argument is always an empty list here.
    # NOTE(review): its meaning is defined by KnowledgeGraphIndex — confirm.
    return graph_index.retrieve_with_weight(
        query,
        [],
        depth=depth,
    )

In [6]:
from app.rag.vector_store.tidb_vector_store import TiDBVectorStore
from llama_index.core import VectorStoreIndex

# Vector store bound to the notebook-wide DB session, wrapped in a llama-index
# VectorStoreIndex that reuses the embedding model created for the graph store.
vector_store = TiDBVectorStore(session=session)
vector_index = VectorStoreIndex.from_vector_store(vector_store, embed_model=_embed_model)

In [7]:
def retrieve_knowledge_embedded_chunks(query, top_k=5):
    """Return the text of the chunks most similar to ``query``.

    Args:
        query: Free-text query handed to the retriever built from the
            module-level ``vector_index``.
        top_k: Number of similar chunks to request from the retriever
            (``similarity_top_k``). Defaults to 5.

    Returns:
        A list with the ``.text`` of every node the retriever returned, in the
        order the retriever produced them.
    """
    # Honour the caller's top_k; it was previously ignored in favour of a
    # hard-coded 5.
    retriever = vector_index.as_retriever(similarity_top_k=top_k)
    return [node.text for node in retriever.retrieve(query)]

In [11]:
import json
import enum
import openai
from pydantic import BaseModel, Field, field_validator
from sqlmodel import Session
import traceback
from typing import Generator, Any, List
from dataclasses import dataclass


# Plain OpenAI SDK client used by ChatService for function-calling chat
# completions; this is a separate client from the dspy LM configured earlier.
# `os` is imported in an earlier cell, so that cell must have run first.
fc_llm = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


class EventType(str, enum.Enum):
    """Kinds of events carried by a ChatEvent.

    Each member's value is the tag that ChatEvent.encode writes before the
    JSON payload.
    """

    # Declared but not emitted by any code in the visible cells.
    LLM_CONTENT_STREAMING = "LLM_CONTENT_STREAMING"
    # The assistant message that requests one or more tool calls.
    TOOL_CALL = "TOOL_CALL"
    # The result message produced for a single tool call.
    TOOL_CALL_RESPONSE = "TOOL_CALL_RESPONSE"
    # The final assistant answer (no further tool calls requested).
    FINISHED = "FINISHED"
    # Missing input or a failure while handling tool calls.
    ERROR = "ERROR"


@dataclass
class ChatEvent:
    """One event produced by the chat generator: a type tag plus an optional payload."""

    event_type: EventType
    payload: str | dict | None = None

    def encode(self, charset) -> bytes:
        """Render the event as ``<TYPE>:<compact JSON payload>`` followed by a newline.

        Args:
            charset: Name of the text encoding applied to the rendered line.

        Returns:
            The encoded line as bytes.
        """
        # Compact separators: no spaces after "," or ":" in the JSON body.
        compact_json = json.dumps(self.payload, separators=(",", ":"))
        line = f"{self.event_type.value}:{compact_json}\n"
        return line.encode(charset)


class MessageRole(str, enum.Enum):
    """Role of the author of a chat message; values are the lowercase role strings."""

    SYSTEM = "system"
    USER = "user"
    ASSISTANT = "assistant"
    TOOL = "tool"


class ChatMessage(BaseModel):
    """A single chat message: the author's role, the text, and free-form extras."""

    role: MessageRole
    content: str
    # Free-form extra data attached to the message; nothing in the visible
    # cells reads it.
    additional_kwargs: dict[str, Any] = {}


# chat api definition
class ChatRequest(BaseModel):
    """Request body of the chat API.

    Attributes are documented inline. ``messages`` is checked by
    ``check_messages`` whenever a value is supplied.
    """

    user_id: str
    # NOTE(review): the validator below rejects an empty list, but pydantic
    # presumably does not run field validators on defaults, so omitting
    # ``messages`` may still yield an empty list — confirm whether the field
    # should be required instead.
    messages: list[ChatMessage] = []
    metadata: dict = {}
    stream: bool = True

    @field_validator("messages")
    @classmethod
    def check_messages(cls, messages: list[ChatMessage]) -> list[ChatMessage]:
        """Validate the conversation supplied by the client.

        Rules enforced:
          * the list must not be empty;
          * every message role must be user, assistant or tool
            (system messages are not accepted from the client);
          * no message content may exceed 8096 characters;
          * the last message must come from the user.

        Returns:
            The unchanged ``messages`` list.

        Raises:
            ValueError: if any rule above is violated.
        """
        if not messages:
            raise ValueError("messages cannot be empty")

        allowed_roles = (
            MessageRole.USER,
            MessageRole.ASSISTANT,
            MessageRole.TOOL,
        )
        for m in messages:
            if m.role not in allowed_roles:
                # The message names all three accepted roles, matching the check.
                raise ValueError("role must be one of 'user', 'assistant' or 'tool'")
            if len(m.content) > 8096:
                raise ValueError("message content cannot exceed 8096 characters")

        if messages[-1].role != MessageRole.USER:
            raise ValueError("last message must be from user")
        return messages


# Argument schema for the "GraphKnowledge" tool offered to the chat model.
# NOTE(review): ChatService builds the tool with openai.pydantic_function_tool,
# which presumably publishes the class docstring as the tool description —
# treat the docstring and the Field description as prompt text and confirm
# before rewording either.
class GraphKnowledge(BaseModel):
    """
    Represents structured knowledge in the form of a graph, focusing on entities and the relationships between them.

    This tool enables users to query and navigate structured relationships between various entities. It is designed to answer questions where understanding the relationships or attributes of specific entities is key to providing an accurate response.

    Typical use cases include:
    1. **Entity Queries**: Answering questions about the relationships or properties of specific entities.
    2. **Relationship Navigation**: Navigating through structured relationships to retrieve specific knowledge tied to entities within the organization's domain.
    """

    # The only tool argument; ChatService passes it to retrieve_knowledge_graph.
    query: str = Field(
        description=(
            "A query for retrieving structured relationships and attributes of specific entities."
        )
    )

# Argument schema for the "VectorChunks" tool offered to the chat model.
# NOTE(review): ChatService builds the tool with openai.pydantic_function_tool,
# which presumably publishes the class docstring as the tool description —
# treat the docstring and the Field description as prompt text and confirm
# before rewording either.
class VectorChunks(BaseModel):
    """
    Represents detailed source data in the form of vectorized chunks, focusing on the content of original documents.

    This tool is used to retrieve detailed information based on content similarity. It excels at answering questions requiring deeper context or extended information from original documents, making it suitable for handling more complex or background-intensive queries.

    Typical use cases include:
    1. **Content Queries**: Providing in-depth answers to questions that require extracting detailed information from original documents.
    2. **Context Retrieval**: Handling queries that need extensive background or supporting information by matching content chunks based on similarity.
    """

    # The only tool argument; ChatService passes it to
    # retrieve_knowledge_embedded_chunks.
    query: str = Field(
        description=(
            "A query for retrieving similar chunks of text that provide detailed context or background information to answer the user's query."
        )
    )

# System prompt that ChatService prepends (as a single "system" message) to
# every chat-completion request. The text is sent to the model verbatim, so any
# edit to it changes model behaviour.
system_instruction = """As the Advanced Query Solver, your primary role is to assist users by breaking down complex queries into manageable subquestions and providing a clear action plan for resolving them. You are responsible for ensuring that each step of the query-solving process is methodically planned and executed, with attention to dependencies between different subquestions.

When interacting with users, adhere to the following instructions:

1. Query Analysis:
   - Upon receiving a user query, begin by analyzing the query to determine its structure and dependencies. 
   - Prioritize the **Knowledge Graph Search** for querying structured data, which typically provides accurate and concise information about entities and their relationships.
   - Create a dependency graph that outlines how each subquestion relates to others and the order in which they should be resolved.
      - Example: If the user asks about the status of a project and its potential impact on product delivery, identify the key subquestions: 
        'What is the current status of the project?'
        'Are there any blockers or delays affecting the timeline?'
        'What is the estimated impact on the upcoming product release?'
      - Ensure that subquestions dependent on the resolution of earlier steps are properly sequenced.

2. Subquestion Generation:
   - Once the dependency graph is established, break down the original query into a series of subquestions. Each subquestion should be clear, concise, and directly address a portion of the user's original inquiry.
   - Ensure that all generated subquestions are relevant to solving the user’s overall query and follow the logical structure of the dependency graph.
     - Example: If investigating a system error, break it down into:
       'What caused the error?'
       'Which systems are affected?'
       'What are the potential solutions or workarounds?'
   - **Use the Knowledge Graph** whenever possible to answer subquestions involving entities, relationships, or concise information that can be retrieved through structured queries.
   - **Search Vector Chunks** when more detailed context or background information is needed, particularly for complex or detailed inquiries that require the retrieval of extensive content.

3. Action Plan Generation:
   - After breaking down the query into subquestions, generate a clear action plan that specifies how to answer each subquestion and resolve the entire query.
   - The action plan should be structured sequentially, reflecting the dependencies outlined in the graph, with clear steps for retrieving or processing the information required to answer each subquestion.
     - **Utilize Knowledge Graph** for structured, entity-based information, and **shift to Vector Chunks** when more granular details or in-depth context is needed.
     - Example: For a product update query, the action plan might include:
       'Step 1: Retrieve the latest project status from the knowledge graph.'
       'Step 2: Identify any blockers or delays reported in the last week using vector chunks to gather more details.'
       'Step 3: Analyze how these issues might impact the product release timeline.'

4. Problem Solving Execution:
   - Execute each step of the action plan sequentially, ensuring that all subquestions are answered and that dependencies between them are respected.
   - As you solve each subquestion, aggregate the results into a comprehensive response that directly addresses the user's original query.
   - Ensure that the final response provides clarity on how each part of the solution was derived and how the user can proceed with the information.

5. Communication:
   - Clearly explain the steps taken to resolve the user’s query, ensuring they understand the rationale behind each subquestion and how it contributed to the final solution.
   - Confirm with the user if additional clarification is needed on any specific subquestion or action plan step.
   - Maintain a professional and supportive tone, ensuring that your responses are structured, informative, and actionable for the user’s needs.

Your goal is to help users navigate complex queries by decomposing them into logical steps and providing a structured action plan for resolving them. **Prioritize Knowledge Graph Search** for concise, structured answers and **use Vector Chunks** for detailed, context-rich information. Always ensure accuracy, clarity, and logical flow to meet the user’s needs and expectations.
"""


class ChatService:
    """Function-calling chat loop over the knowledge-graph and vector-chunk tools.

    The service repeatedly asks the chat model for a completion. While the
    model requests tool calls, each call is executed locally and its result is
    appended to the conversation; the loop ends when the model answers without
    requesting tools, or when handling a tool call fails.
    """

    def __init__(self):
        # Tool schemas are derived from the two pydantic argument models.
        self.tools = [
            openai.pydantic_function_tool(GraphKnowledge),
            openai.pydantic_function_tool(VectorChunks),
        ]
        # Kept as a one-element list so it can be concatenated with the
        # conversation when building each request.
        self._system_message = [{"role": "system", "content": system_instruction}]

    def _run_tool(self, tool_call, message) -> dict:
        """Execute one requested tool call and return its result payload.

        Args:
            tool_call: One entry of ``message.tool_calls``.
            message: The assistant message that requested the call; only used
                to build the error text for an unknown tool.

        Returns:
            ``{"graph_data": ...}`` for GraphKnowledge or
            ``{"chunks_data": ...}`` for VectorChunks.

        Raises:
            ValueError: if the tool name is not one of the two known tools.
        """
        tool_name = tool_call.function.name
        if tool_name == "GraphKnowledge":
            args = json.loads(tool_call.function.arguments)
            print("call function GraphKnowledge", args)
            return {"graph_data": retrieve_knowledge_graph(args["query"])}
        if tool_name == "VectorChunks":
            args = json.loads(tool_call.function.arguments)
            print("call function VectorChunks", args)
            # Only the vector retriever is queried here; the knowledge-graph
            # lookup that used to run in this branch was never used.
            return {"chunks_data": retrieve_knowledge_embedded_chunks(args["query"])}

        print("unknow function calling", tool_call)
        raise ValueError(
            f"Unknown tool call and message: {message.model_dump_json()}"
        )

    def chat(
        self, session: Session, messages: list = []
    ) -> Generator[ChatEvent, None, None]:
        """Run the tool-calling conversation and stream its events.

        Args:
            session: Database session; it is only touched here to roll back
                after a failure while handling tool calls.
            messages: Conversation so far as a list of role/content dicts. The
                list is never mutated by this method.

        Yields:
            ChatEvent objects: TOOL_CALL for each assistant message requesting
            tools, TOOL_CALL_RESPONSE for each tool result, then exactly one
            terminal event — FINISHED with the final assistant answer, or
            ERROR if no messages were given or tool handling failed.
        """
        if not messages:
            yield ChatEvent(event_type=EventType.ERROR, payload="No messages provided")
            return

        # Assistant tool-call messages and tool results accumulated during
        # this call; appended after the caller's messages on every request.
        response_messages = []

        # Loop until the model answers without requesting any tool call.
        while True:
            response = fc_llm.chat.completions.create(
                model="gpt-4o",
                messages=(self._system_message + messages + response_messages),
                tools=self.tools,
            )
            message = response.choices[0].message

            # `not` also covers an empty list, which would otherwise append an
            # assistant message and loop again without making progress.
            if not message.tool_calls:
                yield ChatEvent(
                    event_type=EventType.FINISHED,
                    payload={
                        "role": "assistant",
                        "content": message.content,
                    },
                )
                return

            try:
                tool_call_message = message.model_dump()
                yield ChatEvent(
                    event_type=EventType.TOOL_CALL, payload=tool_call_message
                )
                response_messages.append(tool_call_message)

                for tool_call in message.tool_calls:
                    tool_call_result = self._run_tool(tool_call, message)

                    tool_call_result_message = {
                        "role": "tool",
                        "content": json.dumps(tool_call_result),
                        "tool_call_id": tool_call.id,
                    }

                    yield ChatEvent(
                        event_type=EventType.TOOL_CALL_RESPONSE,
                        payload=tool_call_result_message,
                    )

                    response_messages.append(tool_call_result_message)
            except Exception as e:
                traceback.print_exc()
                # Actually invoke the rollback (a bare attribute access is a no-op).
                session.rollback()

                yield ChatEvent(
                    event_type=EventType.ERROR,
                    payload=f"An error occurred while processing the request.{e}",
                )
                # Stop here: the conversation now holds an assistant tool-call
                # message without all of its tool replies, so sending it to the
                # model again would not be a valid request.
                return


cs = ChatService()

In [13]:
# Demo run: send one user question through the chat service and echo every
# event payload as a "data: ..." frame while the events stream in.
question = {
    "role": "user",
    "content": "could you summary the performance improvement of tidb from version 6.5 to newest version",
}

try:
    for event in cs.chat(session, [question]):
        frame = json.dumps(event.payload)
        print(f"data: {frame}\n\n")
except Exception as e:
    # Report failures raised by the generator in the same frame format.
    failure = {'event_type': 'ERROR', 'payload': str(e)}
    print(f"data: {json.dumps(failure)}\n\n")

data: {"content": "To provide a comprehensive summary of the performance improvements in TiDB from version 6.5 to the newest version, we need to break this query into several manageable subquestions to thoroughly capture and analyze the data:\n\n1. **Identify the Versions**: Determine the newest version of TiDB so we can define the scope of the comparison.\n2. **Review Release Notes**: Extract performance-related improvements, bug fixes, and feature enhancements from the release notes of the versions between 6.5 and the newest version.\n3. **Categorize Improvements**: Classify the improvements into different categories such as query performance, storage optimization, system stability, new features, etc.\n4. **Compare Performance Metrics**: If available, compare quantitative performance metrics (e.g., query execution time, transaction per second) between version 6.5 and the newest version.\n5. **Summarize Findings**: Consolidate the extracted data into a coherent summary that highlights