@@ -34,6 +34,22 @@ class ModelsResponse(BaseModel):
3434 )
3535
3636
class RAGChunk(BaseModel):
    """Model representing a RAG chunk used in the response.

    Attributes:
        content: The text content of the retrieved chunk.
        source: Source document or URL the chunk came from, when known.
        score: Relevance score assigned by the retriever, when available.
    """

    # The chunk text itself; always required.
    content: str = Field(description="The content of the chunk")
    # Optional provenance for the chunk.
    source: Optional[str] = Field(None, description="Source document or URL")
    # Optional retrieval relevance score.
    score: Optional[float] = Field(None, description="Relevance score")
class ToolCall(BaseModel):
    """Model representing a tool call made during response generation.

    Attributes:
        tool_name: Name of the tool that was invoked.
        arguments: Arguments the tool was invoked with.
        result: The tool's result payload, if one was captured.
    """

    # Which tool was invoked; always required.
    tool_name: str = Field(description="Name of the tool called")
    # Arbitrary JSON-like argument mapping passed to the tool.
    arguments: dict[str, Any] = Field(description="Arguments passed to the tool")
    # Result mapping returned by the tool, when available.
    result: Optional[dict[str, Any]] = Field(
        None, description="Result from the tool"
    )
3753class ReferencedDocument (BaseModel ):
3854 """Model representing a document referenced in generating a response.
3955
@@ -42,27 +58,27 @@ class ReferencedDocument(BaseModel):
4258 doc_title: Title of the referenced doc.
4359 """
4460
45- doc_url : AnyUrl = Field (description = "URL of the referenced document" )
61+ doc_url : Optional [AnyUrl ] = Field (
62+ None , description = "URL of the referenced document"
63+ )
4664
4765 doc_title : str = Field (description = "Title of the referenced document" )
4866
4967
50- # TODO(lucasagomes): a lot of fields to add to QueryResponse. For now
51- # we are keeping it simple. The missing fields are:
52- # - truncated: Set to True if conversation history was truncated to be within context window.
53- # - input_tokens: Number of tokens sent to LLM
54- # - output_tokens: Number of tokens received from LLM
55- # - available_quotas: Quota available as measured by all configured quota limiters
56- # - tool_calls: List of tool requests.
57- # - tool_results: List of tool results.
58- # See LLMResponse in ols-service for more details.
5968class QueryResponse (BaseModel ):
6069 """Model representing LLM response to a query.
6170
6271 Attributes:
6372 conversation_id: The optional conversation ID (UUID).
6473 response: The response.
74+ rag_chunks: List of RAG chunks used to generate the response.
6575 referenced_documents: The URLs and titles for the documents used to generate the response.
76+ tool_calls: List of tool calls made during response generation.
77+ TODO: truncated: Whether conversation history was truncated.
78+ TODO: input_tokens: Number of tokens sent to LLM.
79+ TODO: output_tokens: Number of tokens received from LLM.
80+ TODO: available_quotas: Quota available as measured by all configured quota limiters
81+ TODO: tool_results: List of tool results.
6682 """
6783
6884 conversation_id : Optional [str ] = Field (
@@ -78,6 +94,13 @@ class QueryResponse(BaseModel):
7894 ],
7995 )
8096
97+ rag_chunks : list [RAGChunk ] = []
98+
99+ tool_calls : Optional [list [ToolCall ]] = Field (
100+ None ,
101+ description = "List of tool calls made during response generation" ,
102+ )
103+
81104 referenced_documents : list [ReferencedDocument ] = Field (
82105 default_factory = list ,
83106 description = "List of documents referenced in generating the response" ,
@@ -99,6 +122,20 @@ class QueryResponse(BaseModel):
99122 {
100123 "conversation_id" : "123e4567-e89b-12d3-a456-426614174000" ,
101124 "response" : "Operator Lifecycle Manager (OLM) helps users install..." ,
125+ "rag_chunks" : [
126+ {
127+ "content" : "OLM is a component of the Operator Framework toolkit..." ,
128+ "source" : "kubernetes-docs/operators.md" ,
129+ "score" : 0.95 ,
130+ }
131+ ],
132+ "tool_calls" : [
133+ {
134+ "tool_name" : "knowledge_search" ,
135+ "arguments" : {"query" : "operator lifecycle manager" },
136+ "result" : {"chunks_found" : 5 },
137+ }
138+ ],
102139 "referenced_documents" : [
103140 {
104141 "doc_url" : "https://docs.openshift.com/"
0 commit comments