diff --git a/docs/openapi.json b/docs/openapi.json
index 743e1039..4f887ca7 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -418,12 +418,15 @@
                         "description": "Streaming response with Server-Sent Events",
                         "content": {
                             "application/json": {
-                                "schema": {}
+                                "schema": {
+                                    "type": "string",
+                                    "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n"
+                                }
                             },
-                            "text/event-stream": {
+                            "text/plain": {
                                 "schema": {
                                     "type": "string",
-                                    "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"role\": \"inference\", \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n"
+                                    "example": "Hello world!\n\n---\n\nReference: https://example.com/doc"
                                 }
                             }
                         }
@@ -1425,7 +1428,7 @@
                     }
                 ]
             },
-            "AuthenticationConfiguration": {
+            "AuthenticationConfiguration-Input": {
                 "properties": {
                     "module": {
                         "type": "string",
@@ -1465,7 +1468,7 @@
                     "jwk_config": {
                         "anyOf": [
                             {
-                                "$ref": "#/components/schemas/JwkConfiguration"
+                                "$ref": "#/components/schemas/JwkConfiguration-Input"
                             },
                             {
                                 "type": "null"
@@ -1478,7 +1481,75 @@
                 "title": "AuthenticationConfiguration",
                 "description": "Authentication configuration."
             },
-            "AuthorizationConfiguration": {
+            "AuthenticationConfiguration-Output": {
+                "properties": {
+                    "module": {
+                        "type": "string",
+                        "title": "Module",
+                        "default": "noop"
+                    },
+                    "skip_tls_verification": {
+                        "type": "boolean",
+                        "title": "Skip Tls Verification",
+                        "default": false
+                    },
+                    "k8s_cluster_api": {
+                        "anyOf": [
+                            {
+                                "type": "string",
+                                "minLength": 1,
+                                "format": "uri"
+                            },
+                            {
+                                "type": "null"
+                            }
+                        ],
+                        "title": "K8S Cluster Api"
+                    },
+                    "k8s_ca_cert_path": {
+                        "anyOf": [
+                            {
+                                "type": "string",
+                                "format": "file-path"
+                            },
+                            {
+                                "type": "null"
+                            }
+                        ],
+                        "title": "K8S Ca Cert Path"
+                    },
+                    "jwk_config": {
+                        "anyOf": [
+                            {
+                                "$ref": "#/components/schemas/JwkConfiguration-Output"
+                            },
+                            {
+                                "type": "null"
+                            }
+                        ]
+                    }
+                },
+                "additionalProperties": false,
+                "type": "object",
+                "title": "AuthenticationConfiguration",
+                "description": "Authentication configuration."
+            },
+            "AuthorizationConfiguration-Input": {
+                "properties": {
+                    "access_rules": {
+                        "items": {
+                            "$ref": "#/components/schemas/AccessRule"
+                        },
+                        "type": "array",
+                        "title": "Access Rules"
+                    }
+                },
+                "additionalProperties": false,
+                "type": "object",
+                "title": "AuthorizationConfiguration",
+                "description": "Authorization configuration."
+            },
+            "AuthorizationConfiguration-Output": {
                 "properties": {
                     "access_rules": {
                         "items": {
@@ -1653,12 +1724,12 @@
                         "title": "Mcp Servers"
                     },
                     "authentication": {
-                        "$ref": "#/components/schemas/AuthenticationConfiguration"
+                        "$ref": "#/components/schemas/AuthenticationConfiguration-Output"
                     },
                     "authorization": {
                         "anyOf": [
                             {
-                                "$ref": "#/components/schemas/AuthorizationConfiguration"
+                                "$ref": "#/components/schemas/AuthorizationConfiguration-Output"
                             },
                             {
                                 "type": "null"
@@ -2595,7 +2666,7 @@
                 "title": "JsonPathOperator",
                 "description": "Supported operators for JSONPath evaluation."
             },
-            "JwkConfiguration": {
+            "JwkConfiguration-Input": {
                 "properties": {
                     "url": {
                         "type": "string",
@@ -2604,7 +2675,7 @@
                         "title": "Url"
                     },
                     "jwt_configuration": {
-                        "$ref": "#/components/schemas/JwtConfiguration"
+                        "$ref": "#/components/schemas/JwtConfiguration-Input"
                     }
                 },
                 "additionalProperties": false,
@@ -2615,7 +2686,52 @@
                 "title": "JwkConfiguration",
                 "description": "JWK configuration."
             },
-            "JwtConfiguration": {
+            "JwkConfiguration-Output": {
+                "properties": {
+                    "url": {
+                        "type": "string",
+                        "minLength": 1,
+                        "format": "uri",
+                        "title": "Url"
+                    },
+                    "jwt_configuration": {
+                        "$ref": "#/components/schemas/JwtConfiguration-Output"
+                    }
+                },
+                "additionalProperties": false,
+                "type": "object",
+                "required": [
+                    "url"
+                ],
+                "title": "JwkConfiguration",
+                "description": "JWK configuration."
+            },
+            "JwtConfiguration-Input": {
+                "properties": {
+                    "user_id_claim": {
+                        "type": "string",
+                        "title": "User Id Claim",
+                        "default": "user_id"
+                    },
+                    "username_claim": {
+                        "type": "string",
+                        "title": "Username Claim",
+                        "default": "username"
+                    },
+                    "role_rules": {
+                        "items": {
+                            "$ref": "#/components/schemas/JwtRoleRule"
+                        },
+                        "type": "array",
+                        "title": "Role Rules"
+                    }
+                },
+                "additionalProperties": false,
+                "type": "object",
+                "title": "JwtConfiguration",
+                "description": "JWT configuration."
+            },
+            "JwtConfiguration-Output": {
                 "properties": {
                     "user_id_claim": {
                         "type": "string",
@@ -3207,9 +3323,10 @@
                             }
                         ],
                         "title": "Media Type",
-                        "description": "Media type (used just to enable compatibility)",
+                        "description": "Media type for the response format",
                         "examples": [
-                            "application/json"
+                            "application/json",
+                            "text/plain"
                         ]
                     }
                 },
@@ -3219,7 +3336,7 @@
                     "query"
                 ],
                 "title": "QueryRequest",
-                "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n    query: The query string.\n    conversation_id: The optional conversation ID (UUID).\n    provider: The optional provider.\n    model: The optional model.\n    system_prompt: The optional system prompt.\n    attachments: The optional attachments.\n    no_tools: Whether to bypass all tools and MCP servers (default: False).\n\nExample:\n    ```python\n    query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n    ```",
+                "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n    query: The query string.\n    conversation_id: The optional conversation ID (UUID).\n    provider: The optional provider.\n    model: The optional model.\n    system_prompt: The optional system prompt.\n    attachments: The optional attachments.\n    no_tools: Whether to bypass all tools and MCP servers (default: False).\n    media_type: The optional media type for response format (application/json or text/plain).\n\nExample:\n    ```python\n    query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n    ```",
                 "examples": [
                     {
                         "attachments": [
@@ -3312,6 +3429,52 @@
                                 }
                             ]
                         ]
+                    },
+                    "truncated": {
+                        "type": "boolean",
+                        "title": "Truncated",
+                        "description": "Whether conversation history was truncated",
+                        "default": false,
+                        "examples": [
+                            false,
+                            true
+                        ]
+                    },
+                    "input_tokens": {
+                        "type": "integer",
+                        "title": "Input Tokens",
+                        "description": "Number of tokens sent to LLM",
+                        "default": 0,
+                        "examples": [
+                            150,
+                            250,
+                            500
+                        ]
+                    },
+                    "output_tokens": {
+                        "type": "integer",
+                        "title": "Output Tokens",
+                        "description": "Number of tokens received from LLM",
+                        "default": 0,
+                        "examples": [
+                            50,
+                            100,
+                            200
+                        ]
+                    },
+                    "available_quotas": {
+                        "additionalProperties": {
+                            "type": "integer"
+                        },
+                        "type": "object",
+                        "title": "Available Quotas",
+                        "description": "Quota available as measured by all configured quota limiters",
+                        "examples": [
+                            {
+                                "daily": 1000,
+                                "monthly": 50000
+                            }
+                        ]
                     }
                 },
                 "type": "object",
@@ -3319,10 +3482,16 @@
                     "response"
                 ],
                 "title": "QueryResponse",
-                "description": "Model representing LLM response to a query.\n\nAttributes:\n    conversation_id: The optional conversation ID (UUID).\n    response: The response.\n    rag_chunks: List of RAG chunks used to generate the response.\n    referenced_documents: The URLs and titles for the documents used to generate the response.\n    tool_calls: List of tool calls made during response generation.\n    TODO: truncated: Whether conversation history was truncated.\n    TODO: input_tokens: Number of tokens sent to LLM.\n    TODO: output_tokens: Number of tokens received from LLM.\n    TODO: available_quotas: Quota available as measured by all configured quota limiters\n    TODO: tool_results: List of tool results.",
+                "description": "Model representing LLM response to a query.\n\nAttributes:\n    conversation_id: The optional conversation ID (UUID).\n    response: The response.\n    rag_chunks: List of RAG chunks used to generate the response.\n    referenced_documents: The URLs and titles for the documents used to generate the response.\n    tool_calls: List of tool calls made during response generation.\n    truncated: Whether conversation history was truncated.\n    input_tokens: Number of tokens sent to LLM.\n    output_tokens: Number of tokens received from LLM.\n    available_quotas: Quota available as measured by all configured quota limiters.",
                 "examples": [
                     {
+                        "available_quotas": {
+                            "daily": 1000,
+                            "monthly": 50000
+                        },
                         "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                        "input_tokens": 150,
+                        "output_tokens": 75,
                         "rag_chunks": [
                             {
                                 "content": "OLM is a component of the Operator Framework toolkit...",
@@ -3347,7 +3516,8 @@
                                 },
                                 "tool_name": "knowledge_search"
                             }
-                        ]
+                        ],
+                        "truncated": false
                     }
                 ]
             },
@@ -3807,4 +3977,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/docs/openapi.md b/docs/openapi.md
index 8a23abfc..34c22652 100644
--- a/docs/openapi.md
+++ b/docs/openapi.md
@@ -261,7 +261,7 @@ Raises:
 
 | Status Code | Description | Component |
 |-------------|-------------|-----------|
-| 200 | Streaming response with Server-Sent Events | ...string |
+| 200 | Streaming response with Server-Sent Events | string (`application/json`)<br>string (`text/plain`) |
 | 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse) |
 | 401 | Unauthorized: Invalid or missing Bearer token for k8s auth | [UnauthorizedResponse](#unauthorizedresponse) |
 | 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse) |
@@ -695,7 +696,7 @@ metadata:
 | content | string | The actual attachment content |
 
 
-## AuthenticationConfiguration
+## AuthenticationConfiguration-Input
 
 
 Authentication configuration.
@@ -710,7 +711,33 @@ Authentication configuration.
 | jwk_config |  |  |
 
 
-## AuthorizationConfiguration
+## AuthenticationConfiguration-Output
+
+
+Authentication configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| module | string |  |
+| skip_tls_verification | boolean |  |
+| k8s_cluster_api |  |  |
+| k8s_ca_cert_path |  |  |
+| jwk_config |  |  |
+
+
+## AuthorizationConfiguration-Input
+
+
+Authorization configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| access_rules | array |  |
+
+
+## AuthorizationConfiguration-Output
 
 
 Authorization configuration.
@@ -1265,7 +1292,7 @@ Supported operators for JSONPath evaluation.
 
 
 
-## JwkConfiguration
+## JwkConfiguration-Input
 
 
 JWK configuration.
@@ -1277,7 +1304,32 @@ JWK configuration.
 | jwt_configuration |  |  |
 
 
-## JwtConfiguration
+## JwkConfiguration-Output
+
+
+JWK configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| url | string |  |
+| jwt_configuration |  |  |
+
+
+## JwtConfiguration-Input
+
+
+JWT configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| user_id_claim | string |  |
+| username_claim | string |  |
+| role_rules | array |  |
+
+
+## JwtConfiguration-Output
 
 
 JWT configuration.
@@ -1438,6 +1490,7 @@ Attributes:
     system_prompt: The optional system prompt.
     attachments: The optional attachments.
     no_tools: Whether to bypass all tools and MCP servers (default: False).
+    media_type: The optional media type for response format (application/json or text/plain).
 
 Example:
     ```python
@@ -1454,7 +1507,7 @@ Example:
 | system_prompt |  | The optional system prompt. |
 | attachments |  | The optional list of attachments. |
 | no_tools |  | Whether to bypass all tools and MCP servers |
-| media_type |  | Media type (used just to enable compatibility) |
+| media_type |  | Media type for the response format |
 
 
 ## QueryResponse
@@ -1468,11 +1521,10 @@ Attributes:
     rag_chunks: List of RAG chunks used to generate the response.
     referenced_documents: The URLs and titles for the documents used to generate the response.
     tool_calls: List of tool calls made during response generation.
-    TODO: truncated: Whether conversation history was truncated.
-    TODO: input_tokens: Number of tokens sent to LLM.
-    TODO: output_tokens: Number of tokens received from LLM.
-    TODO: available_quotas: Quota available as measured by all configured quota limiters
-    TODO: tool_results: List of tool results.
+    truncated: Whether conversation history was truncated.
+    input_tokens: Number of tokens sent to LLM.
+    output_tokens: Number of tokens received from LLM.
+    available_quotas: Quota available as measured by all configured quota limiters.
 
 
 | Field | Type | Description |
@@ -1482,6 +1534,10 @@ Attributes:
 | rag_chunks | array | List of RAG chunks used to generate the response |
 | tool_calls |  | List of tool calls made during response generation |
 | referenced_documents | array | List of documents referenced in generating the response |
+| truncated | boolean | Whether conversation history was truncated |
+| input_tokens | integer | Number of tokens sent to LLM |
+| output_tokens | integer | Number of tokens received from LLM |
+| available_quotas | object | Quota available as measured by all configured quota limiters |
 
 
 ## RAGChunk
diff --git a/docs/output.md b/docs/output.md
index 8a23abfc..34c22652 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -261,7 +261,7 @@ Raises:
 
 | Status Code | Description | Component |
 |-------------|-------------|-----------|
-| 200 | Streaming response with Server-Sent Events | ...string |
+| 200 | Streaming response with Server-Sent Events | string (`application/json`)<br>string (`text/plain`) |
 | 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse) |
 | 401 | Unauthorized: Invalid or missing Bearer token for k8s auth | [UnauthorizedResponse](#unauthorizedresponse) |
 | 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse) |
@@ -695,7 +696,7 @@ metadata:
 | content | string | The actual attachment content |
 
 
-## AuthenticationConfiguration
+## AuthenticationConfiguration-Input
 
 
 Authentication configuration.
@@ -710,7 +711,33 @@ Authentication configuration.
 | jwk_config |  |  |
 
 
-## AuthorizationConfiguration
+## AuthenticationConfiguration-Output
+
+
+Authentication configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| module | string |  |
+| skip_tls_verification | boolean |  |
+| k8s_cluster_api |  |  |
+| k8s_ca_cert_path |  |  |
+| jwk_config |  |  |
+
+
+## AuthorizationConfiguration-Input
+
+
+Authorization configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| access_rules | array |  |
+
+
+## AuthorizationConfiguration-Output
 
 
 Authorization configuration.
@@ -1265,7 +1292,7 @@ Supported operators for JSONPath evaluation.
 
 
 
-## JwkConfiguration
+## JwkConfiguration-Input
 
 
 JWK configuration.
@@ -1277,7 +1304,32 @@ JWK configuration.
 | jwt_configuration |  |  |
 
 
-## JwtConfiguration
+## JwkConfiguration-Output
+
+
+JWK configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| url | string |  |
+| jwt_configuration |  |  |
+
+
+## JwtConfiguration-Input
+
+
+JWT configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| user_id_claim | string |  |
+| username_claim | string |  |
+| role_rules | array |  |
+
+
+## JwtConfiguration-Output
 
 
 JWT configuration.
@@ -1438,6 +1490,7 @@ Attributes:
     system_prompt: The optional system prompt.
     attachments: The optional attachments.
     no_tools: Whether to bypass all tools and MCP servers (default: False).
+    media_type: The optional media type for response format (application/json or text/plain).
 
 Example:
     ```python
@@ -1454,7 +1507,7 @@ Example:
 | system_prompt |  | The optional system prompt. |
 | attachments |  | The optional list of attachments. |
 | no_tools |  | Whether to bypass all tools and MCP servers |
-| media_type |  | Media type (used just to enable compatibility) |
+| media_type |  | Media type for the response format |
 
 
 ## QueryResponse
@@ -1468,11 +1521,10 @@ Attributes:
     rag_chunks: List of RAG chunks used to generate the response.
     referenced_documents: The URLs and titles for the documents used to generate the response.
     tool_calls: List of tool calls made during response generation.
-    TODO: truncated: Whether conversation history was truncated.
-    TODO: input_tokens: Number of tokens sent to LLM.
-    TODO: output_tokens: Number of tokens received from LLM.
-    TODO: available_quotas: Quota available as measured by all configured quota limiters
-    TODO: tool_results: List of tool results.
+    truncated: Whether conversation history was truncated.
+    input_tokens: Number of tokens sent to LLM.
+    output_tokens: Number of tokens received from LLM.
+    available_quotas: Quota available as measured by all configured quota limiters.
 
 
 | Field | Type | Description |
@@ -1482,6 +1534,10 @@ Attributes:
 | rag_chunks | array | List of RAG chunks used to generate the response |
 | tool_calls |  | List of tool calls made during response generation |
 | referenced_documents | array | List of documents referenced in generating the response |
+| truncated | boolean | Whether conversation history was truncated |
+| input_tokens | integer | Number of tokens sent to LLM |
+| output_tokens | integer | Number of tokens received from LLM |
+| available_quotas | object | Quota available as measured by all configured quota limiters |
 
 
 ## RAGChunk