diff --git a/docs/openapi.json b/docs/openapi.json index 8dae125f8..e0df7d8a9 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -6176,8 +6176,7 @@ "content": { "text/event-stream": { "schema": { - "type": "string", - "format": "text/event-stream" + "type": "string" }, "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\", \"request_id\": \"123e4567-e89b-12d3-a456-426614174001\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"No Violation\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 1, \"token\": \"\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 2, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 3, \"token\": \"!\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 4, \"token\": \" How\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 5, \"token\": \" can\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 6, \"token\": \" I\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 7, \"token\": \" assist\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 8, \"token\": \" you\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 9, \"token\": \" today\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 10, \"token\": \"?\"}}\n\ndata: {\"event\": \"turn_complete\", \"data\": {\"token\": \"Hello! 
How can I assist you today?\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 11, \"output_tokens\": 19}, \"available_quotas\": {}}\n\n" } @@ -12048,7 +12047,7 @@ "configuration" ], "title": "ConfigurationResponse", - "description": "Success response model for the config endpoint.", + "description": "Success response model for the config endpoint.\n\nAttributes:\n configuration: Parsed application configuration returned to the client.", "examples": [ { "configuration": { @@ -12624,7 +12623,7 @@ "message" ], "title": "ConversationUpdateResponse", - "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```", + "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.", "examples": [ { "conversation_id": "123e4567-e89b-12d3-a456-426614174000", @@ -13012,7 +13011,7 @@ "response" ], "title": "FeedbackResponse", - "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", + "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.", "examples": [ { "response": "feedback received" @@ -13050,7 +13049,7 @@ "status" ], "title": "FeedbackStatusUpdateResponse", - 
"description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = StatusResponse(\n status={\n \"previous_status\": true,\n \"updated_status\": false,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", + "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.", "examples": [ { "status": { @@ -13424,7 +13423,7 @@ "llama_stack_version" ], "title": "InfoResponse", - "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.", "examples": [ { "llama_stack_version": "1.0.0", @@ -13639,7 +13638,7 @@ "alive" ], "title": "LivenessResponse", - "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", + "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.", "examples": [ { "alive": true @@ -13726,7 +13725,7 @@ }, "type": "object", "title": "MCPClientAuthOptionsResponse", - "description": "Response containing MCP servers that accept client-provided authorization.", + "description": "Response containing MCP servers that accept client-provided authorization.\n\nAttributes:\n servers: MCP servers that declare client 
authentication headers.", "examples": [ { "servers": [ @@ -13821,7 +13820,7 @@ "message" ], "title": "MCPServerDeleteResponse", - "description": "Response for a successful MCP server deletion.", + "description": "Response for a successful MCP server deletion.\n\nAttributes:\n name: Deleted MCP server name.\n message: Status message.", "examples": [ { "message": "MCP server 'test-mcp-server' unregistered successfully", @@ -13879,7 +13878,7 @@ }, "type": "object", "title": "MCPServerListResponse", - "description": "Response listing all registered MCP servers.", + "description": "Response listing all registered MCP servers.\n\nAttributes:\n servers: All registered MCP servers (static and dynamic).", "examples": [ { "servers": [ @@ -14063,7 +14062,7 @@ "message" ], "title": "MCPServerRegistrationResponse", - "description": "Response for a successful MCP server registration.", + "description": "Response for a successful MCP server registration.\n\nAttributes:\n name: Registered MCP server name.\n url: Registered MCP server URL.\n provider_id: MCP provider identification.\n message: Status message.", "examples": [ { "message": "MCP server 'mcp-integration-tools' registered successfully", @@ -16494,7 +16493,7 @@ "response" ], "title": "PromptDeleteResponse", - "description": "Result of deleting a stored prompt (always HTTP 200, like conversations v2).", + "description": "Result of deleting a stored prompt (always HTTP 200, like conversations v2).\n\nAttributes:\n prompt_id: Prompt identifier that was passed to delete.\n deleted: Whether the prompt was deleted successfully\n response: Human readable response", "examples": [ { "label": "deleted", @@ -16573,7 +16572,7 @@ "version" ], "title": "PromptResourceResponse", - "description": "A stored prompt template as returned by Llama Stack.", + "description": "A stored prompt template as returned by Llama Stack.\n\nAttributes:\n prompt_id: Prompt identifier from Llama Stack.\n version: Version number for this prompt.\n 
is_default: Whether this version is the default.\n prompt: Prompt text with placeholders.\n variables: Variable names used in the template.", "examples": [ { "is_default": true, @@ -16711,7 +16710,7 @@ "additionalProperties": false, "type": "object", "title": "PromptsListResponse", - "description": "List of stored prompt templates returned by Llama Stack.", + "description": "List of stored prompt templates returned by Llama Stack.\n\nAttributes:\n data: Prompt entries as returned by the Llama Stack list API.", "examples": [ { "data": [ @@ -17184,7 +17183,7 @@ "truncated": { "type": "boolean", "title": "Truncated", - "description": "Deprecated:Whether conversation history was truncated", + "description": "Deprecated: whether conversation history was truncated", "default": false, "examples": [ false, @@ -17767,7 +17766,7 @@ "providers" ], "title": "ReadinessResponse", - "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n message=\"Server is unavailable\"\n )\n ]\n )\n ```", + "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.", "examples": [ { "providers": [], @@ -19392,7 +19391,7 @@ "status" ], "title": "StatusResponse", - "description": "Model representing a response to a status request.\n\nAttributes:\n functionality: The functionality of the service.\n status: The status of the service.\n\nExample:\n ```python\n status_response = StatusResponse(\n functionality=\"feedback\",\n status={\"enabled\": True},\n )\n ```", + 
"description": "Model representing a response to a status request.\n\nAttributes:\n functionality: The functionality of the service.\n status: The status of the service.", "examples": [ { "functionality": "feedback", @@ -19460,7 +19459,7 @@ "message" ], "title": "StreamingInterruptResponse", - "description": "Model representing a response to a streaming interrupt request.\n\nAttributes:\n request_id: The streaming request ID targeted by the interrupt call.\n interrupted: Whether an in-progress stream was interrupted.\n message: Human-readable interruption status message.\n\nExample:\n ```python\n response = StreamingInterruptResponse(\n request_id=\"123e4567-e89b-12d3-a456-426614174000\",\n interrupted=True,\n message=\"Streaming request interrupted\",\n )\n ```", + "description": "Model representing a response to a streaming interrupt request.\n\nAttributes:\n request_id: The streaming request ID targeted by the interrupt call.\n interrupted: Whether an in-progress stream was interrupted.\n message: Human-readable interruption status message.", "examples": [ { "interrupted": true, diff --git a/src/app/endpoints/authorized.py b/src/app/endpoints/authorized.py index 63a3b5b06..175c42a1f 100644 --- a/src/app/endpoints/authorized.py +++ b/src/app/endpoints/authorized.py @@ -7,15 +7,13 @@ from authentication import get_auth_dependency from authentication.interface import AuthTuple from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.responses import ( - AuthorizedResponse, -) +from models.api.responses.successful import AuthorizedResponse logger = get_logger(__name__) router = APIRouter(tags=["authorized"]) diff --git a/src/app/endpoints/config.py b/src/app/endpoints/config.py index a1190a959..21dea5097 100644 --- 
a/src/app/endpoints/config.py +++ b/src/app/endpoints/config.py @@ -9,17 +9,15 @@ from authorization.middleware import authorize from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) +from models.api.responses.successful import ConfigurationResponse from models.config import Action -from models.responses import ( - ConfigurationResponse, -) from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) diff --git a/src/app/endpoints/conversations_v1.py b/src/app/endpoints/conversations_v1.py index 9cde18542..937d6a9e8 100644 --- a/src/app/endpoints/conversations_v1.py +++ b/src/app/endpoints/conversations_v1.py @@ -16,8 +16,8 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( BadRequestResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -25,18 +25,18 @@ ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import Action -from models.database.conversations import ( - UserConversation, -) -from models.requests import ConversationUpdateRequest -from models.responses import ( +from models.api.responses.successful import ( ConversationDeleteResponse, - ConversationDetails, ConversationResponse, ConversationsListResponse, ConversationUpdateResponse, ) +from models.common import ConversationDetails +from models.config import Action +from models.database.conversations import ( + UserConversation, +) +from models.requests import ConversationUpdateRequest from utils.conversations import ( 
build_conversation_turns_from_items, get_all_conversation_items, diff --git a/src/app/endpoints/conversations_v2.py b/src/app/endpoints/conversations_v2.py index 773ce4a0a..4109ee40d 100644 --- a/src/app/endpoints/conversations_v2.py +++ b/src/app/endpoints/conversations_v2.py @@ -8,8 +8,8 @@ from authorization.middleware import authorize from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( BadRequestResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -17,17 +17,19 @@ ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.cache_entry import CacheEntry -from models.config import Action -from models.requests import ConversationUpdateRequest -from models.responses import ( +from models.api.responses.successful import ( ConversationDeleteResponse, ConversationResponse, ConversationsListResponseV2, - ConversationTurn, ConversationUpdateResponse, +) +from models.cache_entry import CacheEntry +from models.common import ( + ConversationTurn, Message, ) +from models.config import Action +from models.requests import ConversationUpdateRequest from utils.endpoints import check_configuration_loaded from utils.suid import check_suid diff --git a/src/app/endpoints/feedback.py b/src/app/endpoints/feedback.py index 8779a6189..94cdd6a42 100644 --- a/src/app/endpoints/feedback.py +++ b/src/app/endpoints/feedback.py @@ -13,21 +13,21 @@ from authorization.middleware import authorize from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import 
Action -from models.requests import FeedbackRequest, FeedbackStatusUpdateRequest -from models.responses import ( +from models.api.responses.successful import ( FeedbackResponse, FeedbackStatusUpdateResponse, StatusResponse, ) +from models.config import Action +from models.requests import FeedbackRequest, FeedbackStatusUpdateRequest from utils.endpoints import check_configuration_loaded, retrieve_conversation from utils.suid import get_suid diff --git a/src/app/endpoints/health.py b/src/app/endpoints/health.py index 57e349990..d2f56efc7 100644 --- a/src/app/endpoints/health.py +++ b/src/app/endpoints/health.py @@ -17,18 +17,18 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import Action -from models.responses import ( +from models.api.responses.successful import ( LivenessResponse, - ProviderHealthStatus, ReadinessResponse, ) +from models.common import ProviderHealthStatus +from models.config import Action logger = get_logger(__name__) router = APIRouter(tags=["health"]) diff --git a/src/app/endpoints/info.py b/src/app/endpoints/info.py index a58fdd1ec..2acd89b03 100644 --- a/src/app/endpoints/info.py +++ b/src/app/endpoints/info.py @@ -11,16 +11,14 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) +from models.api.responses.successful import InfoResponse from models.config import Action -from models.responses 
import ( - InfoResponse, -) from version import __version__ logger = get_logger(__name__) diff --git a/src/app/endpoints/mcp_auth.py b/src/app/endpoints/mcp_auth.py index 85ca8ac48..62aea7615 100644 --- a/src/app/endpoints/mcp_auth.py +++ b/src/app/endpoints/mcp_auth.py @@ -10,18 +10,16 @@ from authorization.middleware import authorize from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) +from models.api.responses.successful import MCPClientAuthOptionsResponse +from models.common import MCPServerAuthInfo from models.config import Action -from models.responses import ( - MCPClientAuthOptionsResponse, - MCPServerAuthInfo, -) from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) diff --git a/src/app/endpoints/mcp_servers.py b/src/app/endpoints/mcp_servers.py index 90b524760..5f34bea38 100644 --- a/src/app/endpoints/mcp_servers.py +++ b/src/app/endpoints/mcp_servers.py @@ -11,8 +11,8 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ConflictResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -20,14 +20,14 @@ ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import Action, ModelContextProtocolServer -from models.requests import MCPServerRegistrationRequest -from models.responses import ( +from models.api.responses.successful import ( MCPServerDeleteResponse, - MCPServerInfo, MCPServerListResponse, MCPServerRegistrationResponse, ) +from models.common import 
MCPServerInfo +from models.config import Action, ModelContextProtocolServer +from models.requests import MCPServerRegistrationRequest from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) diff --git a/src/app/endpoints/metrics.py b/src/app/endpoints/metrics.py index 134f88944..c33a6866c 100644 --- a/src/app/endpoints/metrics.py +++ b/src/app/endpoints/metrics.py @@ -13,8 +13,8 @@ from authentication.interface import AuthTuple from authorization.middleware import authorize from metrics.utils import setup_model_metrics -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ServiceUnavailableResponse, diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py index 59b6e4178..ddd093214 100644 --- a/src/app/endpoints/models.py +++ b/src/app/endpoints/models.py @@ -12,18 +12,16 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) +from models.api.responses.successful import ModelsResponse from models.config import Action from models.requests import ModelFilter -from models.responses import ( - ModelsResponse, -) from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) diff --git a/src/app/endpoints/prompts.py b/src/app/endpoints/prompts.py index 0c66d4768..956bb3029 100644 --- a/src/app/endpoints/prompts.py +++ b/src/app/endpoints/prompts.py @@ -13,8 +13,8 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from 
models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( BadRequestResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -22,13 +22,13 @@ ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import Action -from models.requests import PromptCreateRequest, PromptUpdateRequest -from models.responses import ( +from models.api.responses.successful import ( PromptDeleteResponse, PromptResourceResponse, PromptsListResponse, ) +from models.config import Action +from models.requests import PromptCreateRequest, PromptUpdateRequest from utils.endpoints import check_configuration_loaded from utils.query import handle_known_apistatus_errors from utils.suid import check_suid_prompt diff --git a/src/app/endpoints/providers.py b/src/app/endpoints/providers.py index b2060de86..0d7592ae0 100644 --- a/src/app/endpoints/providers.py +++ b/src/app/endpoints/providers.py @@ -13,19 +13,19 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import Action -from models.responses import ( +from models.api.responses.successful import ( ProviderResponse, ProvidersListResponse, ) +from models.config import Action from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index 096feef5e..8a04a4d2a 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -24,8 +24,8 @@ from configuration import configuration from constants import ENDPOINT_PATH_QUERY from log 
import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, @@ -35,12 +35,12 @@ UnauthorizedResponse, UnprocessableEntityResponse, ) +from models.api.responses.successful import QueryResponse +from models.common.moderation import ShieldModerationResult from models.common.responses.responses_api_params import ResponsesApiParams +from models.common.turn_summary import TurnSummary from models.config import Action from models.requests import QueryRequest -from models.responses import ( - QueryResponse, -) from utils.conversations import append_turn_items_to_conversation from utils.endpoints import ( check_configuration_loaded, @@ -68,10 +68,6 @@ ) from utils.shields import run_shield_moderation, validate_shield_ids_override from utils.suid import normalize_conversation_id -from utils.types import ( - ShieldModerationResult, - TurnSummary, -) from utils.vector_search import build_rag_context logger = get_logger(__name__) diff --git a/src/app/endpoints/rags.py b/src/app/endpoints/rags.py index 955a4b447..c60c6db64 100644 --- a/src/app/endpoints/rags.py +++ b/src/app/endpoints/rags.py @@ -12,19 +12,19 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import Action, ByokRag -from models.responses import ( +from models.api.responses.successful import ( RAGInfoResponse, RAGListResponse, ) +from models.config import Action, ByokRag from utils.endpoints import 
check_configuration_loaded logger = get_logger(__name__) diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py index 34f819199..6c3757de4 100644 --- a/src/app/endpoints/responses.py +++ b/src/app/endpoints/responses.py @@ -38,8 +38,8 @@ from configuration import configuration from constants import ENDPOINT_PATH_RESPONSES, SUBSTITUTED_INSTRUCTIONS_PLACEHOLDER from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH +from models.api.responses.error import ( ConflictResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -50,13 +50,13 @@ UnauthorizedResponse, UnprocessableEntityResponse, ) +from models.api.responses.successful import ResponsesResponse +from models.common.moderation import ShieldModerationBlocked from models.common.responses.responses_api_params import ResponsesApiParams from models.common.responses.responses_context import ResponsesContext +from models.common.turn_summary import TurnSummary from models.config import Action from models.requests import ResponsesRequest -from models.responses import ( - ResponsesResponse, -) from observability import ResponsesEventData, build_responses_event, send_splunk_event from utils.conversations import append_turn_items_to_conversation from utils.endpoints import ( @@ -100,10 +100,6 @@ normalize_conversation_id, ) from utils.tool_formatter import translate_vector_store_ids_to_user_facing -from utils.types import ( - ShieldModerationBlocked, - TurnSummary, -) from utils.vector_search import ( append_inline_rag_context_to_responses_input, build_rag_context, diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py index 72d7958c3..08555a4d5 100644 --- a/src/app/endpoints/rlsapi_v1.py +++ b/src/app/endpoints/rlsapi_v1.py @@ -26,8 +26,8 @@ from constants import ENDPOINT_PATH_INFER from log import get_logger from metrics import 
recording -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, diff --git a/src/app/endpoints/root.py b/src/app/endpoints/root.py index e966a5cef..5b88f8b87 100644 --- a/src/app/endpoints/root.py +++ b/src/app/endpoints/root.py @@ -9,8 +9,8 @@ from authentication.interface import AuthTuple from authorization.middleware import authorize from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, ServiceUnavailableResponse, UnauthorizedResponse, diff --git a/src/app/endpoints/shields.py b/src/app/endpoints/shields.py index 779a73ee9..480e02d50 100644 --- a/src/app/endpoints/shields.py +++ b/src/app/endpoints/shields.py @@ -12,17 +12,15 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) +from models.api.responses.successful import ShieldsResponse from models.config import Action -from models.responses import ( - ShieldsResponse, -) from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) diff --git a/src/app/endpoints/stream_interrupt.py b/src/app/endpoints/stream_interrupt.py index d4ee6239a..7c58b21ad 100644 --- a/src/app/endpoints/stream_interrupt.py +++ b/src/app/endpoints/stream_interrupt.py @@ -7,18 +7,16 @@ from authentication import get_auth_dependency from authentication.interface import AuthTuple from 
authorization.middleware import authorize -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, NotFoundResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) +from models.api.responses.successful import StreamingInterruptResponse from models.config import Action from models.requests import StreamingInterruptRequest -from models.responses import ( - StreamingInterruptResponse, -) from utils.stream_interrupts import ( CancelStreamResult, StreamInterruptRegistry, diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py index 1ac7a73d4..faccdc920 100644 --- a/src/app/endpoints/streaming_query.py +++ b/src/app/endpoints/streaming_query.py @@ -60,8 +60,8 @@ ) from log import get_logger from metrics import recording -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH +from models.api.responses.error import ( AbstractErrorResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -72,13 +72,12 @@ UnauthorizedResponse, UnprocessableEntityResponse, ) +from models.api.responses.successful import StreamingQueryResponse from models.common.responses.responses_api_params import ResponsesApiParams +from models.common.turn_summary import ReferencedDocument, TurnSummary from models.config import Action from models.context import ResponseGeneratorContext from models.requests import QueryRequest -from models.responses import ( - StreamingQueryResponse, -) from utils.conversations import append_turn_items_to_conversation from utils.endpoints import ( check_configuration_loaded, @@ -119,7 +118,6 @@ from utils.stream_interrupts import get_stream_interrupt_registry from utils.suid import get_suid, normalize_conversation_id from utils.token_counter import TokenCounter -from 
utils.types import ReferencedDocument, TurnSummary from utils.vector_search import build_rag_context logger = get_logger(__name__) @@ -832,7 +830,8 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat # Completed response - capture final text and response object elif event_type == "response.completed": latest_response_object = cast( - OpenAIResponseObject, getattr(chunk, "response") # noqa: B009 + OpenAIResponseObject, + getattr(chunk, "response"), # noqa: B009 ) turn_summary.llm_response = turn_summary.llm_response or "".join(text_parts) yield stream_event( @@ -848,7 +847,8 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat # Incomplete or failed response - emit error elif event_type in ("response.incomplete", "response.failed"): latest_response_object = cast( - OpenAIResponseObject, getattr(chunk, "response") # noqa: B009 + OpenAIResponseObject, + getattr(chunk, "response"), # noqa: B009 ) error_message = ( latest_response_object.error.message diff --git a/src/app/endpoints/tools.py b/src/app/endpoints/tools.py index eac339e36..222e1fc7a 100644 --- a/src/app/endpoints/tools.py +++ b/src/app/endpoints/tools.py @@ -11,17 +11,15 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ServiceUnavailableResponse, UnauthorizedResponse, ) +from models.api.responses.successful import ToolsResponse from models.config import Action -from models.responses import ( - ToolsResponse, -) from utils.endpoints import check_configuration_loaded from utils.mcp_headers import ( McpHeaders, diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py index 34f3090ea..0d24c9c6a 100644 --- 
a/src/app/endpoints/vector_stores.py +++ b/src/app/endpoints/vector_stores.py @@ -22,8 +22,8 @@ from configuration import configuration from constants import DEFAULT_MAX_FILE_UPLOAD_SIZE from log import get_logger -from models.api.responses import ( - UNAUTHORIZED_OPENAPI_EXAMPLES, +from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES +from models.api.responses.error import ( FileTooLargeResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -31,13 +31,7 @@ ServiceUnavailableResponse, UnauthorizedResponse, ) -from models.config import Action -from models.requests import ( - VectorStoreCreateRequest, - VectorStoreFileCreateRequest, - VectorStoreUpdateRequest, -) -from models.responses import ( +from models.api.responses.successful import ( FileResponse, VectorStoreDeleteResponse, VectorStoreFileDeleteResponse, @@ -46,6 +40,12 @@ VectorStoreResponse, VectorStoresListResponse, ) +from models.config import Action +from models.requests import ( + VectorStoreCreateRequest, + VectorStoreFileCreateRequest, + VectorStoreUpdateRequest, +) from utils.endpoints import check_configuration_loaded from utils.query import handle_known_apistatus_errors diff --git a/src/app/main.py b/src/app/main.py index 6b68c54bb..af42dd9f5 100644 --- a/src/app/main.py +++ b/src/app/main.py @@ -23,7 +23,7 @@ from configuration import configuration from log import get_logger from metrics import recording -from models.api.responses import InternalServerErrorResponse +from models.api.responses.error import InternalServerErrorResponse from sentry import initialize_sentry from utils.common import register_mcp_servers_async from utils.llama_stack_version import check_llama_stack_version diff --git a/src/authentication/jwk_token.py b/src/authentication/jwk_token.py index 756cb593a..7cc15870b 100644 --- a/src/authentication/jwk_token.py +++ b/src/authentication/jwk_token.py @@ -22,7 +22,7 @@ DEFAULT_VIRTUAL_PATH, ) from log import get_logger -from models.api.responses import 
UnauthorizedResponse +from models.api.responses.error import UnauthorizedResponse from models.config import JwkConfiguration logger = get_logger(__name__) diff --git a/src/authentication/k8s.py b/src/authentication/k8s.py index e3ac0c9de..5d59d7844 100644 --- a/src/authentication/k8s.py +++ b/src/authentication/k8s.py @@ -14,7 +14,7 @@ from configuration import configuration from constants import DEFAULT_VIRTUAL_PATH from log import get_logger -from models.api.responses import ( +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ServiceUnavailableResponse, diff --git a/src/authentication/utils.py b/src/authentication/utils.py index 26da5c049..aad460c00 100644 --- a/src/authentication/utils.py +++ b/src/authentication/utils.py @@ -3,7 +3,7 @@ from fastapi import HTTPException from starlette.datastructures import Headers -from models.api.responses import UnauthorizedResponse +from models.api.responses.error import UnauthorizedResponse def extract_user_token(headers: Headers) -> str: diff --git a/src/authorization/middleware.py b/src/authorization/middleware.py index f29fa7cc5..2aaa8d415 100644 --- a/src/authorization/middleware.py +++ b/src/authorization/middleware.py @@ -18,7 +18,7 @@ ) from configuration import configuration from log import get_logger -from models.api.responses import ( +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ) diff --git a/src/cache/cache.py b/src/cache/cache.py index 461d94b6b..b0122ada9 100644 --- a/src/cache/cache.py +++ b/src/cache/cache.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from models.cache_entry import CacheEntry -from models.responses import ConversationData +from models.common import ConversationData from utils.suid import check_suid diff --git a/src/cache/in_memory_cache.py b/src/cache/in_memory_cache.py index c6b017289..cf2b85a76 100644 --- a/src/cache/in_memory_cache.py +++ b/src/cache/in_memory_cache.py @@ -3,8 +3,8 @@ from 
cache.cache import Cache from log import get_logger from models.cache_entry import CacheEntry +from models.common import ConversationData from models.config import InMemoryCacheConfig -from models.responses import ConversationData from utils.connection_decorator import connection logger = get_logger(__name__) diff --git a/src/cache/noop_cache.py b/src/cache/noop_cache.py index fb76f6679..e7426885b 100644 --- a/src/cache/noop_cache.py +++ b/src/cache/noop_cache.py @@ -3,7 +3,7 @@ from cache.cache import Cache from log import get_logger from models.cache_entry import CacheEntry -from models.responses import ConversationData +from models.common import ConversationData from utils.connection_decorator import connection logger = get_logger(__name__) diff --git a/src/cache/postgres_cache.py b/src/cache/postgres_cache.py index 4503c9eb3..b51b06893 100644 --- a/src/cache/postgres_cache.py +++ b/src/cache/postgres_cache.py @@ -9,10 +9,14 @@ from cache.cache_error import CacheError from log import get_logger from models.cache_entry import CacheEntry +from models.common import ConversationData +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) from models.config import PostgreSQLDatabaseConfiguration -from models.responses import ConversationData from utils.connection_decorator import connection -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary logger = get_logger(__name__) @@ -166,8 +170,7 @@ def connect(self) -> None: raise ValueError(f"Invalid namespace: {namespace}") if len(namespace) > 63: raise ValueError( - f"Invalid namespace: {namespace}. " - "Maximum length is 63 characters." + f"Invalid namespace: {namespace}. Maximum length is 63 characters." 
) try: self.connection = psycopg2.connect( @@ -305,8 +308,7 @@ def get( # pylint: disable=R0914 ] except (ValueError, TypeError) as e: logger.warning( - "Failed to deserialize tool_calls for " - "conversation %s: %s", + "Failed to deserialize tool_calls for conversation %s: %s", conversation_id, e, ) diff --git a/src/cache/sqlite_cache.py b/src/cache/sqlite_cache.py index 25aafd4a8..ee1c4c2e4 100644 --- a/src/cache/sqlite_cache.py +++ b/src/cache/sqlite_cache.py @@ -8,10 +8,14 @@ from cache.cache_error import CacheError from log import get_logger from models.cache_entry import CacheEntry +from models.common import ConversationData +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) from models.config import SQLiteDatabaseConfiguration -from models.responses import ConversationData from utils.connection_decorator import connection -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary logger = get_logger(__name__) @@ -357,8 +361,7 @@ def insert_or_append( referenced_documents_json = json.dumps(docs_as_dicts) except (TypeError, ValueError) as e: logger.warning( - "Failed to serialize referenced_documents for " - "conversation %s: %s", + "Failed to serialize referenced_documents for conversation %s: %s", conversation_id, e, ) diff --git a/src/client.py b/src/client.py index 8ece5d374..a503c0094 100644 --- a/src/client.py +++ b/src/client.py @@ -13,7 +13,7 @@ from configuration import configuration from llama_stack_configuration import YamlDumper, enrich_byok_rag, enrich_solr from log import get_logger -from models.api.responses import ServiceUnavailableResponse +from models.api.responses.error import ServiceUnavailableResponse from models.config import LlamaStackConfiguration from utils.types import Singleton diff --git a/src/metrics/utils.py b/src/metrics/utils.py index 0456c47e6..806e7a336 100644 --- a/src/metrics/utils.py +++ b/src/metrics/utils.py @@ -7,7 +7,7 @@ from client import 
AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ServiceUnavailableResponse +from models.api.responses.error import ServiceUnavailableResponse from utils.common import run_once_async from utils.endpoints import check_configuration_loaded diff --git a/src/models/api/README.md b/src/models/api/README.md index 58243fa92..29e945e42 100644 --- a/src/models/api/README.md +++ b/src/models/api/README.md @@ -1,5 +1,7 @@ # List of source files stored in `src/models/api` directory ## [__init__.py](__init__.py) -Typed HTTP API models (OpenAPI-oriented) for FastAPI routes. +Typed HTTP API models (OpenAPI-oriented) for FastAPI routes. Exposes the [`responses`](responses/README.md) subpackage. +## [responses/](responses/README.md) +HTTP response shapes (successful payloads, errors, and OpenAPI description constants). diff --git a/src/models/api/responses/README.md b/src/models/api/responses/README.md index dff4ff4c1..91ece582b 100644 --- a/src/models/api/responses/README.md +++ b/src/models/api/responses/README.md @@ -6,3 +6,8 @@ HTTP response models and shared OpenAPI description constants. ## [constants.py](constants.py) OpenAPI description strings and shared example-label lists for API responses. +## [error/](error/README.md) +Structured HTTP error response models for OpenAPI documentation. + +## [successful/](successful/README.md) +Concrete successful HTTP response models grouped by domain (barrel in `successful/__init__.py`). 
diff --git a/src/models/api/responses/__init__.py b/src/models/api/responses/__init__.py index 0ead1c15e..3a48064fe 100644 --- a/src/models/api/responses/__init__.py +++ b/src/models/api/responses/__init__.py @@ -1,65 +1,5 @@ """HTTP response models and shared OpenAPI description constants.""" -from models.api.responses.constants import ( - BAD_REQUEST_DESCRIPTION, - CONFLICT_DESCRIPTION, - FILE_UPLOAD_EXCEEDS_SIZE_LIMIT_DESCRIPTION, - FORBIDDEN_DESCRIPTION, - INTERNAL_SERVER_ERROR_DESCRIPTION, - INVALID_FEEDBACK_PATH_DESCRIPTION, - NOT_FOUND_DESCRIPTION, - PROMPT_TOO_LONG_DESCRIPTION, - QUOTA_EXCEEDED_DESCRIPTION, - SERVICE_UNAVAILABLE_DESCRIPTION, - SUCCESSFUL_RESPONSE_DESCRIPTION, - UNAUTHORIZED_DESCRIPTION, - UNAUTHORIZED_OPENAPI_EXAMPLES, - UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH, - UNPROCESSABLE_CONTENT_DESCRIPTION, -) -from models.api.responses.error import ( - AbstractErrorResponse, - BadRequestResponse, - ConflictResponse, - DetailModel, - FileTooLargeResponse, - ForbiddenResponse, - InternalServerErrorResponse, - NotFoundResponse, - PromptTooLongResponse, - QuotaExceededResponse, - ServiceUnavailableResponse, - UnauthorizedResponse, - UnprocessableEntityResponse, -) +from models.api.responses import constants, error, successful -__all__ = [ - "BAD_REQUEST_DESCRIPTION", - "CONFLICT_DESCRIPTION", - "FILE_UPLOAD_EXCEEDS_SIZE_LIMIT_DESCRIPTION", - "FORBIDDEN_DESCRIPTION", - "INTERNAL_SERVER_ERROR_DESCRIPTION", - "INVALID_FEEDBACK_PATH_DESCRIPTION", - "NOT_FOUND_DESCRIPTION", - "PROMPT_TOO_LONG_DESCRIPTION", - "QUOTA_EXCEEDED_DESCRIPTION", - "SERVICE_UNAVAILABLE_DESCRIPTION", - "SUCCESSFUL_RESPONSE_DESCRIPTION", - "UNAUTHORIZED_DESCRIPTION", - "UNAUTHORIZED_OPENAPI_EXAMPLES", - "UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH", - "UNPROCESSABLE_CONTENT_DESCRIPTION", - "AbstractErrorResponse", - "BadRequestResponse", - "ConflictResponse", - "DetailModel", - "ForbiddenResponse", - "InternalServerErrorResponse", - "NotFoundResponse", - "PromptTooLongResponse", 
- "FileTooLargeResponse", - "QuotaExceededResponse", - "ServiceUnavailableResponse", - "UnauthorizedResponse", - "UnprocessableEntityResponse", -] +__all__ = ["error", "successful", "constants"] diff --git a/src/models/api/responses/error/README.md b/src/models/api/responses/error/README.md index 6dcaa4ca9..7525a98b4 100644 --- a/src/models/api/responses/error/README.md +++ b/src/models/api/responses/error/README.md @@ -35,4 +35,3 @@ OpenAPI-aligned error response models: HTTP 401 Unauthorized. ## [unprocessable_entity.py](unprocessable_entity.py) OpenAPI-aligned error response models: HTTP 422 Unprocessable Entity. - diff --git a/src/models/api/responses/successful/README.md b/src/models/api/responses/successful/README.md new file mode 100644 index 000000000..bdcf4ac2c --- /dev/null +++ b/src/models/api/responses/successful/README.md @@ -0,0 +1,37 @@ +# List of source files stored in `src/models/api/responses/successful` directory + +## [__init__.py](__init__.py) +Concrete successful HTTP response models grouped by domain. + +## [bases.py](bases.py) +Base classes for successful API response models. + +## [catalog.py](catalog.py) +Successful responses for models, tools, shields, RAG, and providers. + +## [configuration.py](configuration.py) +Successful response model for the configuration endpoint. + +## [conversations.py](conversations.py) +Successful responses for conversation CRUD and listing. + +## [feedback.py](feedback.py) +Successful responses for feedback and feedback status endpoints. + +## [mcp_servers.py](mcp_servers.py) +Successful responses for MCP server registration and listing. + +## [probes.py](probes.py) +Successful responses for service probes and related endpoints (info, readiness, liveness, status, auth). + +## [prompts.py](prompts.py) +Successful responses for stored prompt templates. + +## [query.py](query.py) +Successful response models for synchronous query and streaming query documentation. 
+ +## [responses_openai.py](responses_openai.py) +Successful response model for the OpenAI-compatible Responses API. + +## [vector_stores.py](vector_stores.py) +Successful responses for vector stores and vector store files. diff --git a/src/models/api/responses/successful/__init__.py b/src/models/api/responses/successful/__init__.py new file mode 100644 index 000000000..d84bf2197 --- /dev/null +++ b/src/models/api/responses/successful/__init__.py @@ -0,0 +1,97 @@ +"""Concrete successful HTTP response models grouped by domain.""" + +from models.api.responses.successful.catalog import ( + ModelsResponse, + ProviderResponse, + ProvidersListResponse, + RAGInfoResponse, + RAGListResponse, + ShieldsResponse, + ToolsResponse, +) +from models.api.responses.successful.configuration import ConfigurationResponse +from models.api.responses.successful.conversations import ( + ConversationDeleteResponse, + ConversationResponse, + ConversationsListResponse, + ConversationsListResponseV2, + ConversationUpdateResponse, +) +from models.api.responses.successful.feedback import ( + FeedbackResponse, + FeedbackStatusUpdateResponse, +) +from models.api.responses.successful.mcp_servers import ( + MCPClientAuthOptionsResponse, + MCPServerDeleteResponse, + MCPServerListResponse, + MCPServerRegistrationResponse, +) +from models.api.responses.successful.probes import ( + AuthorizedResponse, + InfoResponse, + LivenessResponse, + ReadinessResponse, + StatusResponse, +) +from models.api.responses.successful.prompts import ( + PromptDeleteResponse, + PromptResourceResponse, + PromptsListResponse, +) +from models.api.responses.successful.query import ( + QueryResponse, + StreamingInterruptResponse, + StreamingQueryResponse, +) +from models.api.responses.successful.responses_openai import ResponsesResponse +from models.api.responses.successful.vector_stores import ( + FileResponse, + VectorStoreDeleteResponse, + VectorStoreFileDeleteResponse, + VectorStoreFileResponse, + 
VectorStoreFilesListResponse, + VectorStoreResponse, + VectorStoresListResponse, +) + +__all__ = [ + "AuthorizedResponse", + "ConfigurationResponse", + "ConversationDeleteResponse", + "ConversationResponse", + "ConversationsListResponse", + "ConversationsListResponseV2", + "ConversationUpdateResponse", + "FeedbackResponse", + "FeedbackStatusUpdateResponse", + "FileResponse", + "InfoResponse", + "LivenessResponse", + "MCPClientAuthOptionsResponse", + "MCPServerDeleteResponse", + "MCPServerListResponse", + "MCPServerRegistrationResponse", + "ModelsResponse", + "PromptDeleteResponse", + "PromptResourceResponse", + "PromptsListResponse", + "ProviderResponse", + "ProvidersListResponse", + "QueryResponse", + "RAGInfoResponse", + "RAGListResponse", + "ReadinessResponse", + "ResponsesResponse", + "ShieldsResponse", + "StatusResponse", + "StreamingInterruptResponse", + "StreamingQueryResponse", + "ToolsResponse", + "VectorStoreDeleteResponse", + "VectorStoreFileDeleteResponse", + "VectorStoreFileResponse", + "VectorStoreFilesListResponse", + "VectorStoreResponse", + "VectorStoresListResponse", +] diff --git a/src/models/api/responses/successful/bases.py b/src/models/api/responses/successful/bases.py new file mode 100644 index 000000000..c4355d25b --- /dev/null +++ b/src/models/api/responses/successful/bases.py @@ -0,0 +1,84 @@ +"""Base classes for successful API response models.""" + +from typing import Any, ClassVar + +from pydantic import BaseModel, Field, computed_field +from pydantic_core import SchemaError + +from log import get_logger +from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION + +logger = get_logger(__name__) + + +class AbstractSuccessfulResponse(BaseModel): + """Base class for all successful response models.""" + + @classmethod + def openapi_response(cls) -> dict[str, Any]: + """Generate FastAPI response dict with a single example from model_config.""" + schema = cls.model_json_schema() + model_examples = schema.get("examples") + if 
not model_examples: + raise SchemaError(f"Examples not found in {cls.__name__}") + example_value = model_examples[0] + content = {"application/json": {"example": example_value}} + + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + "content": content, + } + + +class AbstractDeleteResponse(BaseModel): + """Base model for successful delete responses.""" + + deleted: bool = Field( + ..., + description="Whether the deletion was successful.", + examples=[True, False], + ) + resource_name: ClassVar[str] + + @computed_field + def response(self) -> str: + """Human-readable outcome of the delete operation.""" + return ( + f"{self.resource_name} deleted successfully" + if self.deleted + else f"{self.resource_name} not found" + ) + + @classmethod + def openapi_response(cls) -> dict[str, Any]: + """Build FastAPI/OpenAPI metadata with named application/json examples. + + Returns: + A response dict with description, model, and content keys. + + Raises: + SchemaError: If the model JSON schema has no examples list. 
+ """ + schema = cls.model_json_schema() + model_examples = schema.get("examples") + if not model_examples: + raise SchemaError(f"Examples not found in {cls.__name__}") + + examples: dict[str, dict[str, Any]] = {} + for index, example in enumerate(model_examples): + if "label" not in example: + raise SchemaError( + f"Example at index {index} in {cls.__name__} has no label" + ) + if "value" not in example: + raise SchemaError( + f"Example at index {index} in {cls.__name__} has no value" + ) + examples[example["label"]] = {"value": example["value"]} + + return { + "description": SUCCESSFUL_RESPONSE_DESCRIPTION, + "model": cls, + "content": {"application/json": {"examples": examples}}, + } diff --git a/src/models/api/responses/successful/catalog.py b/src/models/api/responses/successful/catalog.py new file mode 100644 index 000000000..3d357a724 --- /dev/null +++ b/src/models/api/responses/successful/catalog.py @@ -0,0 +1,258 @@ +"""Successful response bodies for catalog-style endpoints.""" + +from typing import Any, Optional + +from pydantic import Field + +from models.api.responses.successful.bases import AbstractSuccessfulResponse + + +class ModelsResponse(AbstractSuccessfulResponse): + """Model representing a response to models request.""" + + models: list[dict[str, Any]] = Field( + ..., + description="List of models available", + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "models": [ + { + "identifier": "openai/gpt-4-turbo", + "metadata": {}, + "api_model_type": "llm", + "provider_id": "openai", + "type": "model", + "provider_resource_id": "gpt-4-turbo", + "model_type": "llm", + }, + ], + } + ] + } + } + + +class ToolsResponse(AbstractSuccessfulResponse): + """Model representing a response to tools request.""" + + tools: list[dict[str, Any]] = Field( + description=( + "List of tools available from all configured MCP servers and built-in toolgroups" + ), + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "tools": [ + { 
+ "identifier": "filesystem_read", + "description": "Read contents of a file from the filesystem", + "parameters": [ + { + "name": "path", + "description": "Path to the file to read", + "parameter_type": "string", + "required": True, + "default": None, + } + ], + "provider_id": "model-context-protocol", + "toolgroup_id": "filesystem-tools", + "server_source": "http://localhost:3000", + "type": "tool", + } + ], + } + ] + } + } + + +class ShieldsResponse(AbstractSuccessfulResponse): + """Model representing a response to shields request.""" + + shields: list[dict[str, Any]] = Field( + ..., + description="List of shields available", + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "shields": [ + { + "identifier": "lightspeed_question_validity-shield", + "provider_resource_id": "lightspeed_question_validity-shield", + "provider_id": "lightspeed_question_validity", + "type": "shield", + "params": {}, + } + ], + } + ] + } + } + + +class RAGInfoResponse(AbstractSuccessfulResponse): + """Model representing a response with information about RAG DB.""" + + id: str = Field( + ..., description="Vector DB unique ID", examples=["vs_00000000_0000_0000"] + ) + name: Optional[str] = Field( + None, + description="Human readable vector DB name", + examples=["Faiss Store with Knowledge base"], + ) + created_at: int = Field( + ..., + description="When the vector store was created, represented as Unix time", + examples=[1763391371], + ) + last_active_at: Optional[int] = Field( + None, + description="When the vector store was last active, represented as Unix time", + examples=[1763391371], + ) + usage_bytes: int = Field( + ..., + description="Storage byte(s) used by this vector DB", + examples=[0], + ) + expires_at: Optional[int] = Field( + None, + description="When the vector store expires, represented as Unix time", + examples=[1763391371], + ) + object: str = Field( + ..., + description="Object type", + examples=["vector_store"], + ) + status: str = Field( + ..., 
+ description="Vector DB status", + examples=["completed"], + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "name": "Faiss Store with Knowledge base", + "created_at": 1763391371, + "last_active_at": 1763391371, + "usage_bytes": 1024000, + "expires_at": None, + "object": "vector_store", + "status": "completed", + } + ] + } + } + + +class RAGListResponse(AbstractSuccessfulResponse): + """Model representing a response to list RAGs request.""" + + rags: list[str] = Field( + ..., + title="RAG list response", + description="List of RAG identifiers", + examples=[ + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3", + ], + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "rags": [ + "vs_00000000-cafe-babe-0000-000000000000", + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3", + ] + } + ] + } + } + + +class ProvidersListResponse(AbstractSuccessfulResponse): + """Model representing a response to providers request.""" + + providers: dict[str, list[dict[str, Any]]] = Field( + ..., + description="List of available API types and their corresponding providers", + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "providers": { + "inference": [ + { + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers", + }, + { + "provider_id": "openai", + "provider_type": "remote::openai", + }, + ], + "agents": [ + { + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference", + }, + ], + }, + } + ] + } + } + + +class ProviderResponse(AbstractSuccessfulResponse): + """Model representing a response to get specific provider request.""" + + api: str = Field( + ..., + description="The API this provider implements", + ) + config: dict[str, Any] = Field( + ..., + description="Provider configuration parameters", + ) + health: dict[str, Any] = Field( + ..., + 
description="Current health status of the provider", + ) + provider_id: str = Field(..., description="Unique provider identifier") + provider_type: str = Field(..., description="Provider implementation type") + + model_config = { + "json_schema_extra": { + "examples": [ + { + "api": "inference", + "config": {"api_key": "********"}, + "health": {"status": "OK", "message": "Healthy"}, + "provider_id": "openai", + "provider_type": "remote::openai", + } + ] + } + } diff --git a/src/models/api/responses/successful/configuration.py b/src/models/api/responses/successful/configuration.py new file mode 100644 index 000000000..d41e8ff20 --- /dev/null +++ b/src/models/api/responses/successful/configuration.py @@ -0,0 +1,94 @@ +"""Successful response model for the configuration endpoint.""" + +from pydantic import ConfigDict + +from models.api.responses.successful.bases import AbstractSuccessfulResponse +from models.config import Configuration + + +class ConfigurationResponse(AbstractSuccessfulResponse): + """Success response model for the config endpoint. + + Attributes: + configuration: Parsed application configuration returned to the client. 
+ """ + + configuration: Configuration + + model_config = ConfigDict( + json_schema_extra={ + "examples": [ + { + "configuration": { + "name": "lightspeed-stack", + "service": { + "host": "localhost", + "port": 8080, + "auth_enabled": False, + "workers": 1, + "color_log": True, + "access_log": True, + "tls_config": { + "tls_certificate_path": None, + "tls_key_path": None, + "tls_key_password": None, + }, + "cors": { + "allow_origins": ["*"], + "allow_credentials": False, + "allow_methods": ["*"], + "allow_headers": ["*"], + }, + }, + "llama_stack": { + "url": "http://localhost:8321", + "api_key": "*****", + "use_as_library_client": False, + "library_client_config_path": None, + }, + "user_data_collection": { + "feedback_enabled": True, + "feedback_storage": "/tmp/data/feedback", + "transcripts_enabled": False, + "transcripts_storage": "/tmp/data/transcripts", + }, + "database": { + "sqlite": {"db_path": "/tmp/lightspeed-stack.db"}, + "postgres": None, + }, + "mcp_servers": [ + { + "name": "server1", + "provider_id": "provider1", + "url": "http://url.com:1", + }, + ], + "authentication": { + "module": "noop", + "skip_tls_verification": False, + }, + "authorization": {"access_rules": []}, + "customization": None, + "inference": { + "default_model": "gpt-4-turbo", + "default_provider": "openai", + }, + "conversation_cache": { + "type": None, + "memory": None, + "sqlite": None, + "postgres": None, + }, + "byok_rag": [], + "quota_handlers": { + "sqlite": None, + "postgres": None, + "limiters": [], + "scheduler": {"period": 1}, + "enable_token_history": False, + }, + } + } + ] + } + ) diff --git a/src/models/api/responses/successful/conversations.py b/src/models/api/responses/successful/conversations.py new file mode 100644 index 000000000..c1bacae42 --- /dev/null +++ b/src/models/api/responses/successful/conversations.py @@ -0,0 +1,219 @@ +"""Successful responses for conversation CRUD and listing.""" + +from typing import ClassVar + +from pydantic import Field, 
computed_field + +from log import get_logger +from models.api.responses.successful.bases import ( + AbstractDeleteResponse, + AbstractSuccessfulResponse, +) +from models.common.conversation import ( + ConversationData, + ConversationDetails, + ConversationTurn, +) + +logger = get_logger(__name__) + + +class ConversationResponse(AbstractSuccessfulResponse): + """Model representing a response for retrieving a conversation. + + Attributes: + conversation_id: The conversation ID (UUID). + chat_history: The chat history as a list of conversation turns. + """ + + conversation_id: str = Field( + ..., + description="Conversation ID (UUID)", + examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], + ) + + chat_history: list[ConversationTurn] = Field( + ..., + description="The simplified chat history as a list of conversation turns", + examples=[ + { + "messages": [ + {"content": "Hello", "type": "user"}, + {"content": "Hi there!", "type": "assistant"}, + ], + "tool_calls": [], + "tool_results": [], + "provider": "openai", + "model": "gpt-4o-mini", + "started_at": "2024-01-01T00:01:00Z", + "completed_at": "2024-01-01T00:01:05Z", + } + ], + ) + + # provides examples for /docs endpoint + model_config = { + "json_schema_extra": { + "examples": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "chat_history": [ + { + "messages": [ + {"content": "Hello", "type": "user"}, + {"content": "Hi there!", "type": "assistant"}, + ], + "tool_calls": [], + "tool_results": [], + "provider": "openai", + "model": "gpt-4o-mini", + "started_at": "2024-01-01T00:01:00Z", + "completed_at": "2024-01-01T00:01:05Z", + } + ], + } + ] + } + } + + +class ConversationDeleteResponse(AbstractDeleteResponse): + """Response for deleting a conversation.""" + + resource_name: ClassVar[str] = "Conversation" + conversation_id: str = Field( + ..., + description="Conversation identifier that was passed to delete.", + examples=["123e4567-e89b-12d3-a456-426614174000"], + ) + + 
# Example body published under /docs for ConversationsListResponse.
# The second entry carries no "last_message_at" key.
_CONVERSATIONS_LIST_EXAMPLE = {
    "conversations": [
        {
            "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
            "created_at": "2024-01-01T00:00:00Z",
            "last_message_at": "2024-01-01T00:05:00Z",
            "message_count": 5,
            "last_used_model": "gemini/gemini-2.0-flash",
            "last_used_provider": "gemini",
            "topic_summary": "Openshift Microservices Deployment Strategies",
        },
        {
            "conversation_id": "456e7890-e12b-34d5-a678-901234567890",
            "created_at": "2024-01-01T01:00:00Z",
            "message_count": 2,
            "last_used_model": "gemini/gemini-2.5-flash",
            "last_used_provider": "gemini",
            "topic_summary": "RHDH Purpose Summary",
        },
    ]
}


class ConversationsListResponse(AbstractSuccessfulResponse):
    """Model representing a response for listing conversations of a user.

    Attributes:
        conversations: List of conversation details associated with the user.
    """

    # Required field: no default is declared.
    conversations: list[ConversationDetails]

    # provides examples for /docs endpoint
    model_config = {
        "json_schema_extra": {"examples": [_CONVERSATIONS_LIST_EXAMPLE]},
    }
# Values shared by the field-level and model-level examples below.
_UPDATED_CONVERSATION_ID = "123e4567-e89b-12d3-a456-426614174000"
_UPDATED_MESSAGE = "Topic summary updated successfully"


class ConversationUpdateResponse(AbstractSuccessfulResponse):
    """Model representing a response for updating a conversation topic summary.

    Attributes:
        conversation_id: The conversation ID (UUID) that was updated.
        success: Whether the update was successful.
        message: A message about the update result.
    """

    # No default is passed to Field(), so all three fields stay required.
    conversation_id: str = Field(
        description="The conversation ID (UUID) that was updated",
        examples=[_UPDATED_CONVERSATION_ID],
    )
    success: bool = Field(
        description="Whether the update was successful",
        examples=[True],
    )
    message: str = Field(
        description="A message about the update result",
        examples=[_UPDATED_MESSAGE],
    )

    # provides examples for /docs endpoint
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "conversation_id": _UPDATED_CONVERSATION_ID,
                    "success": True,
                    "message": _UPDATED_MESSAGE,
                }
            ]
        }
    }
# Example value of the ``status`` mapping shown under /docs.
_FEEDBACK_STATUS_EXAMPLE: dict[str, Any] = {
    "previous_status": True,
    "updated_status": False,
    "updated_by": "user/test",
    "timestamp": "2023-03-15 12:34:56",
}


class FeedbackStatusUpdateResponse(AbstractSuccessfulResponse):
    """Model representing a response to a feedback status update request.

    Attributes:
        status: The previous and current status of the service and who updated it.
    """

    # Free-form mapping; required because no default is declared.
    status: dict[str, Any]

    # provides examples for /docs endpoint
    model_config = {
        "json_schema_extra": {
            "examples": [{"status": _FEEDBACK_STATUS_EXAMPLE}],
        }
    }
# Server name reused inside the example status message.
_REGISTERED_SERVER_NAME = "mcp-integration-tools"


class MCPServerRegistrationResponse(AbstractSuccessfulResponse):
    """Response for a successful MCP server registration.

    Attributes:
        name: Registered MCP server name.
        url: Registered MCP server URL.
        provider_id: MCP provider identification.
        message: Status message.
    """

    # No default is passed to Field(), so every field stays required.
    name: str = Field(
        description="Registered MCP server name",
    )
    url: str = Field(
        description="Registered MCP server URL",
    )
    provider_id: str = Field(
        description="MCP provider identification",
    )
    message: str = Field(
        description="Status message",
    )

    # provides examples for /docs endpoint
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "name": _REGISTERED_SERVER_NAME,
                    "url": "http://host.docker.internal:7008/api/mcp-actions/v1",
                    "provider_id": "model-context-protocol",
                    "message": (
                        f"MCP server '{_REGISTERED_SERVER_NAME}' "
                        "registered successfully"
                    ),
                }
            ]
        }
    }
# Server name reused inside the example status message.
_UNREGISTERED_SERVER_NAME = "test-mcp-server"


class MCPServerDeleteResponse(AbstractSuccessfulResponse):
    """Response for a successful MCP server deletion.

    Attributes:
        name: Deleted MCP server name.
        message: Status message.
    """

    # No default is passed to Field(), so both fields stay required.
    name: str = Field(
        description="Deleted MCP server name",
    )
    message: str = Field(
        description="Status message",
    )

    # provides examples for /docs endpoint
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "name": _UNREGISTERED_SERVER_NAME,
                    "message": (
                        f"MCP server '{_UNREGISTERED_SERVER_NAME}' "
                        "unregistered successfully"
                    ),
                }
            ]
        }
    }
class ReadinessResponse(AbstractSuccessfulResponse):
    """Model representing response to a readiness request.

    Attributes:
        ready: If service is ready.
        reason: The reason for the readiness.
        providers: List of unhealthy providers in case of readiness failure.
    """

    ready: bool = Field(
        ...,
        description="Flag indicating if service is ready",
        examples=[True, False],
    )

    reason: str = Field(
        ...,
        description="The reason for the readiness",
        examples=["Service is ready"],
    )

    providers: list[ProviderHealthStatus] = Field(
        ...,
        description="List of unhealthy providers in case of readiness failure.",
        # ``examples`` is a list of example *values*. A bare ``[]`` publishes no
        # example at all; ``[[]]`` publishes one example whose value is the
        # empty list, matching the model-level example below.
        examples=[[]],
    )

    # provides examples for /docs endpoint
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "ready": True,
                    "reason": "Service is ready",
                    "providers": [],
                }
            ]
        }
    }
class AuthorizedResponse(AbstractSuccessfulResponse):
    """Model representing a response to an authorization request.

    Attributes:
        user_id: The ID of the logged in user.
        username: The name of the logged in user.
        skip_userid_check: Whether to skip the user ID check.
    """

    # No default is passed to Field(), so all three fields stay required.
    user_id: str = Field(
        description="User ID, for example UUID",
        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
    )

    username: str = Field(
        description="User name",
        examples=["John Doe", "Adam Smith"],
    )

    skip_userid_check: bool = Field(
        description="Whether to skip the user ID check",
        examples=[True, False],
    )

    # provides examples for /docs endpoint
    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "user_id": "123e4567-e89b-12d3-a456-426614174000",
                    "username": "user1",
                    "skip_userid_check": False,
                },
            ],
        },
    }
# Single prompt entry used by the list example below.
_PROMPT_LIST_ENTRY_EXAMPLE = {
    "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
    "version": 1,
    "is_default": True,
    "prompt": "Summarize: {{text}}",
    "variables": ["text"],
}


class PromptsListResponse(AbstractSuccessfulResponse):
    """List of stored prompt templates returned by Llama Stack.

    Attributes:
        data: Prompt entries as returned by the Llama Stack list API.
    """

    # Defaults to a fresh empty list per instance.
    data: list[PromptResourceResponse] = Field(
        description="Prompt entries (as returned by Llama Stack list)",
        default_factory=list,
    )

    # Unknown keys are rejected ("extra": "forbid"); the example feeds /docs.
    model_config = {
        "extra": "forbid",
        "json_schema_extra": {
            "examples": [{"data": [_PROMPT_LIST_ENTRY_EXAMPLE]}],
        },
    }
+ deleted: Whether the prompt was deleted successfully + response: Human readable response + """ + + resource_name: ClassVar[str] = "Prompt" + prompt_id: str = Field( + ..., + description="Prompt identifier that was passed to delete.", + examples=["pmpt_0123456789abcdef0123456789abcdef01234567"], + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "label": "deleted", + "value": { + "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567", + "deleted": True, + "response": "Prompt deleted successfully", + }, + }, + { + "label": "not found", + "value": { + "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567", + "deleted": False, + "response": "Prompt not found", + }, + }, + ] + } + } diff --git a/src/models/api/responses/successful/query.py b/src/models/api/responses/successful/query.py new file mode 100644 index 000000000..c59bac766 --- /dev/null +++ b/src/models/api/responses/successful/query.py @@ -0,0 +1,243 @@ +"""Successful response models for synchronous query and streaming query documentation.""" + +from typing import Any, Optional + +from pydantic import Field +from pydantic_core import SchemaError + +from constants import MEDIA_TYPE_EVENT_STREAM +from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION +from models.api.responses.successful.bases import AbstractSuccessfulResponse +from models.common.turn_summary import ( + RAGChunk, + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) + + +class QueryResponse(AbstractSuccessfulResponse): + """Model representing LLM response to a query. + + Attributes: + conversation_id: The optional conversation ID (UUID). + response: The response. + rag_chunks: Deprecated. List of RAG chunks used to generate the response. + This information is now available in tool_results under file_search_call type. + referenced_documents: The URLs and titles for the documents used to generate the response. + tool_calls: List of tool calls made during response generation. 
+ tool_results: List of tool results. + truncated: Whether conversation history was truncated. + input_tokens: Number of tokens sent to LLM. + output_tokens: Number of tokens received from LLM. + available_quotas: Quota available as measured by all configured quota limiters. + """ + + conversation_id: Optional[str] = Field( + None, + description="The optional conversation ID (UUID)", + examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], + ) + + response: str = Field( + description="Response from LLM", + examples=[ + "Kubernetes is an open-source container orchestration system for automating ..." + ], + ) + + rag_chunks: list[RAGChunk] = Field( + default_factory=list, + description="Deprecated: List of RAG chunks used to generate the response.", + ) + + referenced_documents: list[ReferencedDocument] = Field( + default_factory=list, + description="List of documents referenced in generating the response", + examples=[ + [ + { + "doc_url": "https://docs.openshift.com/" + "container-platform/4.15/operators/olm/index.html", + "doc_title": "Operator Lifecycle Manager (OLM)", + } + ] + ], + ) + + truncated: bool = Field( + False, + description="Deprecated: whether conversation history was truncated", + examples=[False, True], + ) + + input_tokens: int = Field( + 0, + description="Number of tokens sent to LLM", + examples=[150, 250, 500], + ) + + output_tokens: int = Field( + 0, + description="Number of tokens received from LLM", + examples=[50, 100, 200], + ) + + available_quotas: dict[str, int] = Field( + default_factory=dict, + description="Quota available as measured by all configured quota limiters", + examples=[{"daily": 1000, "monthly": 50000}], + ) + + tool_calls: list[ToolCallSummary] = Field( + default_factory=list, + description="List of tool calls made during response generation", + ) + + tool_results: list[ToolResultSummary] = Field( + default_factory=list, + description="List of tool results", + ) + + model_config = { + "json_schema_extra": { + "examples": [ + 
# Token payloads of the documentation stream, in emission order; the position
# in this tuple is the "id" of the generated token event.
_STREAMING_EXAMPLE_TOKENS = (
    "No Violation",
    "",
    "Hello",
    "!",
    " How",
    " can",
    " I",
    " assist",
    " you",
    " today",
    "?",
)

# Complete SSE transcript shown in the OpenAPI docs:
# start -> token events -> turn_complete -> end.
_STREAMING_EXAMPLE = "".join(
    [
        'data: {"event": "start", "data": {'
        '"conversation_id": "123e4567-e89b-12d3-a456-426614174000", '
        '"request_id": "123e4567-e89b-12d3-a456-426614174001"}}\n\n',
        *(
            f'data: {{"event": "token", "data": {{"id": {idx}, "token": "{tok}"}}}}\n\n'
            for idx, tok in enumerate(_STREAMING_EXAMPLE_TOKENS)
        ),
        'data: {"event": "turn_complete", "data": {'
        '"token": "Hello! How can I assist you today?"}}\n\n',
        'data: {"event": "end", "data": {'
        '"referenced_documents": [], '
        '"truncated": null, "input_tokens": 11, "output_tokens": 19}, '
        '"available_quotas": {}}\n\n',
    ]
)


class StreamingQueryResponse(AbstractSuccessfulResponse):
    """Documentation-only model for streaming query responses using Server-Sent Events (SSE)."""

    model_config = {"json_schema_extra": {"examples": [_STREAMING_EXAMPLE]}}

    @classmethod
    def openapi_response(cls) -> dict[str, Any]:
        """Build the OpenAPI response entry describing the SSE stream.

        Only the first example of the model JSON schema is published, as the
        example of a plain string schema under the event-stream media type.
        The returned dict deliberately has no "model" key because this model
        is never serialized.

        Returns:
            Dictionary with ``description`` and ``content`` keys.

        Raises:
            SchemaError: If the model JSON schema has no examples.
        """
        published = cls.model_json_schema().get("examples")
        if not published:
            raise SchemaError(f"Examples not found in {cls.__name__}")

        return {
            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
            "content": {
                MEDIA_TYPE_EVENT_STREAM: {
                    "schema": {"type": "string"},
                    "example": published[0],
                }
            },
        }
+ + Attributes: + request_id: The streaming request ID targeted by the interrupt call. + interrupted: Whether an in-progress stream was interrupted. + message: Human-readable interruption status message. + """ + + request_id: str = Field( + description="The streaming request ID targeted by the interrupt call", + examples=["123e4567-e89b-12d3-a456-426614174000"], + ) + + interrupted: bool = Field( + description="Whether an in-progress stream was interrupted", + examples=[True], + ) + + message: str = Field( + description="Human-readable interruption status message", + examples=["Streaming request interrupted"], + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "request_id": "123e4567-e89b-12d3-a456-426614174000", + "interrupted": True, + "message": "Streaming request interrupted", + } + ] + } + } diff --git a/src/models/api/responses/successful/responses_openai.py b/src/models/api/responses/successful/responses_openai.py new file mode 100644 index 000000000..30ed13fb0 --- /dev/null +++ b/src/models/api/responses/successful/responses_openai.py @@ -0,0 +1,210 @@ +"""Successful response model for the OpenAI-compatible Responses API.""" + +from typing import Any, Literal, Optional, cast + +from llama_stack_api.openai_responses import ( + OpenAIResponseError as Error, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseInputToolChoice as ToolChoice, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseOutput as Output, +) +from llama_stack_api.openai_responses import ( + OpenAIResponsePrompt as Prompt, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseReasoning as Reasoning, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseText as Text, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseTool as OutputTool, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseUsage as Usage, +) + +from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION 
# JSON body published as the application/json example.
_RESPONSES_JSON_EXAMPLE = {
    "created_at": 1704067200,
    "completed_at": 1704067250,
    "id": "resp_abc123",
    "model": "openai/gpt-4-turbo",
    "object": "response",
    "output": [
        {
            "type": "message",
            "role": "assistant",
            "content": [
                {
                    "type": "output_text",
                    "text": "Kubernetes is an open-source container orchestration system...",
                }
            ],
        }
    ],
    "parallel_tool_calls": True,
    "status": "completed",
    "temperature": 0.7,
    "text": {"format": {"type": "text"}},
    "usage": {
        "input_tokens": 100,
        "output_tokens": 50,
        "total_tokens": 150,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens_details": {"reasoning_tokens": 0},
    },
    "instructions": "You are a helpful assistant",
    "store": True,
    "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e",
    "available_quotas": {"daily": 1000, "monthly": 50000},
    "output_text": "Kubernetes is an open-source container orchestration system...",
}

# Abridged SSE transcript published as the text/event-stream example.
_RESPONSES_SSE_EXAMPLE = (
    "event: response.created\n"
    'data: {"type":"response.created","sequence_number":0,'
    '"response":{"id":"resp_abc","object":"response",'
    '"created_at":1704067200,"status":"in_progress","model":"openai/gpt-4o-mini",'
    '"output":[],"store":true,"text":{"format":{"type":"text"}},'
    '"conversation":"0d21ba731f21f798dc9680125d5d6f49",'
    '"available_quotas":{},"output_text":""}}\n\n'
    "event: response.output_item.added\n"
    'data: {"type":"response.output_item.added","sequence_number":1,'
    '"response_id":"resp_abc","output_index":0,'
    '"item":{"id":"msg_abc","type":"message","status":"in_progress",'
    '"role":"assistant","content":[]}}\n\n'
    "...\n\n"
    "event: response.completed\n"
    'data: {"type":"response.completed","sequence_number":30,'
    '"response":{"id":"resp_abc","object":"response",'
    '"created_at":1704067200,"status":"completed","model":"openai/gpt-4o-mini",'
    '"output":[{"id":"msg_abc","type":"message","status":"completed",'
    '"role":"assistant","content":[{"type":"output_text",'
    '"text":"Hello! How can I help?","annotations":[]}]}],'
    '"store":true,"text":{"format":{"type":"text"}},'
    '"usage":{"input_tokens":10,"output_tokens":6,"total_tokens":16,'
    '"input_tokens_details":{"cached_tokens":0},'
    '"output_tokens_details":{"reasoning_tokens":0}},'
    '"conversation":"0d21ba731f21f798dc9680125d5d6f49",'
    '"available_quotas":{"daily":1000,"monthly":50000},'
    '"output_text":"Hello! How can I help?"}}\n\n'
    "data: [DONE]\n\n"
)


class ResponsesResponse(AbstractSuccessfulResponse):
    """Model representing a response from the Responses API following LCORE specification.

    Attributes:
        created_at: Unix timestamp when the response was created.
        completed_at: Unix timestamp when the response was completed, if applicable.
        error: Error details if the response failed or was blocked.
        id: Unique identifier for this response.
        model: Model identifier in "provider/model" format used for generation.
        object: Object type identifier, always "response".
        output: List of structured output items containing messages, tool calls, and
            other content. This is the primary response content.
        parallel_tool_calls: Whether the model can make multiple tool calls in parallel.
        previous_response_id: Identifier of the previous response in a multi-turn
            conversation.
        prompt: The input prompt object that was sent to the model.
        status: Current status of the response (e.g., "completed", "blocked",
            "in_progress").
        temperature: Temperature parameter used for generation (controls randomness).
        text: Text response configuration object used for OpenAI responses.
        top_p: Top-p sampling parameter used for generation.
        tools: List of tools available to the model during generation.
        tool_choice: Tool selection strategy used (e.g., "auto", "required", "none").
        truncation: Strategy used for handling content that exceeds context limits.
        usage: Token usage statistics including input_tokens, output_tokens, and
            total_tokens.
        instructions: System instructions or guidelines provided to the model.
        max_tool_calls: Maximum number of tool calls allowed in a single response.
        reasoning: Reasoning configuration (effort level) used for the response.
        max_output_tokens: Upper bound for tokens generated in the response.
        safety_identifier: Safety/guardrail identifier applied to the request.
        metadata: Additional metadata dictionary with custom key-value pairs.
        store: Whether the response was stored.
        conversation: Conversation ID linking this response to a conversation thread
            (LCORE-specific).
        available_quotas: Remaining token quotas for the user (LCORE-specific).
        output_text: Aggregated text output from all output_text items in the
            output array.
    """

    created_at: int
    completed_at: Optional[int] = None
    error: Optional[Error] = None
    id: str
    model: str
    object: Literal["response"] = "response"
    output: list[Output]
    parallel_tool_calls: bool = True
    previous_response_id: Optional[str] = None
    prompt: Optional[Prompt] = None
    status: str
    temperature: Optional[float] = None
    text: Optional[Text] = None
    top_p: Optional[float] = None
    tools: Optional[list[OutputTool]] = None
    tool_choice: Optional[ToolChoice] = None
    truncation: Optional[str] = None
    usage: Optional[Usage] = None
    instructions: Optional[str] = None
    max_tool_calls: Optional[int] = None
    reasoning: Optional[Reasoning] = None
    max_output_tokens: Optional[int] = None
    safety_identifier: Optional[str] = None
    metadata: Optional[dict[str, str]] = None
    store: Optional[bool] = None
    # LCORE-specific attributes
    conversation: Optional[str] = None
    available_quotas: dict[str, int]
    output_text: str

    # "sse_example" is read back by openapi_response() below.
    model_config = {
        "json_schema_extra": {
            "examples": [_RESPONSES_JSON_EXAMPLE],
            "sse_example": _RESPONSES_SSE_EXAMPLE,
        }
    }

    @classmethod
    def openapi_response(cls) -> dict[str, Any]:
        """Build the OpenAPI response dict for both supported media types.

        The ``application/json`` entry carries the first example of the model
        JSON schema, or stays empty when there is none. The
        ``text/event-stream`` entry carries the ``sse_example`` string stored
        in ``json_schema_extra`` (empty string when absent).

        Returns:
            Dictionary with ``description``, ``model`` and ``content`` keys.
        """
        published = cls.model_json_schema().get("examples", [])
        json_media: dict[str, Any] = {}
        if published and published[0]:
            json_media["example"] = published[0]

        config = cast(dict[str, Any], dict(cls.model_config))
        extra = config.get("json_schema_extra") or {}

        return {
            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
            "model": cls,
            "content": {
                "application/json": json_media,
                "text/event-stream": {
                    "schema": {"type": "string"},
                    "example": extra.get("sse_example", ""),
                },
            },
        }
# Example vector stores published under /docs for the list response.
_VECTOR_STORE_LIST_EXAMPLES = [
    {
        "id": "vs_abc123",
        "name": "customer_support_docs",
        "created_at": 1704067200,
        "last_active_at": 1704153600,
        "expires_at": None,
        "status": "active",
        "usage_bytes": 1048576,
        "metadata": {"conversation_id": "conv_123"},
    },
    {
        "id": "vs_def456",
        "name": "product_documentation",
        "created_at": 1704070800,
        "last_active_at": 1704157200,
        "expires_at": None,
        "status": "active",
        "usage_bytes": 2097152,
        "metadata": None,
    },
]


class VectorStoresListResponse(AbstractSuccessfulResponse):
    """Response model containing a list of vector stores.

    Attributes:
        data: List of vector store objects.
        object: Object type (always "list").
    """

    # Defaults to a fresh empty list per instance.
    data: list[VectorStoreResponse] = Field(
        description="List of vector stores",
        default_factory=list,
    )
    object: str = Field("list", description="Object type")

    # Unknown keys are rejected ("extra": "forbid"); the example feeds /docs.
    model_config = {
        "extra": "forbid",
        "json_schema_extra": {
            "examples": [
                {
                    "data": _VECTOR_STORE_LIST_EXAMPLES,
                    "object": "list",
                }
            ]
        },
    }
"label": "not found", + "value": { + "file_id": "file_abc123", + "deleted": False, + "response": "Vector store file not found", + }, + }, + ] + } + } + + +class FileResponse(AbstractSuccessfulResponse): + """Response model containing a file object. + + Attributes: + id: File ID. + filename: File name. + bytes: File size in bytes. + created_at: Unix timestamp when created. + purpose: File purpose. + object: Object type (always "file"). + """ + + id: str = Field(..., description="File ID") + filename: str = Field(..., description="File name") + bytes: int = Field(..., description="File size in bytes") + created_at: int = Field(..., description="Unix timestamp when created") + purpose: str = Field(default="assistants", description="File purpose") + object: str = Field(default="file", description="Object type") + + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "id": "file_abc123", + "filename": "documentation.pdf", + "bytes": 524288, + "created_at": 1704067200, + "purpose": "assistants", + "object": "file", + } + ] + }, + } + + +class VectorStoreFileResponse(AbstractSuccessfulResponse): + """Response model containing a vector store file object. + + Attributes: + id: Vector store file ID. + vector_store_id: ID of the vector store. + status: File processing status. + attributes: Optional metadata key-value pairs. + last_error: Optional error message if processing failed. + object: Object type (always "vector_store.file"). + """ + + id: str = Field(..., description="Vector store file ID") + vector_store_id: str = Field(..., description="ID of the vector store") + status: str = Field(..., description="File processing status") + attributes: Optional[dict[str, str | float | bool]] = Field( + None, + description=( + "Set of up to 16 key-value pairs for storing additional information. " + "Keys: strings (max 64 chars). Values: strings (max 512 chars), booleans, or numbers." 
+ ), + ) + last_error: Optional[str] = Field( + None, description="Error message if processing failed" + ) + object: str = Field(default="vector_store.file", description="Object type") + + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "id": "file_abc123", + "vector_store_id": "vs_abc123", + "status": "completed", + "attributes": {"chunk_size": "512", "indexed": True}, + "last_error": None, + "object": "vector_store.file", + } + ] + }, + } + + +class VectorStoreFilesListResponse(AbstractSuccessfulResponse): + """Response model containing a list of vector store files. + + Attributes: + data: List of vector store file objects. + object: Object type (always "list"). + """ + + data: list[VectorStoreFileResponse] = Field( + default_factory=list, description="List of vector store files" + ) + object: str = Field(default="list", description="Object type") + + model_config = { + "extra": "forbid", + "json_schema_extra": { + "examples": [ + { + "data": [ + { + "id": "file_abc123", + "vector_store_id": "vs_abc123", + "status": "completed", + "attributes": {"chunk_size": "512"}, + "last_error": None, + "object": "vector_store.file", + }, + { + "id": "file_def456", + "vector_store_id": "vs_abc123", + "status": "processing", + "attributes": None, + "last_error": None, + "object": "vector_store.file", + }, + ], + "object": "list", + } + ] + }, + } diff --git a/src/models/cache_entry.py b/src/models/cache_entry.py index ce4872ee3..f0768eb4a 100644 --- a/src/models/cache_entry.py +++ b/src/models/cache_entry.py @@ -4,7 +4,11 @@ from pydantic import BaseModel -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) class CacheEntry(BaseModel): diff --git a/src/models/common/__init__.py b/src/models/common/__init__.py new file mode 100644 index 000000000..017d9614a --- /dev/null +++ b/src/models/common/__init__.py @@ 
-0,0 +1,49 @@ +"""Shared Pydantic models used across API layers (not response envelopes).""" + +from models.common.conversation import ( + ConversationData, + ConversationDetails, + ConversationTurn, + Message, +) +from models.common.health import ProviderHealthStatus +from models.common.mcp import MCPServerAuthInfo, MCPServerInfo +from models.common.moderation import ( + ShieldModerationBlocked, + ShieldModerationPassed, + ShieldModerationResult, +) +from models.common.responses.responses_conversation_context import ( + ResponsesConversationContext, +) +from models.common.transcripts import Transcript, TranscriptMetadata +from models.common.turn_summary import ( + RAGChunk, + RAGContext, + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, + TurnSummary, +) + +__all__ = [ + "ConversationData", + "ConversationDetails", + "ConversationTurn", + "MCPServerAuthInfo", + "MCPServerInfo", + "Message", + "ProviderHealthStatus", + "RAGChunk", + "RAGContext", + "ReferencedDocument", + "ResponsesConversationContext", + "ShieldModerationBlocked", + "ShieldModerationPassed", + "ShieldModerationResult", + "ToolCallSummary", + "ToolResultSummary", + "Transcript", + "TranscriptMetadata", + "TurnSummary", +] diff --git a/src/models/common/conversation.py b/src/models/common/conversation.py new file mode 100644 index 000000000..e02884a23 --- /dev/null +++ b/src/models/common/conversation.py @@ -0,0 +1,166 @@ +"""Conversation list rows, metadata, and simplified turn/message shapes for APIs.""" + +from typing import Literal, Optional + +from pydantic import BaseModel, Field + +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) + + +class ConversationData(BaseModel): + """Model representing conversation data returned by cache list operations. 
+ + Attributes: + conversation_id: The conversation ID + topic_summary: The topic summary for the conversation (can be None) + last_message_timestamp: The timestamp of the last message in the conversation + """ + + conversation_id: str + topic_summary: Optional[str] + last_message_timestamp: float + + +class ConversationDetails(BaseModel): + """Model representing the details of a user conversation. + + Attributes: + conversation_id: The conversation ID (UUID). + created_at: When the conversation was created. + last_message_at: When the last message was sent. + message_count: Number of user messages in the conversation. + last_used_model: The last model used for the conversation. + last_used_provider: The provider of the last used model. + topic_summary: The topic summary for the conversation. + + Example: + ```python + conversation = ConversationDetails( + conversation_id="123e4567-e89b-12d3-a456-426614174000", + created_at="2024-01-01T00:00:00Z", + last_message_at="2024-01-01T00:05:00Z", + message_count=5, + last_used_model="gemini/gemini-2.0-flash", + last_used_provider="gemini", + topic_summary="Openshift Microservices Deployment Strategies", + ) + ``` + """ + + conversation_id: str = Field( + ..., + description="Conversation ID (UUID)", + examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], + ) + + created_at: Optional[str] = Field( + None, + description="When the conversation was created", + examples=["2024-01-01T01:00:00Z"], + ) + + last_message_at: Optional[str] = Field( + None, + description="When the last message was sent", + examples=["2024-01-01T01:00:00Z"], + ) + + message_count: Optional[int] = Field( + None, + description="Number of user messages in the conversation", + examples=[42], + ) + + last_used_model: Optional[str] = Field( + None, + description="Identification of the last model used for the conversation", + examples=["gpt-4-turbo", "gpt-3.5-turbo-0125"], + ) + + last_used_provider: Optional[str] = Field( + None, + description="Identification of 
the last provider used for the conversation", + examples=["openai", "gemini"], + ) + + topic_summary: Optional[str] = Field( + None, + description="Topic summary for the conversation", + examples=["Openshift Microservices Deployment Strategies"], + ) + + +class Message(BaseModel): + """Model representing a message in a conversation turn. + + Attributes: + content: The message content. + type: The type of message. + referenced_documents: Optional list of documents referenced in an assistant response. + """ + + content: str = Field( + ..., + description="The message content", + examples=["Hello, how can I help you?"], + ) + type: Literal["user", "assistant", "system", "developer"] = Field( + ..., + description="The type of message", + examples=["user", "assistant", "system", "developer"], + ) + referenced_documents: Optional[list[ReferencedDocument]] = Field( + None, + description="List of documents referenced in the response (assistant messages only)", + ) + + +class ConversationTurn(BaseModel): + """Model representing a single conversation turn. + + Attributes: + messages: List of messages in this turn. + tool_calls: List of tool calls made in this turn. + tool_results: List of tool results from this turn. + provider: Provider identifier used for this turn. + model: Model identifier used for this turn. + started_at: ISO 8601 timestamp when the turn started. + completed_at: ISO 8601 timestamp when the turn completed. 
+ """ + + messages: list[Message] = Field( + default_factory=list, + description="List of messages in this turn", + ) + tool_calls: list[ToolCallSummary] = Field( + default_factory=list, + description="List of tool calls made in this turn", + ) + tool_results: list[ToolResultSummary] = Field( + default_factory=list, + description="List of tool results from this turn", + ) + provider: str = Field( + ..., + description="Provider identifier used for this turn", + examples=["openai"], + ) + model: str = Field( + ..., + description="Model identifier used for this turn", + examples=["gpt-4o-mini"], + ) + started_at: str = Field( + ..., + description="ISO 8601 timestamp when the turn started", + examples=["2024-01-01T00:01:00Z"], + ) + completed_at: str = Field( + ..., + description="ISO 8601 timestamp when the turn completed", + examples=["2024-01-01T00:01:05Z"], + ) diff --git a/src/models/common/health.py b/src/models/common/health.py new file mode 100644 index 000000000..6ccb87f75 --- /dev/null +++ b/src/models/common/health.py @@ -0,0 +1,28 @@ +"""Health-related shared models for readiness and diagnostics.""" + +from typing import Optional + +from pydantic import BaseModel, Field + + +class ProviderHealthStatus(BaseModel): + """Model representing the health status of a provider. + + Attributes: + provider_id: The ID of the provider. + status: The health status ('ok', 'unhealthy', 'not_implemented'). + message: Optional message about the health status. 
+ """ + + provider_id: str = Field( + description="The ID of the provider", + ) + status: str = Field( + description="The health status", + examples=["ok", "unhealthy", "not_implemented"], + ) + message: Optional[str] = Field( + None, + description="Optional message about the health status", + examples=["All systems operational", "Llama Stack is unavailable"], + ) diff --git a/src/models/common/mcp.py b/src/models/common/mcp.py new file mode 100644 index 000000000..6f671ddc0 --- /dev/null +++ b/src/models/common/mcp.py @@ -0,0 +1,33 @@ +"""MCP server metadata models shared by registration and list responses.""" + +from pydantic import BaseModel, Field + + +class MCPServerAuthInfo(BaseModel): + """Information about MCP server client authentication options.""" + + name: str = Field(..., description="MCP server name") + client_auth_headers: list[str] = Field( + ..., + description="List of authentication header names for client-provided tokens", + ) + + +class MCPServerInfo(BaseModel): + """Information about a registered MCP server. + + Attributes: + name: Unique name of the MCP server. + url: URL of the MCP server endpoint. + provider_id: MCP provider identification. + source: Whether the server was registered statically (config) or dynamically (api). 
+ """ + + name: str = Field(..., description="MCP server name") + url: str = Field(..., description="MCP server URL") + provider_id: str = Field(..., description="MCP provider identification") + source: str = Field( + ..., + description="How the server was registered: 'config' (static) or 'api' (dynamic)", + examples=["config", "api"], + ) diff --git a/src/models/common/moderation.py b/src/models/common/moderation.py new file mode 100644 index 000000000..1e4f16368 --- /dev/null +++ b/src/models/common/moderation.py @@ -0,0 +1,29 @@ +"""Shield moderation outcomes for the responses pipeline.""" + +from typing import Annotated, Literal + +from llama_stack_api.openai_responses import ( + OpenAIResponseMessage as ResponseMessage, +) +from pydantic import BaseModel, Field + + +class ShieldModerationPassed(BaseModel): + """Shield moderation passed; no refusal.""" + + decision: Literal["passed"] = "passed" + + +class ShieldModerationBlocked(BaseModel): + """Shield moderation blocked the content; refusal details are present.""" + + decision: Literal["blocked"] = "blocked" + message: str + moderation_id: str + refusal_response: ResponseMessage + + +ShieldModerationResult = Annotated[ + ShieldModerationPassed | ShieldModerationBlocked, + Field(discriminator="decision"), +] diff --git a/src/models/common/responses/__init__.py b/src/models/common/responses/__init__.py new file mode 100644 index 000000000..6cdf7e5c7 --- /dev/null +++ b/src/models/common/responses/__init__.py @@ -0,0 +1,21 @@ +"""Shared models for the OpenAI-compatible Responses API pipeline.""" + +from models.common.responses.responses_api_params import ResponsesApiParams +from models.common.responses.responses_context import ResponsesContext +from models.common.responses.responses_conversation_context import ( + ResponsesConversationContext, +) +from models.common.responses.types import ( + IncludeParameter, + ResponseInput, + ResponseItem, +) + +__all__ = [ + "ResponseInput", + "ResponseItem", + 
"IncludeParameter", + "ResponsesApiParams", + "ResponsesContext", + "ResponsesConversationContext", +] diff --git a/src/models/common/responses/responses_api_params.py b/src/models/common/responses/responses_api_params.py index 6767c392e..acb219c89 100644 --- a/src/models/common/responses/responses_api_params.py +++ b/src/models/common/responses/responses_api_params.py @@ -23,8 +23,8 @@ ) from pydantic import BaseModel, Field +from models.common.responses.types import IncludeParameter, ResponseInput from utils.tool_formatter import translate_vector_store_ids_to_user_facing -from utils.types import IncludeParameter, ResponseInput # Attribute names that are echoed back in the response. _ECHOED_FIELDS: Final[set[str]] = set( @@ -161,7 +161,7 @@ def echoed_params(self, rag_id_mapping: Mapping[str, str]) -> dict[str, Any]: data = self.model_dump(include=_ECHOED_FIELDS) if self.tools is not None: tool_dicts: list[dict[str, Any]] = [] - for t in self.tools: + for t in list(self.tools): if t.type == "mcp": validated = OutputToolMCP.model_validate(t.model_dump()) tool_dicts.append(validated.model_dump()) diff --git a/src/models/common/responses/responses_context.py b/src/models/common/responses/responses_context.py index 3cb6e718f..8e61f0e77 100644 --- a/src/models/common/responses/responses_context.py +++ b/src/models/common/responses/responses_context.py @@ -7,7 +7,8 @@ from llama_stack_client import AsyncLlamaStackClient from pydantic import BaseModel, ConfigDict, Field -from utils.types import RAGContext, ShieldModerationResult +from models.common.moderation import ShieldModerationResult +from models.common.turn_summary import RAGContext class ResponsesContext(BaseModel): diff --git a/src/models/common/responses/responses_conversation_context.py b/src/models/common/responses/responses_conversation_context.py new file mode 100644 index 000000000..05229e9b5 --- /dev/null +++ b/src/models/common/responses/responses_conversation_context.py @@ -0,0 +1,33 @@ +"""Conversation 
resolution result model for the OpenAI-compatible responses endpoint.""" + +from typing import Optional + +from pydantic import BaseModel, ConfigDict, Field + +from models.database.conversations import UserConversation + + +class ResponsesConversationContext(BaseModel): + """Result of resolving conversation context for the responses endpoint. + + Holds the conversation ID to use for the LLM, the optional user conversation + record, and the resolved generate_topic_summary flag. Caller assigns these + to the request in outer scope instead of mutating the request inside the + resolver. + + Attributes: + conversation: Conversation ID in llama-stack format to use for the request. + user_conversation: Resolved user conversation record, or None for new ones. + generate_topic_summary: Resolved value for request.generate_topic_summary. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + conversation: str = Field(description="Conversation ID in llama-stack format") + user_conversation: Optional[UserConversation] = Field( + default=None, + description="Resolved user conversation record, or None for new conversations", + ) + generate_topic_summary: bool = Field( + description="Resolved value for request.generate_topic_summary", + ) diff --git a/src/models/common/responses/types.py b/src/models/common/responses/types.py new file mode 100644 index 000000000..992d5a4df --- /dev/null +++ b/src/models/common/responses/types.py @@ -0,0 +1,55 @@ +"""Type aliases for OpenAI-compatible Responses API input shapes.""" + +from typing import Literal + +from llama_stack_api.openai_responses import ( + OpenAIResponseInputFunctionToolCallOutput as FunctionToolCallOutput, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseMCPApprovalRequest as McpApprovalRequest, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseMCPApprovalResponse as McpApprovalResponse, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseMessage as 
ResponseMessage, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseOutputMessageFileSearchToolCall as FileSearchToolCall, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseOutputMessageFunctionToolCall as FunctionToolCall, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseOutputMessageMCPCall as McpCall, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseOutputMessageMCPListTools as McpListTools, +) +from llama_stack_api.openai_responses import ( + OpenAIResponseOutputMessageWebSearchToolCall as WebSearchToolCall, +) + +type IncludeParameter = Literal[ + "web_search_call.action.sources", + "code_interpreter_call.outputs", + "computer_call_output.output.image_url", + "file_search_call.results", + "message.input_image.image_url", + "message.output_text.logprobs", + "reasoning.encrypted_content", +] + +type ResponseItem = ( + ResponseMessage + | WebSearchToolCall + | FileSearchToolCall + | FunctionToolCallOutput + | McpCall + | McpListTools + | McpApprovalRequest + | FunctionToolCall + | McpApprovalResponse +) + +type ResponseInput = str | list[ResponseItem] diff --git a/src/models/common/transcripts.py b/src/models/common/transcripts.py new file mode 100644 index 000000000..6ff648ba3 --- /dev/null +++ b/src/models/common/transcripts.py @@ -0,0 +1,31 @@ +"""Pydantic models for persisted query/response transcript entries.""" + +from typing import Any, Optional + +from pydantic import BaseModel, Field + + +class TranscriptMetadata(BaseModel): + """Metadata for a transcript entry.""" + + provider: Optional[str] = None + model: str + query_provider: Optional[str] = None + query_model: Optional[str] = None + user_id: str + conversation_id: str + timestamp: str + + +class Transcript(BaseModel): + """Model representing a transcript entry to be stored.""" + + metadata: TranscriptMetadata + redacted_query: str + query_is_valid: bool + llm_response: str + rag_chunks: list[dict[str, Any]] = 
Field(default_factory=list) + truncated: bool + attachments: list[dict[str, Any]] = Field(default_factory=list) + tool_calls: list[dict[str, Any]] = Field(default_factory=list) + tool_results: list[dict[str, Any]] = Field(default_factory=list) diff --git a/src/models/common/turn_summary.py b/src/models/common/turn_summary.py new file mode 100644 index 000000000..920a17c71 --- /dev/null +++ b/src/models/common/turn_summary.py @@ -0,0 +1,104 @@ +"""RAG context, chunks, document refs, tool summaries, and per-turn aggregation. + +Used on query and streaming paths. +""" + +from typing import Any, Optional + +from pydantic import AnyUrl, BaseModel, Field + +from utils.token_counter import TokenCounter + + +class RAGChunk(BaseModel): + """Model representing a RAG chunk used in the response.""" + + content: str = Field(description="The content of the chunk") + source: Optional[str] = Field( + default=None, + description="Index name identifying the knowledge source from configuration", + ) + score: Optional[float] = Field(default=None, description="Relevance score") + attributes: Optional[dict[str, Any]] = Field( + default=None, + description="Document metadata from the RAG provider (e.g., url, title, author)", + ) + + +class ReferencedDocument(BaseModel): + """Model representing a document referenced in generating a response. + + Attributes: + doc_url: Url to the referenced doc. + doc_title: Title of the referenced doc. + """ + + doc_url: Optional[AnyUrl] = Field( + default=None, description="URL of the referenced document" + ) + + doc_title: Optional[str] = Field( + default=None, description="Title of the referenced document" + ) + + source: Optional[str] = Field( + default=None, + description="Index name identifying the knowledge source from configuration", + ) + + +class RAGContext(BaseModel): + """Result of building RAG context from all enabled pre-query RAG sources. + + Attributes: + context_text: Formatted RAG context string for injection into the query. 
+ rag_chunks: RAG chunks from pre-query sources (BYOK + Solr). + referenced_documents: Referenced documents from pre-query sources. + """ + + context_text: str = Field(default="", description="Formatted context for injection") + rag_chunks: list[RAGChunk] = Field( + default_factory=list, + description="RAG chunks from pre-query sources", + ) + referenced_documents: list[ReferencedDocument] = Field( + default_factory=list, + description="Documents from pre-query sources", + ) + + +class ToolCallSummary(BaseModel): + """Model representing a tool call made during response generation (for tool_calls list).""" + + id: str = Field(description="ID of the tool call") + name: str = Field(description="Name of the tool called") + args: dict[str, Any] = Field( + default_factory=dict, description="Arguments passed to the tool" + ) + type: str = Field("tool_call", description="Type indicator for tool call") + + +class ToolResultSummary(BaseModel): + """Model representing a result from a tool call (for tool_results list).""" + + id: str = Field( + description="ID of the tool call/result, matches the corresponding tool call 'id'" + ) + status: str = Field( + ..., description="Status of the tool execution (e.g., 'success')" + ) + content: str = Field(..., description="Content/result returned from the tool") + type: str = Field("tool_result", description="Type indicator for tool result") + round: int = Field(..., description="Round number or step of tool execution") + + +class TurnSummary(BaseModel): + """Summary of a turn in llama stack.""" + + id: str = Field(default="", description="ID of the response") + llm_response: str = "" + tool_calls: list[ToolCallSummary] = Field(default_factory=list) + tool_results: list[ToolResultSummary] = Field(default_factory=list) + rag_chunks: list[RAGChunk] = Field(default_factory=list) + referenced_documents: list[ReferencedDocument] = Field(default_factory=list) + token_usage: TokenCounter = Field(default_factory=TokenCounter) diff --git 
a/src/models/context.py b/src/models/context.py index 8df6f3692..c861b883a 100644 --- a/src/models/context.py +++ b/src/models/context.py @@ -4,8 +4,9 @@ from llama_stack_client import AsyncLlamaStackClient +from models.common.moderation import ShieldModerationResult +from models.common.turn_summary import RAGContext from models.requests import QueryRequest -from utils.types import RAGContext, ShieldModerationResult @dataclass diff --git a/src/models/requests.py b/src/models/requests.py index 69b084a46..524d02346 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -33,8 +33,8 @@ SOLR_VECTOR_SEARCH_DEFAULT_MODE, ) from log import get_logger +from models.common.responses.types import IncludeParameter, ResponseInput from utils import suid -from utils.types import IncludeParameter, ResponseInput logger = get_logger(__name__) diff --git a/src/models/responses.py b/src/models/responses.py deleted file mode 100644 index b89827645..000000000 --- a/src/models/responses.py +++ /dev/null @@ -1,2048 +0,0 @@ -# pylint: disable=too-many-lines - -"""Models for REST API responses.""" - -from typing import Any, ClassVar, Literal, Optional, cast - -from llama_stack_api.openai_responses import ( - OpenAIResponseError as Error, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseInputToolChoice as ToolChoice, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseOutput as Output, -) -from llama_stack_api.openai_responses import ( - OpenAIResponsePrompt as Prompt, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseReasoning as Reasoning, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseText as Text, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseTool as OutputTool, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseUsage as Usage, -) -from pydantic import BaseModel, Field, computed_field -from pydantic_core import SchemaError - -from constants import MEDIA_TYPE_EVENT_STREAM 
-from log import get_logger -from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION -from models.config import Configuration -from utils.types import RAGChunk, ReferencedDocument, ToolCallSummary, ToolResultSummary - -logger = get_logger(__name__) - - -class AbstractSuccessfulResponse(BaseModel): - """Base class for all successful response models.""" - - @classmethod - def openapi_response(cls) -> dict[str, Any]: - """Generate FastAPI response dict with a single example from model_config.""" - schema = cls.model_json_schema() - model_examples = schema.get("examples") - if not model_examples: - raise SchemaError(f"Examples not found in {cls.__name__}") - example_value = model_examples[0] - content = {"application/json": {"example": example_value}} - - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - "content": content, - } - - -class AbstractDeleteResponse(BaseModel): - """Base model for successful delete responses.""" - - deleted: bool = Field( - ..., - description="Whether the deletion was successful.", - examples=[True, False], - ) - resource_name: ClassVar[str] - - @computed_field - def response(self) -> str: - """Human-readable outcome of the delete operation.""" - return ( - f"{self.resource_name} deleted successfully" - if self.deleted - else f"{self.resource_name} not found" - ) - - @classmethod - def openapi_response(cls) -> dict[str, Any]: - """Build FastAPI/OpenAPI metadata with named application/json examples. - - Returns: - A response dict with description, model, and content keys. - - Raises: - SchemaError: If the model JSON schema has no examples list. 
- """ - schema = cls.model_json_schema() - model_examples = schema.get("examples") - if not model_examples: - raise SchemaError(f"Examples not found in {cls.__name__}") - - examples: dict[str, dict[str, Any]] = {} - for index, example in enumerate(model_examples): - if "label" not in example: - raise SchemaError( - f"Example at index {index} in {cls.__name__} has no label" - ) - if "value" not in example: - raise SchemaError( - f"Example at index {index} in {cls.__name__} has no value" - ) - examples[example["label"]] = {"value": example["value"]} - - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - "content": {"application/json": {"examples": examples}}, - } - - -class ModelsResponse(AbstractSuccessfulResponse): - """Model representing a response to models request.""" - - models: list[dict[str, Any]] = Field( - ..., - description="List of models available", - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "models": [ - { - "identifier": "openai/gpt-4-turbo", - "metadata": {}, - "api_model_type": "llm", - "provider_id": "openai", - "type": "model", - "provider_resource_id": "gpt-4-turbo", - "model_type": "llm", - }, - ], - } - ] - } - } - - -class ToolsResponse(AbstractSuccessfulResponse): - """Model representing a response to tools request.""" - - tools: list[dict[str, Any]] = Field( - description=( - "List of tools available from all configured MCP servers and built-in toolgroups" - ), - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "tools": [ - { - "identifier": "filesystem_read", - "description": "Read contents of a file from the filesystem", - "parameters": [ - { - "name": "path", - "description": "Path to the file to read", - "parameter_type": "string", - "required": True, - "default": None, - } - ], - "provider_id": "model-context-protocol", - "toolgroup_id": "filesystem-tools", - "server_source": "http://localhost:3000", - "type": "tool", - } - ], - } - ] - } - } - - -class 
MCPServerAuthInfo(BaseModel): - """Information about MCP server client authentication options.""" - - name: str = Field(..., description="MCP server name") - client_auth_headers: list[str] = Field( - ..., - description="List of authentication header names for client-provided tokens", - ) - - -class MCPClientAuthOptionsResponse(AbstractSuccessfulResponse): - """Response containing MCP servers that accept client-provided authorization.""" - - servers: list[MCPServerAuthInfo] = Field( - default_factory=list, - description="List of MCP servers that accept client-provided authorization", - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "servers": [ - { - "name": "github", - "client_auth_headers": ["Authorization"], - }, - { - "name": "gitlab", - "client_auth_headers": ["Authorization", "X-API-Key"], - }, - ] - } - ] - } - } - - -class MCPServerInfo(BaseModel): - """Information about a registered MCP server. - - Attributes: - name: Unique name of the MCP server. - url: URL of the MCP server endpoint. - provider_id: MCP provider identification. - source: Whether the server was registered statically (config) or dynamically (api). 
- """ - - name: str = Field(..., description="MCP server name") - url: str = Field(..., description="MCP server URL") - provider_id: str = Field(..., description="MCP provider identification") - source: str = Field( - ..., - description="How the server was registered: 'config' (static) or 'api' (dynamic)", - examples=["config", "api"], - ) - - -class MCPServerRegistrationResponse(AbstractSuccessfulResponse): - """Response for a successful MCP server registration.""" - - name: str = Field(..., description="Registered MCP server name") - url: str = Field(..., description="Registered MCP server URL") - provider_id: str = Field(..., description="MCP provider identification") - message: str = Field(..., description="Status message") - - model_config = { - "json_schema_extra": { - "examples": [ - { - "name": "mcp-integration-tools", - "url": "http://host.docker.internal:7008/api/mcp-actions/v1", - "provider_id": "model-context-protocol", - "message": "MCP server 'mcp-integration-tools' registered successfully", - } - ] - } - } - - -class MCPServerListResponse(AbstractSuccessfulResponse): - """Response listing all registered MCP servers.""" - - servers: list[MCPServerInfo] = Field( - default_factory=list, - description="List of all registered MCP servers (static and dynamic)", - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "servers": [ - { - "name": "mcp-integration-tools", - "url": "http://host.docker.internal:7008/api/mcp-actions/v1", - "provider_id": "model-context-protocol", - "source": "config", - }, - { - "name": "test-mcp-server", - "url": "http://host.docker.internal:8888/mcp", - "provider_id": "model-context-protocol", - "source": "api", - }, - ] - } - ] - } - } - - -class MCPServerDeleteResponse(AbstractSuccessfulResponse): - """Response for a successful MCP server deletion.""" - - name: str = Field(..., description="Deleted MCP server name") - message: str = Field(..., description="Status message") - - model_config = { - 
"json_schema_extra": { - "examples": [ - { - "name": "test-mcp-server", - "message": "MCP server 'test-mcp-server' unregistered successfully", - } - ] - } - } - - -class ShieldsResponse(AbstractSuccessfulResponse): - """Model representing a response to shields request.""" - - shields: list[dict[str, Any]] = Field( - ..., - description="List of shields available", - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "shields": [ - { - "identifier": "lightspeed_question_validity-shield", - "provider_resource_id": "lightspeed_question_validity-shield", - "provider_id": "lightspeed_question_validity", - "type": "shield", - "params": {}, - } - ], - } - ] - } - } - - -class RAGInfoResponse(AbstractSuccessfulResponse): - """Model representing a response with information about RAG DB.""" - - id: str = Field( - ..., description="Vector DB unique ID", examples=["vs_00000000_0000_0000"] - ) - name: Optional[str] = Field( - None, - description="Human readable vector DB name", - examples=["Faiss Store with Knowledge base"], - ) - created_at: int = Field( - ..., - description="When the vector store was created, represented as Unix time", - examples=[1763391371], - ) - last_active_at: Optional[int] = Field( - None, - description="When the vector store was last active, represented as Unix time", - examples=[1763391371], - ) - usage_bytes: int = Field( - ..., - description="Storage byte(s) used by this vector DB", - examples=[0], - ) - expires_at: Optional[int] = Field( - None, - description="When the vector store expires, represented as Unix time", - examples=[1763391371], - ) - object: str = Field( - ..., - description="Object type", - examples=["vector_store"], - ) - status: str = Field( - ..., - description="Vector DB status", - examples=["completed"], - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "name": "Faiss Store with Knowledge base", - "created_at": 1763391371, - 
"last_active_at": 1763391371, - "usage_bytes": 1024000, - "expires_at": None, - "object": "vector_store", - "status": "completed", - } - ] - } - } - - -class RAGListResponse(AbstractSuccessfulResponse): - """Model representing a response to list RAGs request.""" - - rags: list[str] = Field( - ..., - title="RAG list response", - description="List of RAG identifiers", - examples=[ - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3", - ], - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "rags": [ - "vs_00000000-cafe-babe-0000-000000000000", - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3", - ] - } - ] - } - } - - -class ProvidersListResponse(AbstractSuccessfulResponse): - """Model representing a response to providers request.""" - - providers: dict[str, list[dict[str, Any]]] = Field( - ..., - description="List of available API types and their corresponding providers", - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "providers": { - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers", - }, - { - "provider_id": "openai", - "provider_type": "remote::openai", - }, - ], - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference", - }, - ], - }, - } - ] - } - } - - -class ProviderResponse(AbstractSuccessfulResponse): - """Model representing a response to get specific provider request.""" - - api: str = Field( - ..., - description="The API this provider implements", - ) - config: dict[str, Any] = Field( - ..., - description="Provider configuration parameters", - ) - health: dict[str, Any] = Field( - ..., - description="Current health status of the provider", - ) - provider_id: str = Field(..., description="Unique provider identifier") - provider_type: str = Field(..., description="Provider implementation type") - - model_config = { - "json_schema_extra": { - 
"examples": [ - { - "api": "inference", - "config": {"api_key": "********"}, - "health": {"status": "OK", "message": "Healthy"}, - "provider_id": "openai", - "provider_type": "remote::openai", - } - ] - } - } - - -class ConversationData(BaseModel): - """Model representing conversation data returned by cache list operations. - - Attributes: - conversation_id: The conversation ID - topic_summary: The topic summary for the conversation (can be None) - last_message_timestamp: The timestamp of the last message in the conversation - """ - - conversation_id: str - topic_summary: Optional[str] - last_message_timestamp: float - - -class QueryResponse(AbstractSuccessfulResponse): - """Model representing LLM response to a query. - - Attributes: - conversation_id: The optional conversation ID (UUID). - response: The response. - rag_chunks: Deprecated. List of RAG chunks used to generate the response. - This information is now available in tool_results under file_search_call type. - referenced_documents: The URLs and titles for the documents used to generate the response. - tool_calls: List of tool calls made during response generation. - tool_results: List of tool results. - truncated: Whether conversation history was truncated. - input_tokens: Number of tokens sent to LLM. - output_tokens: Number of tokens received from LLM. - available_quotas: Quota available as measured by all configured quota limiters. - """ - - conversation_id: Optional[str] = Field( - None, - description="The optional conversation ID (UUID)", - examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], - ) - - response: str = Field( - description="Response from LLM", - examples=[ - "Kubernetes is an open-source container orchestration system for automating ..." 
- ], - ) - - rag_chunks: list[RAGChunk] = Field( - default_factory=list, - description="Deprecated: List of RAG chunks used to generate the response.", - ) - - referenced_documents: list[ReferencedDocument] = Field( - default_factory=list, - description="List of documents referenced in generating the response", - examples=[ - [ - { - "doc_url": "https://docs.openshift.com/" - "container-platform/4.15/operators/olm/index.html", - "doc_title": "Operator Lifecycle Manager (OLM)", - } - ] - ], - ) - - truncated: bool = Field( - False, - description="Deprecated:Whether conversation history was truncated", - examples=[False, True], - ) - - input_tokens: int = Field( - 0, - description="Number of tokens sent to LLM", - examples=[150, 250, 500], - ) - - output_tokens: int = Field( - 0, - description="Number of tokens received from LLM", - examples=[50, 100, 200], - ) - - available_quotas: dict[str, int] = Field( - default_factory=dict, - description="Quota available as measured by all configured quota limiters", - examples=[{"daily": 1000, "monthly": 50000}], - ) - - tool_calls: list[ToolCallSummary] = Field( - default_factory=list, - description="List of tool calls made during response generation", - ) - - tool_results: list[ToolResultSummary] = Field( - default_factory=list, - description="List of tool results", - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Operator Lifecycle Manager (OLM) helps users install...", - "referenced_documents": [ - { - "doc_url": "https://docs.openshift.com/container-platform/4.15/" - "operators/understanding/olm/olm-understanding-olm.html", - "doc_title": "Operator Lifecycle Manager concepts and resources", - }, - ], - "truncated": False, - "input_tokens": 123, - "output_tokens": 456, - "available_quotas": { - "UserQuotaLimiter": 998911, - "ClusterQuotaLimiter": 998911, - }, - "tool_calls": [ - {"name": "tool1", "args": {}, "id": "1", "type": 
"tool_call"} - ], - "tool_results": [ - { - "id": "1", - "status": "success", - "content": "bla", - "type": "tool_result", - "round": 1, - } - ], - } - ] - } - } - - -class StreamingQueryResponse(AbstractSuccessfulResponse): - """Documentation-only model for streaming query responses using Server-Sent Events (SSE).""" - - @classmethod - def openapi_response(cls) -> dict[str, Any]: - """Generate FastAPI response dict for SSE streaming with examples. - - Note: This is used for OpenAPI documentation only. The actual endpoint - returns a StreamingResponse object, not this Pydantic model. - """ - schema = cls.model_json_schema() - model_examples = schema.get("examples") - if not model_examples: - raise SchemaError(f"Examples not found in {cls.__name__}") - example_value = model_examples[0] - content = { - MEDIA_TYPE_EVENT_STREAM: { - "schema": {"type": "string", "format": MEDIA_TYPE_EVENT_STREAM}, - "example": example_value, - } - } - - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "content": content, - # Note: No "model" key since we're not actually serializing this model - } - - model_config = { - "json_schema_extra": { - "examples": [ - ( - 'data: {"event": "start", "data": {' - '"conversation_id": "123e4567-e89b-12d3-a456-426614174000", ' - '"request_id": "123e4567-e89b-12d3-a456-426614174001"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 0, "token": "No Violation"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 1, "token": ""}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 2, "token": "Hello"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 3, "token": "!"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 4, "token": " How"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 5, "token": " can"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 6, "token": " I"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 7, "token": " assist"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 8, "token": " 
you"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 9, "token": " today"}}\n\n' - 'data: {"event": "token", "data": {' - '"id": 10, "token": "?"}}\n\n' - 'data: {"event": "turn_complete", "data": {' - '"token": "Hello! How can I assist you today?"}}\n\n' - 'data: {"event": "end", "data": {' - '"referenced_documents": [], ' - '"truncated": null, "input_tokens": 11, "output_tokens": 19}, ' - '"available_quotas": {}}\n\n' - ), - ] - } - } - - -class StreamingInterruptResponse(AbstractSuccessfulResponse): - """Model representing a response to a streaming interrupt request. - - Attributes: - request_id: The streaming request ID targeted by the interrupt call. - interrupted: Whether an in-progress stream was interrupted. - message: Human-readable interruption status message. - - Example: - ```python - response = StreamingInterruptResponse( - request_id="123e4567-e89b-12d3-a456-426614174000", - interrupted=True, - message="Streaming request interrupted", - ) - ``` - """ - - request_id: str = Field( - description="The streaming request ID targeted by the interrupt call", - examples=["123e4567-e89b-12d3-a456-426614174000"], - ) - - interrupted: bool = Field( - description="Whether an in-progress stream was interrupted", - examples=[True], - ) - - message: str = Field( - description="Human-readable interruption status message", - examples=["Streaming request interrupted"], - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "request_id": "123e4567-e89b-12d3-a456-426614174000", - "interrupted": True, - "message": "Streaming request interrupted", - } - ] - } - } - - -class InfoResponse(AbstractSuccessfulResponse): - """Model representing a response to an info request. - - Attributes: - name: Service name. - service_version: Service version. - llama_stack_version: Llama Stack version. 
- - Example: - ```python - info_response = InfoResponse( - name="Lightspeed Stack", - service_version="1.0.0", - llama_stack_version="0.2.22", - ) - ``` - """ - - name: str = Field( - description="Service name", - examples=["Lightspeed Stack"], - ) - - service_version: str = Field( - description="Service version", - examples=["0.1.0", "0.2.0", "1.0.0"], - ) - - llama_stack_version: str = Field( - description="Llama Stack version", - examples=["0.2.1", "0.2.2", "0.2.18", "0.2.21", "0.2.22"], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "name": "Lightspeed Stack", - "service_version": "1.0.0", - "llama_stack_version": "1.0.0", - } - ] - } - } - - -class ProviderHealthStatus(BaseModel): - """Model representing the health status of a provider. - - Attributes: - provider_id: The ID of the provider. - status: The health status ('ok', 'unhealthy', 'not_implemented'). - message: Optional message about the health status. - """ - - provider_id: str = Field( - description="The ID of the provider", - ) - status: str = Field( - description="The health status", - examples=["ok", "unhealthy", "not_implemented"], - ) - message: Optional[str] = Field( - None, - description="Optional message about the health status", - examples=["All systems operational", "Llama Stack is unavailable"], - ) - - -class ReadinessResponse(AbstractSuccessfulResponse): - """Model representing response to a readiness request. - - Attributes: - ready: If service is ready. - reason: The reason for the readiness. - providers: List of unhealthy providers in case of readiness failure. 
- - Example: - ```python - readiness_response = ReadinessResponse( - ready=False, - reason="Service is not ready", - providers=[ - ProviderHealthStatus( - provider_id="ollama", - status="unhealthy", - message="Server is unavailable" - ) - ] - ) - ``` - """ - - ready: bool = Field( - ..., - description="Flag indicating if service is ready", - examples=[True, False], - ) - - reason: str = Field( - ..., - description="The reason for the readiness", - examples=["Service is ready"], - ) - - providers: list[ProviderHealthStatus] = Field( - ..., - description="List of unhealthy providers in case of readiness failure.", - examples=[], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "ready": True, - "reason": "Service is ready", - "providers": [], - } - ] - } - } - - -class LivenessResponse(AbstractSuccessfulResponse): - """Model representing a response to a liveness request. - - Attributes: - alive: If app is alive. - - Example: - ```python - liveness_response = LivenessResponse(alive=True) - ``` - """ - - alive: bool = Field( - ..., - description="Flag indicating that the app is alive", - examples=[True, False], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "alive": True, - } - ] - } - } - - -class FeedbackResponse(AbstractSuccessfulResponse): - """Model representing a response to a feedback request. - - Attributes: - response: The response of the feedback request. 
- - Example: - ```python - feedback_response = FeedbackResponse(response="feedback received") - ``` - """ - - response: str = Field( - ..., - description="The response of the feedback request.", - examples=["feedback received"], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "response": "feedback received", - } - ] - } - } - - -class StatusResponse(AbstractSuccessfulResponse): - """Model representing a response to a status request. - - Attributes: - functionality: The functionality of the service. - status: The status of the service. - - Example: - ```python - status_response = StatusResponse( - functionality="feedback", - status={"enabled": True}, - ) - ``` - """ - - functionality: str = Field( - ..., - description="The functionality of the service", - examples=["feedback"], - ) - - status: dict = Field( - ..., - description="The status of the service", - examples=[{"enabled": True}], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "functionality": "feedback", - "status": {"enabled": True}, - } - ] - } - } - - -class AuthorizedResponse(AbstractSuccessfulResponse): - """Model representing a response to an authorization request. - - Attributes: - user_id: The ID of the logged in user. - username: The name of the logged in user. - skip_userid_check: Whether to skip the user ID check. 
- """ - - user_id: str = Field( - ..., - description="User ID, for example UUID", - examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], - ) - username: str = Field( - ..., - description="User name", - examples=["John Doe", "Adam Smith"], - ) - skip_userid_check: bool = Field( - ..., - description="Whether to skip the user ID check", - examples=[True, False], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "user_id": "123e4567-e89b-12d3-a456-426614174000", - "username": "user1", - "skip_userid_check": False, - } - ] - } - } - - -class Message(BaseModel): - """Model representing a message in a conversation turn. - - Attributes: - content: The message content. - type: The type of message. - referenced_documents: Optional list of documents referenced in an assistant response. - """ - - content: str = Field( - ..., - description="The message content", - examples=["Hello, how can I help you?"], - ) - type: Literal["user", "assistant", "system", "developer"] = Field( - ..., - description="The type of message", - examples=["user", "assistant", "system", "developer"], - ) - referenced_documents: Optional[list[ReferencedDocument]] = Field( - None, - description="List of documents referenced in the response (assistant messages only)", - ) - - -class ConversationTurn(BaseModel): - """Model representing a single conversation turn. - - Attributes: - messages: List of messages in this turn. - tool_calls: List of tool calls made in this turn. - tool_results: List of tool results from this turn. - provider: Provider identifier used for this turn. - model: Model identifier used for this turn. - started_at: ISO 8601 timestamp when the turn started. - completed_at: ISO 8601 timestamp when the turn completed. 
- """ - - messages: list[Message] = Field( - default_factory=list, - description="List of messages in this turn", - ) - tool_calls: list[ToolCallSummary] = Field( - default_factory=list, - description="List of tool calls made in this turn", - ) - tool_results: list[ToolResultSummary] = Field( - default_factory=list, - description="List of tool results from this turn", - ) - provider: str = Field( - ..., - description="Provider identifier used for this turn", - examples=["openai"], - ) - model: str = Field( - ..., - description="Model identifier used for this turn", - examples=["gpt-4o-mini"], - ) - started_at: str = Field( - ..., - description="ISO 8601 timestamp when the turn started", - examples=["2024-01-01T00:01:00Z"], - ) - completed_at: str = Field( - ..., - description="ISO 8601 timestamp when the turn completed", - examples=["2024-01-01T00:01:05Z"], - ) - - -class ConversationResponse(AbstractSuccessfulResponse): - """Model representing a response for retrieving a conversation. - - Attributes: - conversation_id: The conversation ID (UUID). - chat_history: The chat history as a list of conversation turns. 
- """ - - conversation_id: str = Field( - ..., - description="Conversation ID (UUID)", - examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], - ) - - chat_history: list[ConversationTurn] = Field( - ..., - description="The simplified chat history as a list of conversation turns", - examples=[ - { - "messages": [ - {"content": "Hello", "type": "user"}, - {"content": "Hi there!", "type": "assistant"}, - ], - "tool_calls": [], - "tool_results": [], - "provider": "openai", - "model": "gpt-4o-mini", - "started_at": "2024-01-01T00:01:00Z", - "completed_at": "2024-01-01T00:01:05Z", - } - ], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "chat_history": [ - { - "messages": [ - {"content": "Hello", "type": "user"}, - {"content": "Hi there!", "type": "assistant"}, - ], - "tool_calls": [], - "tool_results": [], - "provider": "openai", - "model": "gpt-4o-mini", - "started_at": "2024-01-01T00:01:00Z", - "completed_at": "2024-01-01T00:01:05Z", - } - ], - } - ] - } - } - - -class ConversationDeleteResponse(AbstractDeleteResponse): - """Response for deleting a conversation.""" - - resource_name: ClassVar[str] = "Conversation" - conversation_id: str = Field( - ..., - description="Conversation identifier that was passed to delete.", - examples=["123e4567-e89b-12d3-a456-426614174000"], - ) - - @computed_field(json_schema_extra={"deprecated": True}) - def success(self) -> bool: - """Successful response flag.""" - logger.warning("DEPRECATED: Will be removed in a future release.") - return True - - model_config = { - "json_schema_extra": { - "examples": [ - { - "label": "deleted", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "deleted": True, - "response": "Conversation deleted successfully", - }, - }, - { - "label": "not found", - "value": { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "deleted": False, - "response": 
"Conversation not found", - }, - }, - ] - } - } - - -class ConversationDetails(BaseModel): - """Model representing the details of a user conversation. - - Attributes: - conversation_id: The conversation ID (UUID). - created_at: When the conversation was created. - last_message_at: When the last message was sent. - message_count: Number of user messages in the conversation. - last_used_model: The last model used for the conversation. - last_used_provider: The provider of the last used model. - topic_summary: The topic summary for the conversation. - - Example: - ```python - conversation = ConversationDetails( - conversation_id="123e4567-e89b-12d3-a456-426614174000", - created_at="2024-01-01T00:00:00Z", - last_message_at="2024-01-01T00:05:00Z", - message_count=5, - last_used_model="gemini/gemini-2.0-flash", - last_used_provider="gemini", - topic_summary="Openshift Microservices Deployment Strategies", - ) - ``` - """ - - conversation_id: str = Field( - ..., - description="Conversation ID (UUID)", - examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], - ) - - created_at: Optional[str] = Field( - None, - description="When the conversation was created", - examples=["2024-01-01T01:00:00Z"], - ) - - last_message_at: Optional[str] = Field( - None, - description="When the last message was sent", - examples=["2024-01-01T01:00:00Z"], - ) - - message_count: Optional[int] = Field( - None, - description="Number of user messages in the conversation", - examples=[42], - ) - - last_used_model: Optional[str] = Field( - None, - description="Identification of the last model used for the conversation", - examples=["gpt-4-turbo", "gpt-3.5-turbo-0125"], - ) - - last_used_provider: Optional[str] = Field( - None, - description="Identification of the last provider used for the conversation", - examples=["openai", "gemini"], - ) - - topic_summary: Optional[str] = Field( - None, - description="Topic summary for the conversation", - examples=["Openshift Microservices Deployment Strategies"], - ) 
- - -class ConversationsListResponse(AbstractSuccessfulResponse): - """Model representing a response for listing conversations of a user. - - Attributes: - conversations: List of conversation details associated with the user. - """ - - conversations: list[ConversationDetails] - - model_config = { - "json_schema_extra": { - "examples": [ - { - "conversations": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "created_at": "2024-01-01T00:00:00Z", - "last_message_at": "2024-01-01T00:05:00Z", - "message_count": 5, - "last_used_model": "gemini/gemini-2.0-flash", - "last_used_provider": "gemini", - "topic_summary": "Openshift Microservices Deployment Strategies", - }, - { - "conversation_id": "456e7890-e12b-34d5-a678-901234567890", - "created_at": "2024-01-01T01:00:00Z", - "message_count": 2, - "last_used_model": "gemini/gemini-2.5-flash", - "last_used_provider": "gemini", - "topic_summary": "RHDH Purpose Summary", - }, - ] - } - ] - } - } - - -class ConversationsListResponseV2(AbstractSuccessfulResponse): - """Model representing a response for listing conversations of a user. - - Attributes: - conversations: List of conversation data associated with the user. - """ - - conversations: list[ConversationData] - - model_config = { - "json_schema_extra": { - "examples": [ - { - "conversations": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "topic_summary": "Openshift Microservices Deployment Strategies", - "last_message_timestamp": 1704067200.0, - } - ], - } - ] - } - } - - -class FeedbackStatusUpdateResponse(AbstractSuccessfulResponse): - """ - Model representing a response to a feedback status update request. - - Attributes: - status: The previous and current status of the service and who updated it. 
- - Example: - ```python - status_response = StatusResponse( - status={ - "previous_status": true, - "updated_status": false, - "updated_by": "user/test", - "timestamp": "2023-03-15 12:34:56" - }, - ) - ``` - """ - - status: dict - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "status": { - "previous_status": True, - "updated_status": False, - "updated_by": "user/test", - "timestamp": "2023-03-15 12:34:56", - }, - } - ] - } - } - - -class ConversationUpdateResponse(AbstractSuccessfulResponse): - """Model representing a response for updating a conversation topic summary. - - Attributes: - conversation_id: The conversation ID (UUID) that was updated. - success: Whether the update was successful. - message: A message about the update result. - - Example: - ```python - update_response = ConversationUpdateResponse( - conversation_id="123e4567-e89b-12d3-a456-426614174000", - success=True, - message="Topic summary updated successfully", - ) - ``` - """ - - conversation_id: str = Field( - ..., - description="The conversation ID (UUID) that was updated", - examples=["123e4567-e89b-12d3-a456-426614174000"], - ) - success: bool = Field( - ..., - description="Whether the update was successful", - examples=[True], - ) - message: str = Field( - ..., - description="A message about the update result", - examples=["Topic summary updated successfully"], - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "success": True, - "message": "Topic summary updated successfully", - } - ] - } - } - - -class ConfigurationResponse(AbstractSuccessfulResponse): - """Success response model for the config endpoint.""" - - configuration: Configuration - - model_config = { - "json_schema_extra": { - "examples": [ - { - "configuration": { - "name": "lightspeed-stack", - "service": { - "host": "localhost", - "port": 8080, - "auth_enabled": False, - "workers": 1, 
- "color_log": True, - "access_log": True, - "tls_config": { - "tls_certificate_path": None, - "tls_key_path": None, - "tls_key_password": None, - }, - "cors": { - "allow_origins": ["*"], - "allow_credentials": False, - "allow_methods": ["*"], - "allow_headers": ["*"], - }, - }, - "llama_stack": { - "url": "http://localhost:8321", - "api_key": "*****", - "use_as_library_client": False, - "library_client_config_path": None, - }, - "user_data_collection": { - "feedback_enabled": True, - "feedback_storage": "/tmp/data/feedback", - "transcripts_enabled": False, - "transcripts_storage": "/tmp/data/transcripts", - }, - "database": { - "sqlite": {"db_path": "/tmp/lightspeed-stack.db"}, - "postgres": None, - }, - "mcp_servers": [ - { - "name": "server1", - "provider_id": "provider1", - "url": "http://url.com:1", - }, - ], - "authentication": { - "module": "noop", - "skip_tls_verification": False, - }, - "authorization": {"access_rules": []}, - "customization": None, - "inference": { - "default_model": "gpt-4-turbo", - "default_provider": "openai", - }, - "conversation_cache": { - "type": None, - "memory": None, - "sqlite": None, - "postgres": None, - }, - "byok_rag": [], - "quota_handlers": { - "sqlite": None, - "postgres": None, - "limiters": [], - "scheduler": {"period": 1}, - "enable_token_history": False, - }, - } - } - ] - } - } - - -class ResponsesResponse(AbstractSuccessfulResponse): - """Model representing a response from the Responses API following LCORE specification. - - Attributes: - created_at: Unix timestamp when the response was created. - completed_at: Unix timestamp when the response was completed, if applicable. - error: Error details if the response failed or was blocked. - id: Unique identifier for this response. - model: Model identifier in "provider/model" format used for generation. - object: Object type identifier, always "response". - output: List of structured output items containing messages, tool calls, and - other content. 
This is the primary response content. - parallel_tool_calls: Whether the model can make multiple tool calls in parallel. - previous_response_id: Identifier of the previous response in a multi-turn - conversation. - prompt: The input prompt object that was sent to the model. - status: Current status of the response (e.g., "completed", "blocked", - "in_progress"). - temperature: Temperature parameter used for generation (controls randomness). - text: Text response configuration object used for OpenAI responses. - top_p: Top-p sampling parameter used for generation. - tools: List of tools available to the model during generation. - tool_choice: Tool selection strategy used (e.g., "auto", "required", "none"). - truncation: Strategy used for handling content that exceeds context limits. - usage: Token usage statistics including input_tokens, output_tokens, and - total_tokens. - instructions: System instructions or guidelines provided to the model. - max_tool_calls: Maximum number of tool calls allowed in a single response. - reasoning: Reasoning configuration (effort level) used for the response. - max_output_tokens: Upper bound for tokens generated in the response. - safety_identifier: Safety/guardrail identifier applied to the request. - metadata: Additional metadata dictionary with custom key-value pairs. - store: Whether the response was stored. - conversation: Conversation ID linking this response to a conversation thread - (LCORE-specific). - available_quotas: Remaining token quotas for the user (LCORE-specific). - output_text: Aggregated text output from all output_text items in the - output array. 
- """ - - created_at: int - completed_at: Optional[int] = None - error: Optional[Error] = None - id: str - model: str - object: Literal["response"] = "response" - output: list[Output] - parallel_tool_calls: bool = True - previous_response_id: Optional[str] = None - prompt: Optional[Prompt] = None - status: str - temperature: Optional[float] = None - text: Optional[Text] = None - top_p: Optional[float] = None - tools: Optional[list[OutputTool]] = None - tool_choice: Optional[ToolChoice] = None - truncation: Optional[str] = None - usage: Optional[Usage] = None - instructions: Optional[str] = None - max_tool_calls: Optional[int] = None - reasoning: Optional[Reasoning] = None - max_output_tokens: Optional[int] = None - safety_identifier: Optional[str] = None - metadata: Optional[dict[str, str]] = None - store: Optional[bool] = None - # LCORE-specific attributes - conversation: Optional[str] = None - available_quotas: dict[str, int] - output_text: str - - model_config = { - "json_schema_extra": { - "examples": [ - { - "created_at": 1704067200, - "completed_at": 1704067250, - "id": "resp_abc123", - "model": "openai/gpt-4-turbo", - "object": "response", - "output": [ - { - "type": "message", - "role": "assistant", - "content": [ - { - "type": "output_text", - "text": ( - "Kubernetes is an open-source container " - "orchestration system..." - ), - } - ], - } - ], - "parallel_tool_calls": True, - "status": "completed", - "temperature": 0.7, - "text": {"format": {"type": "text"}}, - "usage": { - "input_tokens": 100, - "output_tokens": 50, - "total_tokens": 150, - "input_tokens_details": {"cached_tokens": 0}, - "output_tokens_details": {"reasoning_tokens": 0}, - }, - "instructions": "You are a helpful assistant", - "store": True, - "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e", - "available_quotas": {"daily": 1000, "monthly": 50000}, - "output_text": ( - "Kubernetes is an open-source container " - "orchestration system..." 
- ), - } - ], - "sse_example": ( - "event: response.created\n" - 'data: {"type":"response.created","sequence_number":0,' - '"response":{"id":"resp_abc","object":"response",' - '"created_at":1704067200,"status":"in_progress","model":"openai/gpt-4o-mini",' - '"output":[],"store":true,"text":{"format":{"type":"text"}},' - '"conversation":"0d21ba731f21f798dc9680125d5d6f49",' - '"available_quotas":{},"output_text":""}}\n\n' - "event: response.output_item.added\n" - 'data: {"type":"response.output_item.added","sequence_number":1,' - '"response_id":"resp_abc","output_index":0,' - '"item":{"id":"msg_abc","type":"message","status":"in_progress",' - '"role":"assistant","content":[]}}\n\n' - "...\n\n" - "event: response.completed\n" - 'data: {"type":"response.completed","sequence_number":30,' - '"response":{"id":"resp_abc","object":"response",' - '"created_at":1704067200,"status":"completed","model":"openai/gpt-4o-mini",' - '"output":[{"id":"msg_abc","type":"message","status":"completed",' - '"role":"assistant","content":[{"type":"output_text",' - '"text":"Hello! How can I help?","annotations":[]}]}],' - '"store":true,"text":{"format":{"type":"text"}},' - '"usage":{"input_tokens":10,"output_tokens":6,"total_tokens":16,' - '"input_tokens_details":{"cached_tokens":0},' - '"output_tokens_details":{"reasoning_tokens":0}},' - '"conversation":"0d21ba731f21f798dc9680125d5d6f49",' - '"available_quotas":{"daily":1000,"monthly":50000},' - '"output_text":"Hello! How can I help?"}}\n\n' - "data: [DONE]\n\n" - ), - } - } - - @classmethod - def openapi_response(cls) -> dict[str, Any]: - """ - Build OpenAPI response dict with application/json and text/event-stream. - - Uses the single JSON example from the model schema and adds - text/event-stream example from json_schema_extra.sse_example. 
- """ - schema = cls.model_json_schema() - model_examples = schema.get("examples", []) - json_example = model_examples[0] if model_examples else None - - schema_extra = ( - cast(dict[str, Any], dict(cls.model_config)).get("json_schema_extra") or {} - ) - sse_example = schema_extra.get("sse_example", "") - - content: dict[str, Any] = { - "application/json": {"example": json_example} if json_example else {}, - "text/event-stream": { - "schema": {"type": "string"}, - "example": sse_example, - }, - } - - return { - "description": SUCCESSFUL_RESPONSE_DESCRIPTION, - "model": cls, - "content": content, - } - - -class VectorStoreResponse(AbstractSuccessfulResponse): - """Response model containing a single vector store. - - Attributes: - id: Vector store ID. - name: Vector store name. - created_at: Unix timestamp when created. - last_active_at: Unix timestamp of last activity. - expires_at: Optional Unix timestamp when it expires. - status: Vector store status. - usage_bytes: Storage usage in bytes. - metadata: Optional metadata dictionary for storing session information. 
- """ - - id: str = Field(..., description="Vector store ID") - name: str = Field(..., description="Vector store name") - created_at: int = Field(..., description="Unix timestamp when created") - last_active_at: Optional[int] = Field( - None, description="Unix timestamp of last activity" - ) - expires_at: Optional[int] = Field( - None, description="Unix timestamp when it expires" - ) - status: str = Field(..., description="Vector store status") - usage_bytes: int = Field(default=0, description="Storage usage in bytes") - metadata: Optional[dict[str, Any]] = Field( - None, - description="Metadata dictionary for storing session information", - examples=[ - {"conversation_id": "conv_123", "document_ids": ["doc_456", "doc_789"]} - ], - ) - - model_config = { - "extra": "forbid", - "json_schema_extra": { - "examples": [ - { - "id": "vs_abc123", - "name": "customer_support_docs", - "created_at": 1704067200, - "last_active_at": 1704153600, - "expires_at": None, - "status": "active", - "usage_bytes": 1048576, - "metadata": { - "conversation_id": "conv_123", - "document_ids": ["doc_456", "doc_789"], - }, - } - ] - }, - } - - -class VectorStoresListResponse(AbstractSuccessfulResponse): - """Response model containing a list of vector stores. - - Attributes: - data: List of vector store objects. - object: Object type (always "list"). 
- """ - - data: list[VectorStoreResponse] = Field( - default_factory=list, description="List of vector stores" - ) - object: str = Field(default="list", description="Object type") - - model_config = { - "extra": "forbid", - "json_schema_extra": { - "examples": [ - { - "data": [ - { - "id": "vs_abc123", - "name": "customer_support_docs", - "created_at": 1704067200, - "last_active_at": 1704153600, - "expires_at": None, - "status": "active", - "usage_bytes": 1048576, - "metadata": {"conversation_id": "conv_123"}, - }, - { - "id": "vs_def456", - "name": "product_documentation", - "created_at": 1704070800, - "last_active_at": 1704157200, - "expires_at": None, - "status": "active", - "usage_bytes": 2097152, - "metadata": None, - }, - ], - "object": "list", - } - ] - }, - } - - -class VectorStoreDeleteResponse(AbstractDeleteResponse): - """Result of deleting a vector store (always HTTP 200).""" - - resource_name: ClassVar[str] = "Vector store" - vector_store_id: str = Field( - ..., - description="Vector store identifier that was passed to delete.", - examples=["vs_abc123"], - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "label": "deleted", - "value": { - "vector_store_id": "vs_abc123", - "deleted": True, - "response": "Vector store deleted successfully", - }, - }, - { - "label": "not found", - "value": { - "vector_store_id": "vs_abc123", - "deleted": False, - "response": "Vector store not found", - }, - }, - ] - } - } - - -class VectorStoreFileDeleteResponse(AbstractDeleteResponse): - """Result of deleting a file from a vector store (always HTTP 200).""" - - resource_name: ClassVar[str] = "Vector store file" - file_id: str = Field( - ..., - description="File identifier that was passed to delete.", - examples=["file_abc123"], - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "label": "deleted", - "value": { - "file_id": "file_abc123", - "deleted": True, - "response": "Vector store file deleted successfully", - }, - }, - { - 
"label": "not found", - "value": { - "file_id": "file_abc123", - "deleted": False, - "response": "Vector store file not found", - }, - }, - ] - } - } - - -class PromptResourceResponse(AbstractSuccessfulResponse): - """A stored prompt template as returned by Llama Stack.""" - - prompt_id: str = Field(..., description="Prompt identifier from Llama Stack") - version: int = Field(..., description="Version number for this prompt") - is_default: Optional[bool] = Field( - None, description="Whether this version is the default" - ) - prompt: Optional[str] = Field(None, description="Prompt text with placeholders") - variables: Optional[list[str]] = Field( - None, description="Variable names used in the template" - ) - - model_config = { - "extra": "forbid", - "json_schema_extra": { - "examples": [ - { - "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567", - "version": 1, - "is_default": True, - "prompt": "Summarize: {{text}}", - "variables": ["text"], - } - ] - }, - } - - -class PromptsListResponse(AbstractSuccessfulResponse): - """List of stored prompt templates returned by Llama Stack.""" - - data: list[PromptResourceResponse] = Field( - default_factory=list, - description="Prompt entries (as returned by Llama Stack list)", - ) - - model_config = { - "extra": "forbid", - "json_schema_extra": { - "examples": [ - { - "data": [ - { - "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567", - "version": 1, - "is_default": True, - "prompt": "Summarize: {{text}}", - "variables": ["text"], - } - ], - } - ] - }, - } - - -class PromptDeleteResponse(AbstractDeleteResponse): - """Result of deleting a stored prompt (always HTTP 200, like conversations v2).""" - - resource_name: ClassVar[str] = "Prompt" - prompt_id: str = Field( - ..., - description="Prompt identifier that was passed to delete.", - examples=["pmpt_0123456789abcdef0123456789abcdef01234567"], - ) - - model_config = { - "json_schema_extra": { - "examples": [ - { - "label": "deleted", - "value": { - 
"prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567", - "deleted": True, - "response": "Prompt deleted successfully", - }, - }, - { - "label": "not found", - "value": { - "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567", - "deleted": False, - "response": "Prompt not found", - }, - }, - ] - } - } - - -class FileResponse(AbstractSuccessfulResponse): - """Response model containing a file object. - - Attributes: - id: File ID. - filename: File name. - bytes: File size in bytes. - created_at: Unix timestamp when created. - purpose: File purpose. - object: Object type (always "file"). - """ - - id: str = Field(..., description="File ID") - filename: str = Field(..., description="File name") - bytes: int = Field(..., description="File size in bytes") - created_at: int = Field(..., description="Unix timestamp when created") - purpose: str = Field(default="assistants", description="File purpose") - object: str = Field(default="file", description="Object type") - - model_config = { - "extra": "forbid", - "json_schema_extra": { - "examples": [ - { - "id": "file_abc123", - "filename": "documentation.pdf", - "bytes": 524288, - "created_at": 1704067200, - "purpose": "assistants", - "object": "file", - } - ] - }, - } - - -class VectorStoreFileResponse(AbstractSuccessfulResponse): - """Response model containing a vector store file object. - - Attributes: - id: Vector store file ID. - vector_store_id: ID of the vector store. - status: File processing status. - attributes: Optional metadata key-value pairs. - last_error: Optional error message if processing failed. - object: Object type (always "vector_store.file"). 
- """ - - id: str = Field(..., description="Vector store file ID") - vector_store_id: str = Field(..., description="ID of the vector store") - status: str = Field(..., description="File processing status") - attributes: Optional[dict[str, str | float | bool]] = Field( - None, - description=( - "Set of up to 16 key-value pairs for storing additional information. " - "Keys: strings (max 64 chars). Values: strings (max 512 chars), booleans, or numbers." - ), - ) - last_error: Optional[str] = Field( - None, description="Error message if processing failed" - ) - object: str = Field(default="vector_store.file", description="Object type") - - model_config = { - "extra": "forbid", - "json_schema_extra": { - "examples": [ - { - "id": "file_abc123", - "vector_store_id": "vs_abc123", - "status": "completed", - "attributes": {"chunk_size": "512", "indexed": True}, - "last_error": None, - "object": "vector_store.file", - } - ] - }, - } - - -class VectorStoreFilesListResponse(AbstractSuccessfulResponse): - """Response model containing a list of vector store files. - - Attributes: - data: List of vector store file objects. - object: Object type (always "list"). 
- """ - - data: list[VectorStoreFileResponse] = Field( - default_factory=list, description="List of vector store files" - ) - object: str = Field(default="list", description="Object type") - - model_config = { - "extra": "forbid", - "json_schema_extra": { - "examples": [ - { - "data": [ - { - "id": "file_abc123", - "vector_store_id": "vs_abc123", - "status": "completed", - "attributes": {"chunk_size": "512"}, - "last_error": None, - "object": "vector_store.file", - }, - { - "id": "file_def456", - "vector_store_id": "vs_abc123", - "status": "processing", - "attributes": None, - "last_error": None, - "object": "vector_store.file", - }, - ], - "object": "list", - } - ] - }, - } diff --git a/src/models/rlsapi/responses.py b/src/models/rlsapi/responses.py index 502fe44b7..6d85dd514 100644 --- a/src/models/rlsapi/responses.py +++ b/src/models/rlsapi/responses.py @@ -4,14 +4,14 @@ from pydantic import Field -from models.config import ConfigurationBase -from models.responses import ( - AbstractSuccessfulResponse, +from models.api.responses.successful.bases import AbstractSuccessfulResponse +from models.common import ( RAGChunk, ReferencedDocument, ToolCallSummary, ToolResultSummary, ) +from models.config import ConfigurationBase class RlsapiV1InferData(ConfigurationBase): diff --git a/src/utils/conversations.py b/src/utils/conversations.py index 130d75b01..ac2659688 100644 --- a/src/utils/conversations.py +++ b/src/utils/conversations.py @@ -41,17 +41,18 @@ ) from constants import DEFAULT_RAG_TOOL -from models.api.responses import ( +from models.api.responses.error import ( InternalServerErrorResponse, ServiceUnavailableResponse, ) -from models.database.conversations import UserTurn -from models.responses import ( +from models.common.conversation import ( ConversationTurn, Message, ) +from models.common.responses.types import ResponseInput +from models.common.turn_summary import ToolCallSummary, ToolResultSummary +from models.database.conversations import UserTurn from 
utils.responses import parse_arguments_string -from utils.types import ResponseInput, ToolCallSummary, ToolResultSummary def _extract_text_from_content(content: str | list[Any]) -> str: diff --git a/src/utils/endpoints.py b/src/utils/endpoints.py index 5eba25258..a9d2a5754 100644 --- a/src/utils/endpoints.py +++ b/src/utils/endpoints.py @@ -11,15 +11,18 @@ from client import AsyncLlamaStackClientHolder from configuration import AppConfig, LogicError from log import get_logger -from models.api.responses import ( +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, NotFoundResponse, ) +from models.common.responses.responses_conversation_context import ( + ResponsesConversationContext, +) +from models.common.turn_summary import ReferencedDocument, TurnSummary from models.database.conversations import UserConversation, UserTurn from utils.responses import create_new_conversation from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id -from utils.types import ReferencedDocument, ResponsesConversationContext, TurnSummary logger = get_logger(__name__) diff --git a/src/utils/mcp_oauth_probe.py b/src/utils/mcp_oauth_probe.py index dba2fa3fc..570e968eb 100644 --- a/src/utils/mcp_oauth_probe.py +++ b/src/utils/mcp_oauth_probe.py @@ -14,7 +14,7 @@ import constants from configuration import AppConfig from log import get_logger -from models.api.responses import UnauthorizedResponse +from models.api.responses.error import UnauthorizedResponse from utils.mcp_headers import McpHeaders, build_mcp_headers logger = get_logger(__name__) diff --git a/src/utils/prompts.py b/src/utils/prompts.py index 9a51371e8..5822f7b19 100644 --- a/src/utils/prompts.py +++ b/src/utils/prompts.py @@ -6,7 +6,7 @@ import constants from configuration import configuration -from models.api.responses import UnprocessableEntityResponse +from models.api.responses.error import UnprocessableEntityResponse def get_system_prompt( diff --git 
a/src/utils/query.py b/src/utils/query.py index c62145d60..c6b9b686d 100644 --- a/src/utils/query.py +++ b/src/utils/query.py @@ -25,7 +25,7 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses import ( +from models.api.responses.error import ( AbstractErrorResponse, ForbiddenResponse, InternalServerErrorResponse, @@ -35,6 +35,7 @@ UnprocessableEntityResponse, ) from models.cache_entry import CacheEntry +from models.common.turn_summary import TurnSummary from models.config import Action from models.database.conversations import UserConversation, UserTurn from models.requests import Attachment, QueryRequest @@ -46,7 +47,6 @@ create_transcript_metadata, store_transcript, ) -from utils.types import TurnSummary logger = get_logger(__name__) diff --git a/src/utils/quota.py b/src/utils/quota.py index bef10b910..b66d9b022 100644 --- a/src/utils/quota.py +++ b/src/utils/quota.py @@ -7,7 +7,10 @@ from fastapi import HTTPException from log import get_logger -from models.api.responses import InternalServerErrorResponse, QuotaExceededResponse +from models.api.responses.error import ( + InternalServerErrorResponse, + QuotaExceededResponse, +) from quota.quota_exceed_error import QuotaExceedError from quota.quota_limiter import QuotaLimiter from quota.token_usage_history import TokenUsageHistory diff --git a/src/utils/responses.py b/src/utils/responses.py index 3d97efdfd..4fa3f96b0 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -91,13 +91,21 @@ from constants import DEFAULT_RAG_TOOL from log import get_logger from metrics import recording -from models.api.responses import ( +from models.api.responses.error import ( ConflictResponse, InternalServerErrorResponse, NotFoundResponse, ServiceUnavailableResponse, ) from models.common.responses.responses_api_params import ResponsesApiParams +from models.common.responses.types import ResponseInput, ResponseItem +from 
models.common.turn_summary import ( + RAGChunk, + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, + TurnSummary, +) from models.config import ByokRag from models.database.conversations import UserConversation from models.requests import QueryRequest @@ -114,15 +122,6 @@ ) from utils.suid import to_llama_stack_conversation_id from utils.token_counter import TokenCounter -from utils.types import ( - RAGChunk, - ReferencedDocument, - ResponseInput, - ResponseItem, - ToolCallSummary, - ToolResultSummary, - TurnSummary, -) logger = get_logger(__name__) diff --git a/src/utils/shields.py b/src/utils/shields.py index 821727be8..56cd729eb 100644 --- a/src/utils/shields.py +++ b/src/utils/shields.py @@ -18,19 +18,19 @@ from constants import DEFAULT_VIOLATION_MESSAGE from log import get_logger from metrics import recording -from models.api.responses import ( +from models.api.responses.error import ( InternalServerErrorResponse, NotFoundResponse, ServiceUnavailableResponse, UnprocessableEntityResponse, ) -from models.requests import QueryRequest -from utils.query import handle_known_apistatus_errors -from utils.types import ( +from models.common.moderation import ( ShieldModerationBlocked, ShieldModerationPassed, ShieldModerationResult, ) +from models.requests import QueryRequest +from utils.query import handle_known_apistatus_errors logger = get_logger(__name__) diff --git a/src/utils/transcripts.py b/src/utils/transcripts.py index bb7606b5d..ec2ef53d6 100644 --- a/src/utils/transcripts.py +++ b/src/utils/transcripts.py @@ -15,14 +15,11 @@ from configuration import configuration from log import get_logger -from models.api.responses import InternalServerErrorResponse +from models.api.responses.error import InternalServerErrorResponse +from models.common.transcripts import Transcript, TranscriptMetadata +from models.common.turn_summary import TurnSummary from models.requests import Attachment from utils.suid import get_suid -from utils.types import ( - Transcript, - 
TranscriptMetadata, - TurnSummary, -) logger = get_logger(__name__) diff --git a/src/utils/types.py b/src/utils/types.py index 48f0fc226..1e616a773 100644 --- a/src/utils/types.py +++ b/src/utils/types.py @@ -1,39 +1,8 @@ """Common types for the project.""" -from typing import Annotated, Any, Literal, Optional +from typing import Any from llama_stack_api import ImageContentItem, TextContentItem -from llama_stack_api.openai_responses import ( - OpenAIResponseInputFunctionToolCallOutput as FunctionToolCallOutput, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseMCPApprovalRequest as McpApprovalRequest, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseMCPApprovalResponse as McpApprovalResponse, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseMessage as ResponseMessage, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseOutputMessageFileSearchToolCall as FileSearchToolCall, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseOutputMessageFunctionToolCall as FunctionToolCall, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseOutputMessageMCPCall as McpCall, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseOutputMessageMCPListTools as McpListTools, -) -from llama_stack_api.openai_responses import ( - OpenAIResponseOutputMessageWebSearchToolCall as WebSearchToolCall, -) -from pydantic import AnyUrl, BaseModel, ConfigDict, Field - -from models.database.conversations import UserConversation -from utils.token_counter import TokenCounter type SingletonInstances = dict[type, Any] @@ -80,193 +49,3 @@ def __call__(cls, *args: Any, **kwargs: Any) -> Any: if cls not in cls._instances: cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) return cls._instances[cls] - - -class ShieldModerationPassed(BaseModel): - """Shield moderation passed; no refusal.""" - - decision: Literal["passed"] = "passed" - - -class 
ResponsesConversationContext(BaseModel): - """Result of resolving conversation context for the responses endpoint. - - Holds the conversation ID to use for the LLM, the optional user conversation - record, and the resolved generate_topic_summary flag. Caller assigns these - to the request in outer scope instead of mutating the request inside the - resolver. - - Attributes: - conversation: Conversation ID in llama-stack format to use for the request. - user_conversation: Resolved user conversation record, or None for new ones. - generate_topic_summary: Resolved value for request.generate_topic_summary. - """ - - conversation: str = Field(description="Conversation ID in llama-stack format") - user_conversation: Optional[UserConversation] = Field( - default=None, - description="Resolved user conversation record, or None for new conversations", - ) - generate_topic_summary: bool = Field( - description="Resolved value for request.generate_topic_summary", - ) - model_config = ConfigDict(arbitrary_types_allowed=True) - - -class ShieldModerationBlocked(BaseModel): - """Shield moderation blocked the content; refusal details are present.""" - - decision: Literal["blocked"] = "blocked" - message: str - moderation_id: str - refusal_response: ResponseMessage - - -ShieldModerationResult = Annotated[ - ShieldModerationPassed | ShieldModerationBlocked, - Field(discriminator="decision"), -] - -type IncludeParameter = Literal[ - "web_search_call.action.sources", - "code_interpreter_call.outputs", - "computer_call_output.output.image_url", - "file_search_call.results", - "message.input_image.image_url", - "message.output_text.logprobs", - "reasoning.encrypted_content", -] - -type ResponseItem = ( - ResponseMessage - | WebSearchToolCall - | FileSearchToolCall - | FunctionToolCallOutput - | McpCall - | McpListTools - | McpApprovalRequest - | FunctionToolCall - | McpApprovalResponse -) - -type ResponseInput = str | list[ResponseItem] - - -class ToolCallSummary(BaseModel): - """Model 
representing a tool call made during response generation (for tool_calls list).""" - - id: str = Field(description="ID of the tool call") - name: str = Field(description="Name of the tool called") - args: dict[str, Any] = Field( - default_factory=dict, description="Arguments passed to the tool" - ) - type: str = Field("tool_call", description="Type indicator for tool call") - - -class ToolResultSummary(BaseModel): - """Model representing a result from a tool call (for tool_results list).""" - - id: str = Field( - description="ID of the tool call/result, matches the corresponding tool call 'id'" - ) - status: str = Field( - ..., description="Status of the tool execution (e.g., 'success')" - ) - content: str = Field(..., description="Content/result returned from the tool") - type: str = Field("tool_result", description="Type indicator for tool result") - round: int = Field(..., description="Round number or step of tool execution") - - -class RAGChunk(BaseModel): - """Model representing a RAG chunk used in the response.""" - - content: str = Field(description="The content of the chunk") - source: Optional[str] = Field( - default=None, - description="Index name identifying the knowledge source from configuration", - ) - score: Optional[float] = Field(default=None, description="Relevance score") - attributes: Optional[dict[str, Any]] = Field( - default=None, - description="Document metadata from the RAG provider (e.g., url, title, author)", - ) - - -class ReferencedDocument(BaseModel): - """Model representing a document referenced in generating a response. - - Attributes: - doc_url: Url to the referenced doc. - doc_title: Title of the referenced doc. 
- """ - - doc_url: Optional[AnyUrl] = Field( - default=None, description="URL of the referenced document" - ) - - doc_title: Optional[str] = Field( - default=None, description="Title of the referenced document" - ) - - source: Optional[str] = Field( - default=None, - description="Index name identifying the knowledge source from configuration", - ) - - -class RAGContext(BaseModel): - """Result of building RAG context from all enabled pre-query RAG sources. - - Attributes: - context_text: Formatted RAG context string for injection into the query. - rag_chunks: RAG chunks from pre-query sources (BYOK + Solr). - referenced_documents: Referenced documents from pre-query sources. - """ - - context_text: str = Field(default="", description="Formatted context for injection") - rag_chunks: list[RAGChunk] = Field( - default_factory=list, - description="RAG chunks from pre-query sources", - ) - referenced_documents: list[ReferencedDocument] = Field( - default_factory=list, - description="Documents from pre-query sources", - ) - - -class TurnSummary(BaseModel): - """Summary of a turn in llama stack.""" - - id: str = Field(default="", description="ID of the response") - llm_response: str = "" - tool_calls: list[ToolCallSummary] = Field(default_factory=list) - tool_results: list[ToolResultSummary] = Field(default_factory=list) - rag_chunks: list[RAGChunk] = Field(default_factory=list) - referenced_documents: list[ReferencedDocument] = Field(default_factory=list) - token_usage: TokenCounter = Field(default_factory=TokenCounter) - - -class TranscriptMetadata(BaseModel): - """Metadata for a transcript entry.""" - - provider: Optional[str] = None - model: str - query_provider: Optional[str] = None - query_model: Optional[str] = None - user_id: str - conversation_id: str - timestamp: str - - -class Transcript(BaseModel): - """Model representing a transcript entry to be stored.""" - - metadata: TranscriptMetadata - redacted_query: str - query_is_valid: bool - llm_response: str - 
rag_chunks: list[dict[str, Any]] = Field(default_factory=list) - truncated: bool - attachments: list[dict[str, Any]] = Field(default_factory=list) - tool_calls: list[dict[str, Any]] = Field(default_factory=list) - tool_results: list[dict[str, Any]] = Field(default_factory=list) diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py index 4b2e20448..a12d0ce71 100644 --- a/src/utils/vector_search.py +++ b/src/utils/vector_search.py @@ -18,10 +18,10 @@ import constants from configuration import configuration from log import get_logger +from models.common.responses.types import ResponseInput +from models.common.turn_summary import RAGChunk, RAGContext, ReferencedDocument from models.requests import SolrVectorSearchRequest -from models.responses import ReferencedDocument from utils.responses import resolve_vector_store_ids -from utils.types import RAGChunk, RAGContext, ResponseInput logger = get_logger(__name__) diff --git a/tests/integration/endpoints/test_query_byok_integration.py b/tests/integration/endpoints/test_query_byok_integration.py index 56030c107..650d840a7 100644 --- a/tests/integration/endpoints/test_query_byok_integration.py +++ b/tests/integration/endpoints/test_query_byok_integration.py @@ -15,8 +15,8 @@ from app.endpoints.query import query_endpoint_handler from authentication.interface import AuthTuple from configuration import AppConfig +from models.api.responses.successful import QueryResponse from models.requests import QueryRequest -from models.responses import QueryResponse # --------------------------------------------------------------------------- # Helpers diff --git a/tests/integration/endpoints/test_rlsapi_v1_integration.py b/tests/integration/endpoints/test_rlsapi_v1_integration.py index b04adb5dd..25be6f125 100644 --- a/tests/integration/endpoints/test_rlsapi_v1_integration.py +++ b/tests/integration/endpoints/test_rlsapi_v1_integration.py @@ -21,6 +21,7 @@ from app.endpoints.rlsapi_v1 import infer_endpoint from 
authentication.interface import AuthTuple from configuration import AppConfig +from models.common.moderation import ShieldModerationPassed from models.rlsapi.requests import ( RlsapiV1Attachment, RlsapiV1CLA, @@ -32,7 +33,6 @@ from models.rlsapi.responses import RlsapiV1InferResponse from tests.unit.utils.auth_helpers import mock_authorization_resolvers from utils.suid import check_suid -from utils.types import ShieldModerationPassed from version import __version__ # ========================================== diff --git a/tests/integration/test_middleware_integration.py b/tests/integration/test_middleware_integration.py index 3c5126aae..62e337673 100644 --- a/tests/integration/test_middleware_integration.py +++ b/tests/integration/test_middleware_integration.py @@ -4,7 +4,7 @@ from fastapi.testclient import TestClient from configuration import configuration -from models.api.responses import InternalServerErrorResponse +from models.api.responses.error import InternalServerErrorResponse class TestGlobalExceptionMiddlewareIntegration: # pylint: disable=too-few-public-methods diff --git a/tests/unit/app/endpoints/test_a2a.py b/tests/unit/app/endpoints/test_a2a.py index 337eb1ea0..6138d2568 100644 --- a/tests/unit/app/endpoints/test_a2a.py +++ b/tests/unit/app/endpoints/test_a2a.py @@ -353,7 +353,8 @@ class TestGetLightspeedAgentCard: """Tests for the agent card generation.""" def test_get_agent_card_with_config( - self, setup_configuration: AppConfig # pylint: disable=unused-argument + self, + setup_configuration: AppConfig, # pylint: disable=unused-argument ) -> None: """Test getting agent card with full configuration.""" agent_card = get_lightspeed_agent_card() diff --git a/tests/unit/app/endpoints/test_config.py b/tests/unit/app/endpoints/test_config.py index cd108ec2d..2cd39fa5c 100644 --- a/tests/unit/app/endpoints/test_config.py +++ b/tests/unit/app/endpoints/test_config.py @@ -33,7 +33,8 @@ async def test_config_endpoint_handler_configuration_not_loaded( with 
pytest.raises(HTTPException) as exc_info: await config_endpoint_handler( - auth=auth, request=request # pyright:ignore[reportArgumentType] + auth=auth, + request=request, # pyright:ignore[reportArgumentType] ) assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR @@ -66,7 +67,8 @@ async def test_config_endpoint_handler_configuration_loaded( auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") response = await config_endpoint_handler( - auth=auth, request=request # pyright:ignore[reportArgumentType] + auth=auth, + request=request, # pyright:ignore[reportArgumentType] ) assert response is not None assert response.configuration == minimal_config.configuration diff --git a/tests/unit/app/endpoints/test_conversations.py b/tests/unit/app/endpoints/test_conversations.py index 3f7b86e10..09af41bb4 100644 --- a/tests/unit/app/endpoints/test_conversations.py +++ b/tests/unit/app/endpoints/test_conversations.py @@ -19,19 +19,19 @@ update_conversation_endpoint_handler, ) from configuration import AppConfig -from models.api.responses import ( +from models.api.responses.error import ( ForbiddenResponse, InternalServerErrorResponse, ) -from models.config import Action -from models.database.conversations import UserConversation, UserTurn -from models.requests import ConversationUpdateRequest -from models.responses import ( +from models.api.responses.successful import ( ConversationDeleteResponse, ConversationResponse, ConversationsListResponse, ConversationUpdateResponse, ) +from models.config import Action +from models.database.conversations import UserConversation, UserTurn +from models.requests import ConversationUpdateRequest from tests.unit.utils.auth_helpers import mock_authorization_resolvers from utils.conversations import build_conversation_turns_from_items diff --git a/tests/unit/app/endpoints/test_conversations_v2.py b/tests/unit/app/endpoints/test_conversations_v2.py index 59d2face9..10c19222b 100644 --- 
a/tests/unit/app/endpoints/test_conversations_v2.py +++ b/tests/unit/app/endpoints/test_conversations_v2.py @@ -20,14 +20,16 @@ update_conversation_endpoint_handler, ) from configuration import AppConfig +from models.api.responses.successful import ConversationUpdateResponse from models.cache_entry import CacheEntry -from models.requests import ConversationUpdateRequest -from models.responses import ( - ConversationData, - ConversationUpdateResponse, +from models.common import ConversationData +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, ) +from models.requests import ConversationUpdateRequest from tests.unit.utils.auth_helpers import mock_authorization_resolvers -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary MOCK_AUTH = ("mock_user_id", "mock_username", False, "mock_token") VALID_CONVERSATION_ID = "123e4567-e89b-12d3-a456-426614174000" diff --git a/tests/unit/app/endpoints/test_health.py b/tests/unit/app/endpoints/test_health.py index a6a746f06..82ddd78d1 100644 --- a/tests/unit/app/endpoints/test_health.py +++ b/tests/unit/app/endpoints/test_health.py @@ -14,7 +14,8 @@ readiness_probe_get_method, ) from authentication.interface import AuthTuple -from models.responses import ProviderHealthStatus, ReadinessResponse +from models.api.responses.successful import ReadinessResponse +from models.common import ProviderHealthStatus from tests.unit.utils.auth_helpers import mock_authorization_resolvers diff --git a/tests/unit/app/endpoints/test_mcp_auth.py b/tests/unit/app/endpoints/test_mcp_auth.py index 6df771501..0e06c3688 100644 --- a/tests/unit/app/endpoints/test_mcp_auth.py +++ b/tests/unit/app/endpoints/test_mcp_auth.py @@ -10,6 +10,7 @@ from app.endpoints import mcp_auth from authentication.interface import AuthTuple from configuration import AppConfig +from models.api.responses.successful import MCPClientAuthOptionsResponse from models.config import ( Configuration, 
LlamaStackConfiguration, @@ -17,7 +18,6 @@ ServiceConfiguration, UserDataCollection, ) -from models.responses import MCPClientAuthOptionsResponse # Shared mock auth tuple with 4 fields as expected by the application MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token") diff --git a/tests/unit/app/endpoints/test_mcp_servers.py b/tests/unit/app/endpoints/test_mcp_servers.py index 9623c8625..5c34ee043 100644 --- a/tests/unit/app/endpoints/test_mcp_servers.py +++ b/tests/unit/app/endpoints/test_mcp_servers.py @@ -14,6 +14,11 @@ from app.endpoints import mcp_servers from authentication.interface import AuthTuple from configuration import AppConfig +from models.api.responses.successful import ( + MCPServerDeleteResponse, + MCPServerListResponse, + MCPServerRegistrationResponse, +) from models.config import ( Configuration, CORSConfiguration, @@ -24,11 +29,6 @@ UserDataCollection, ) from models.requests import MCPServerRegistrationRequest -from models.responses import ( - MCPServerDeleteResponse, - MCPServerListResponse, - MCPServerRegistrationResponse, -) MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token") diff --git a/tests/unit/app/endpoints/test_prompts.py b/tests/unit/app/endpoints/test_prompts.py index 8387a5473..a02503978 100644 --- a/tests/unit/app/endpoints/test_prompts.py +++ b/tests/unit/app/endpoints/test_prompts.py @@ -17,8 +17,8 @@ ) from authentication.interface import AuthTuple from configuration import AppConfig +from models.api.responses.successful import PromptDeleteResponse from models.requests import PromptCreateRequest, PromptUpdateRequest -from models.responses import PromptDeleteResponse from tests.unit.utils.auth_helpers import mock_authorization_resolvers MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token") diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py index 58a955289..9458aadf5 100644 --- 
a/tests/unit/app/endpoints/test_query.py +++ b/tests/unit/app/endpoints/test_query.py @@ -12,20 +12,20 @@ from app.endpoints.query import query_endpoint_handler, retrieve_response from configuration import AppConfig +from models.api.responses.successful import QueryResponse +from models.common.moderation import ShieldModerationPassed from models.common.responses.responses_api_params import ResponsesApiParams -from models.database.conversations import UserConversation -from models.requests import Attachment, QueryRequest -from models.responses import QueryResponse -from utils.token_counter import TokenCounter -from utils.types import ( +from models.common.turn_summary import ( RAGChunk, RAGContext, ReferencedDocument, - ShieldModerationPassed, ToolCallSummary, ToolResultSummary, TurnSummary, ) +from models.database.conversations import UserConversation +from models.requests import Attachment, QueryRequest +from utils.token_counter import TokenCounter # User ID must be proper UUID MOCK_AUTH = ( diff --git a/tests/unit/app/endpoints/test_responses.py b/tests/unit/app/endpoints/test_responses.py index 53ffc3d00..d4c0d631a 100644 --- a/tests/unit/app/endpoints/test_responses.py +++ b/tests/unit/app/endpoints/test_responses.py @@ -26,19 +26,17 @@ ) from configuration import AppConfig from constants import DEFAULT_SYSTEM_PROMPT, SUBSTITUTED_INSTRUCTIONS_PLACEHOLDER +from models.api.responses.successful import ResponsesResponse +from models.common.moderation import ShieldModerationBlocked, ShieldModerationPassed from models.common.responses.responses_api_params import ResponsesApiParams from models.common.responses.responses_context import ResponsesContext +from models.common.responses.responses_conversation_context import ( + ResponsesConversationContext, +) +from models.common.turn_summary import RAGContext, TurnSummary from models.config import Action, ModelContextProtocolServer from models.database.conversations import UserConversation from models.requests import 
ResponsesRequest -from models.responses import ResponsesResponse -from utils.types import ( - RAGContext, - ResponsesConversationContext, - ShieldModerationBlocked, - ShieldModerationPassed, - TurnSummary, -) MOCK_AUTH = ( "00000001-0001-0001-0001-000000000001", diff --git a/tests/unit/app/endpoints/test_responses_splunk.py b/tests/unit/app/endpoints/test_responses_splunk.py index 7e5c34bb0..4822ab556 100644 --- a/tests/unit/app/endpoints/test_responses_splunk.py +++ b/tests/unit/app/endpoints/test_responses_splunk.py @@ -22,10 +22,10 @@ handle_streaming_response, ) from configuration import AppConfig +from models.common.turn_summary import RAGContext, TurnSummary from models.requests import ResponsesRequest from observability.formats.responses import ResponsesEventData from tests.unit.app.endpoints.test_responses import build_api_params_and_context -from utils.types import RAGContext, TurnSummary MODULE = "app.endpoints.responses" MOCK_AUTH = ( diff --git a/tests/unit/app/endpoints/test_rlsapi_v1.py b/tests/unit/app/endpoints/test_rlsapi_v1.py index 87ab91000..38227cf4c 100644 --- a/tests/unit/app/endpoints/test_rlsapi_v1.py +++ b/tests/unit/app/endpoints/test_rlsapi_v1.py @@ -35,7 +35,8 @@ from authentication.interface import AuthTuple from authentication.rh_identity import RHIdentityData from configuration import AppConfig -from models.api.responses import ServiceUnavailableResponse +from models.api.responses.error import ServiceUnavailableResponse +from models.common.moderation import ShieldModerationBlocked, ShieldModerationPassed from models.rlsapi.requests import ( RlsapiV1Attachment, RlsapiV1Context, @@ -47,7 +48,6 @@ from tests.unit.utils.auth_helpers import mock_authorization_resolvers from utils.rh_identity import get_rh_identity_context from utils.suid import check_suid -from utils.types import ShieldModerationBlocked, ShieldModerationPassed MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token") diff --git 
a/tests/unit/app/endpoints/test_shields.py b/tests/unit/app/endpoints/test_shields.py index d9e260093..354e2817d 100644 --- a/tests/unit/app/endpoints/test_shields.py +++ b/tests/unit/app/endpoints/test_shields.py @@ -10,7 +10,7 @@ from app.endpoints.shields import shields_endpoint_handler from authentication.interface import AuthTuple from configuration import AppConfig -from models.responses import ShieldsResponse +from models.api.responses.successful import ShieldsResponse from tests.unit.utils.auth_helpers import mock_authorization_resolvers diff --git a/tests/unit/app/endpoints/test_stream_interrupt.py b/tests/unit/app/endpoints/test_stream_interrupt.py index 8a767ee36..dccdb592a 100644 --- a/tests/unit/app/endpoints/test_stream_interrupt.py +++ b/tests/unit/app/endpoints/test_stream_interrupt.py @@ -7,8 +7,8 @@ from fastapi import HTTPException from app.endpoints.stream_interrupt import stream_interrupt_endpoint_handler +from models.api.responses.successful import StreamingInterruptResponse from models.requests import StreamingInterruptRequest -from models.responses import StreamingInterruptResponse from utils.stream_interrupts import StreamInterruptRegistry REQUEST_ID_SUCCESS = "123e4567-e89b-12d3-a456-426614174000" diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index 30c52c341..d29fb6c50 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ b/tests/unit/app/endpoints/test_streaming_query.py @@ -64,20 +64,20 @@ MEDIA_TYPE_JSON, MEDIA_TYPE_TEXT, ) -from models.api.responses import InternalServerErrorResponse +from models.api.responses.error import InternalServerErrorResponse +from models.common.moderation import ShieldModerationPassed from models.common.responses.responses_api_params import ResponsesApiParams -from models.config import Action -from models.context import ResponseGeneratorContext -from models.requests import Attachment, QueryRequest -from utils.stream_interrupts 
import StreamInterruptRegistry -from utils.token_counter import TokenCounter -from utils.types import ( +from models.common.turn_summary import ( RAGChunk, RAGContext, ReferencedDocument, - ShieldModerationPassed, TurnSummary, ) +from models.config import Action +from models.context import ResponseGeneratorContext +from models.requests import Attachment, QueryRequest +from utils.stream_interrupts import StreamInterruptRegistry +from utils.token_counter import TokenCounter MOCK_AUTH_STREAMING = ( "00000001-0001-0001-0001-000000000001", diff --git a/tests/unit/app/endpoints/test_tools.py b/tests/unit/app/endpoints/test_tools.py index f6c0fa754..436fda75a 100644 --- a/tests/unit/app/endpoints/test_tools.py +++ b/tests/unit/app/endpoints/test_tools.py @@ -16,6 +16,7 @@ from app.endpoints.tools import _input_schema_to_parameters from authentication.interface import AuthTuple from configuration import AppConfig +from models.api.responses.successful import ToolsResponse from models.config import ( Configuration, CORSConfiguration, @@ -25,7 +26,6 @@ TLSConfiguration, UserDataCollection, ) -from models.responses import ToolsResponse # Shared mock auth tuple with 4 fields as expected by the application MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token") diff --git a/tests/unit/app/test_main_middleware.py b/tests/unit/app/test_main_middleware.py index ea2ab9265..f0b76885a 100644 --- a/tests/unit/app/test_main_middleware.py +++ b/tests/unit/app/test_main_middleware.py @@ -10,7 +10,7 @@ from starlette.types import Message, Receive, Scope, Send from app.main import GlobalExceptionMiddleware, RestApiMetricsMiddleware -from models.api.responses import InternalServerErrorResponse +from models.api.responses.error import InternalServerErrorResponse def _make_scope(path: str = "/test", root_path: str = "") -> Scope: diff --git a/tests/unit/cache/test_postgres_cache.py b/tests/unit/cache/test_postgres_cache.py index b01490919..bf2855de3 100644 --- 
a/tests/unit/cache/test_postgres_cache.py +++ b/tests/unit/cache/test_postgres_cache.py @@ -11,10 +11,14 @@ from cache.cache_error import CacheError from cache.postgres_cache import PostgresCache from models.cache_entry import CacheEntry +from models.common import ConversationData +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) from models.config import PostgreSQLDatabaseConfiguration -from models.responses import ConversationData from utils import suid -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary USER_ID_1 = suid.get_suid() USER_ID_2 = suid.get_suid() diff --git a/tests/unit/cache/test_sqlite_cache.py b/tests/unit/cache/test_sqlite_cache.py index e4de441b3..3c87e677e 100644 --- a/tests/unit/cache/test_sqlite_cache.py +++ b/tests/unit/cache/test_sqlite_cache.py @@ -10,10 +10,14 @@ from cache.cache_error import CacheError from cache.sqlite_cache import SQLiteCache from models.cache_entry import CacheEntry +from models.common import ConversationData +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) from models.config import SQLiteDatabaseConfiguration -from models.responses import ConversationData from utils import suid -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary USER_ID_1 = suid.get_suid() USER_ID_2 = suid.get_suid() diff --git a/tests/unit/models/responses/test_authorized_response.py b/tests/unit/models/responses/test_authorized_response.py index 3a8675078..c200d2dc1 100644 --- a/tests/unit/models/responses/test_authorized_response.py +++ b/tests/unit/models/responses/test_authorized_response.py @@ -3,7 +3,7 @@ import pytest from pydantic import ValidationError -from models.responses import AuthorizedResponse +from models.api.responses.successful import AuthorizedResponse class TestAuthorizedResponse: diff --git a/tests/unit/models/responses/test_error_responses.py 
b/tests/unit/models/responses/test_error_responses.py index aa99aebe6..602ccab94 100644 --- a/tests/unit/models/responses/test_error_responses.py +++ b/tests/unit/models/responses/test_error_responses.py @@ -6,7 +6,7 @@ from fastapi import status from pydantic_core import SchemaError -from models.api.responses import ( +from models.api.responses.constants import ( BAD_REQUEST_DESCRIPTION, FORBIDDEN_DESCRIPTION, INTERNAL_SERVER_ERROR_DESCRIPTION, @@ -16,6 +16,8 @@ SERVICE_UNAVAILABLE_DESCRIPTION, UNAUTHORIZED_DESCRIPTION, UNPROCESSABLE_CONTENT_DESCRIPTION, +) +from models.api.responses.error import ( AbstractErrorResponse, BadRequestResponse, DetailModel, diff --git a/tests/unit/models/responses/test_query_response.py b/tests/unit/models/responses/test_query_response.py index 95e7e0498..ce547ec1a 100644 --- a/tests/unit/models/responses/test_query_response.py +++ b/tests/unit/models/responses/test_query_response.py @@ -2,8 +2,12 @@ from pydantic import AnyUrl -from models.responses import QueryResponse -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary +from models.api.responses.successful import QueryResponse +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) class TestQueryResponse: diff --git a/tests/unit/models/responses/test_rag_chunk.py b/tests/unit/models/responses/test_rag_chunk.py index 5e545f867..2586614da 100644 --- a/tests/unit/models/responses/test_rag_chunk.py +++ b/tests/unit/models/responses/test_rag_chunk.py @@ -2,8 +2,8 @@ from pydantic import HttpUrl -from models.responses import ReferencedDocument -from utils.types import RAGChunk, RAGContext +from models.common import ReferencedDocument +from models.common.turn_summary import RAGChunk, RAGContext class TestRAGChunk: diff --git a/tests/unit/models/responses/test_response_types.py b/tests/unit/models/responses/test_response_types.py index 69743e1db..2616cb801 100644 --- 
a/tests/unit/models/responses/test_response_types.py +++ b/tests/unit/models/responses/test_response_types.py @@ -3,7 +3,11 @@ import pytest from pydantic import ValidationError -from models.responses import ConversationData, ConversationDetails, ProviderHealthStatus +from models.common import ( + ConversationData, + ConversationDetails, + ProviderHealthStatus, +) class TestConversationDetails: diff --git a/tests/unit/models/responses/test_successful_responses.py b/tests/unit/models/responses/test_successful_responses.py index 4d5004148..bd86f9ceb 100644 --- a/tests/unit/models/responses/test_successful_responses.py +++ b/tests/unit/models/responses/test_successful_responses.py @@ -6,19 +6,10 @@ from pydantic import AnyHttpUrl, AnyUrl, ConfigDict, ValidationError from pydantic_core import SchemaError -from models.config import ( - Configuration, - LlamaStackConfiguration, - ServiceConfiguration, - UserDataCollection, -) -from models.responses import ( - AbstractSuccessfulResponse, +from models.api.responses.successful import ( AuthorizedResponse, ConfigurationResponse, - ConversationData, ConversationDeleteResponse, - ConversationDetails, ConversationResponse, ConversationsListResponse, ConversationsListResponseV2, @@ -28,9 +19,7 @@ InfoResponse, LivenessResponse, MCPClientAuthOptionsResponse, - MCPServerAuthInfo, ModelsResponse, - ProviderHealthStatus, ProviderResponse, ProvidersListResponse, QueryResponse, @@ -42,7 +31,24 @@ StreamingQueryResponse, ToolsResponse, ) -from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary +from models.api.responses.successful.bases import AbstractSuccessfulResponse +from models.common import ( + ConversationData, + ConversationDetails, + MCPServerAuthInfo, + ProviderHealthStatus, +) +from models.common.turn_summary import ( + ReferencedDocument, + ToolCallSummary, + ToolResultSummary, +) +from models.config import ( + Configuration, + LlamaStackConfiguration, + ServiceConfiguration, + UserDataCollection, +) 
class TestModelsResponse: @@ -1108,7 +1114,6 @@ def test_openapi_response_structure(self) -> None: schema = content["schema"] assert schema["type"] == "string" - assert schema["format"] == "text/event-stream" def test_model_json_schema_has_examples(self) -> None: """Test that model_json_schema() includes examples. diff --git a/tests/unit/models/responses/test_types.py b/tests/unit/models/responses/test_types.py index 90e73a1f8..411248b97 100644 --- a/tests/unit/models/responses/test_types.py +++ b/tests/unit/models/responses/test_types.py @@ -3,7 +3,11 @@ import pytest from pydantic import ValidationError -from models.responses import ConversationData, ConversationDetails, ProviderHealthStatus +from models.common import ( + ConversationData, + ConversationDetails, + ProviderHealthStatus, +) class TestConversationDetails: diff --git a/tests/unit/models/rlsapi/test_responses.py b/tests/unit/models/rlsapi/test_responses.py index f561b42f8..0511d5587 100644 --- a/tests/unit/models/rlsapi/test_responses.py +++ b/tests/unit/models/rlsapi/test_responses.py @@ -6,7 +6,7 @@ import pytest from pydantic import BaseModel, ValidationError -from models.responses import AbstractSuccessfulResponse +from models.api.responses.successful.bases import AbstractSuccessfulResponse from models.rlsapi.responses import ( RlsapiV1InferData, RlsapiV1InferResponse, diff --git a/tests/unit/utils/test_conversations.py b/tests/unit/utils/test_conversations.py index 2ddc65373..3003e2e35 100644 --- a/tests/unit/utils/test_conversations.py +++ b/tests/unit/utils/test_conversations.py @@ -10,6 +10,7 @@ from pytest_mock import MockerFixture from constants import DEFAULT_RAG_TOOL +from models.common.turn_summary import ToolCallSummary from models.database.conversations import UserTurn from utils.conversations import ( _build_tool_call_summary_from_item, @@ -18,7 +19,6 @@ build_conversation_turns_from_items, get_all_conversation_items, ) -from utils.types import ToolCallSummary # Default conversation 
start time for tests DEFAULT_CONVERSATION_START_TIME = datetime.fromisoformat( diff --git a/tests/unit/utils/test_endpoints.py b/tests/unit/utils/test_endpoints.py index 03a88e8e1..2220d25d2 100644 --- a/tests/unit/utils/test_endpoints.py +++ b/tests/unit/utils/test_endpoints.py @@ -11,9 +11,12 @@ from pytest_mock import MockerFixture from sqlalchemy.exc import SQLAlchemyError +from models.common.responses.responses_conversation_context import ( + ResponsesConversationContext, +) +from models.common.turn_summary import ReferencedDocument from models.database.conversations import UserConversation, UserTurn from utils import endpoints -from utils.types import ReferencedDocument, ResponsesConversationContext @pytest.fixture(name="input_file") diff --git a/tests/unit/utils/test_query.py b/tests/unit/utils/test_query.py index f21f5189f..e13ec18ab 100644 --- a/tests/unit/utils/test_query.py +++ b/tests/unit/utils/test_query.py @@ -15,12 +15,13 @@ from cache.cache_error import CacheError from configuration import AppConfig -from models.api.responses import ( +from models.api.responses.error import ( InternalServerErrorResponse, PromptTooLongResponse, QuotaExceededResponse, ) from models.cache_entry import CacheEntry +from models.common.turn_summary import TurnSummary from models.config import Action from models.database.conversations import UserConversation, UserTurn from models.requests import Attachment, QueryRequest @@ -41,7 +42,6 @@ validate_model_provider_override, ) from utils.token_counter import TokenCounter -from utils.types import TurnSummary @pytest.fixture(name="mock_config") diff --git a/tests/unit/utils/test_shields.py b/tests/unit/utils/test_shields.py index b11562704..9d4dd3c48 100644 --- a/tests/unit/utils/test_shields.py +++ b/tests/unit/utils/test_shields.py @@ -481,9 +481,10 @@ def test_raises_422_when_shield_ids_provided_and_override_disabled( validate_shield_ids_override(query_request, mock_config) assert exc_info.value.status_code == 
status.HTTP_422_UNPROCESSABLE_ENTITY - # pylint: disable=line-too-long - assert "Shield IDs customization is disabled" in exc_info.value.detail["response"] # type: ignore - assert "disable_shield_ids_override" in exc_info.value.detail["cause"] # type: ignore + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Shield IDs customization is disabled" in detail["response"] + assert "disable_shield_ids_override" in detail["cause"] def test_raises_422_when_empty_list_shield_ids_and_override_disabled( self, mocker: MockerFixture diff --git a/tests/unit/utils/test_transcripts.py b/tests/unit/utils/test_transcripts.py index 10de4fe4c..aeeafe7a0 100644 --- a/tests/unit/utils/test_transcripts.py +++ b/tests/unit/utils/test_transcripts.py @@ -5,6 +5,7 @@ from pytest_mock import MockerFixture from configuration import AppConfig +from models.common.turn_summary import ToolCallSummary, ToolResultSummary, TurnSummary from models.requests import QueryRequest from utils.transcripts import ( construct_transcripts_path, @@ -12,7 +13,6 @@ create_transcript_metadata, store_transcript, ) -from utils.types import ToolCallSummary, ToolResultSummary, TurnSummary def test_construct_transcripts_path(mocker: MockerFixture) -> None: diff --git a/tests/unit/utils/test_types.py b/tests/unit/utils/test_types.py index 6a62c5da5..8447054f0 100644 --- a/tests/unit/utils/test_types.py +++ b/tests/unit/utils/test_types.py @@ -11,12 +11,12 @@ from pydantic import AnyUrl, ValidationError from models.common.responses.responses_api_params import ResponsesApiParams -from utils.types import ( +from models.common.turn_summary import ( ReferencedDocument, ToolCallSummary, ToolResultSummary, - content_to_str, ) +from utils.types import content_to_str class TestContentToStr: diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py index 2aafab0a7..64bc33b10 100644 --- a/tests/unit/utils/test_vector_search.py +++ b/tests/unit/utils/test_vector_search.py 
@@ -6,8 +6,8 @@ import constants from configuration import AppConfig +from models.common.turn_summary import RAGChunk from models.requests import SolrVectorSearchRequest -from utils.types import RAGChunk from utils.vector_search import ( _build_document_url, _build_query_params,