diff --git a/docs/openapi.json b/docs/openapi.json
index 8dae125f8..e0df7d8a9 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -6176,8 +6176,7 @@
                         "content": {
                             "text/event-stream": {
                                 "schema": {
-                                    "type": "string",
-                                    "format": "text/event-stream"
+                                    "type": "string"
                                 },
                                 "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\", \"request_id\": \"123e4567-e89b-12d3-a456-426614174001\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"No Violation\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 1, \"token\": \"\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 2, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 3, \"token\": \"!\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 4, \"token\": \" How\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 5, \"token\": \" can\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 6, \"token\": \" I\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 7, \"token\": \" assist\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 8, \"token\": \" you\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 9, \"token\": \" today\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 10, \"token\": \"?\"}}\n\ndata: {\"event\": \"turn_complete\", \"data\": {\"token\": \"Hello! How can I assist you today?\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 11, \"output_tokens\": 19}, \"available_quotas\": {}}\n\n"
                             }
@@ -12048,7 +12047,7 @@
                     "configuration"
                 ],
                 "title": "ConfigurationResponse",
-                "description": "Success response model for the config endpoint.",
+                "description": "Success response model for the config endpoint.\n\nAttributes:\n    configuration: Parsed application configuration returned to the client.",
                 "examples": [
                     {
                         "configuration": {
@@ -12624,7 +12623,7 @@
                     "message"
                 ],
                 "title": "ConversationUpdateResponse",
-                "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n    conversation_id: The conversation ID (UUID) that was updated.\n    success: Whether the update was successful.\n    message: A message about the update result.\n\nExample:\n    ```python\n    update_response = ConversationUpdateResponse(\n        conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n        success=True,\n        message=\"Topic summary updated successfully\",\n    )\n    ```",
+                "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n    conversation_id: The conversation ID (UUID) that was updated.\n    success: Whether the update was successful.\n    message: A message about the update result.",
                 "examples": [
                     {
                         "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
@@ -13012,7 +13011,7 @@
                     "response"
                 ],
                 "title": "FeedbackResponse",
-                "description": "Model representing a response to a feedback request.\n\nAttributes:\n    response: The response of the feedback request.\n\nExample:\n    ```python\n    feedback_response = FeedbackResponse(response=\"feedback received\")\n    ```",
+                "description": "Model representing a response to a feedback request.\n\nAttributes:\n    response: The response of the feedback request.",
                 "examples": [
                     {
                         "response": "feedback received"
@@ -13050,7 +13049,7 @@
                     "status"
                 ],
                 "title": "FeedbackStatusUpdateResponse",
-                "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n    status: The previous and current status of the service and who updated it.\n\nExample:\n    ```python\n    status_response = StatusResponse(\n        status={\n            \"previous_status\": true,\n            \"updated_status\": false,\n            \"updated_by\": \"user/test\",\n            \"timestamp\": \"2023-03-15 12:34:56\"\n        },\n    )\n    ```",
+                "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n    status: The previous and current status of the service and who updated it.",
                 "examples": [
                     {
                         "status": {
@@ -13424,7 +13423,7 @@
                     "llama_stack_version"
                 ],
                 "title": "InfoResponse",
-                "description": "Model representing a response to an info request.\n\nAttributes:\n    name: Service name.\n    service_version: Service version.\n    llama_stack_version: Llama Stack version.\n\nExample:\n    ```python\n    info_response = InfoResponse(\n        name=\"Lightspeed Stack\",\n        service_version=\"1.0.0\",\n        llama_stack_version=\"0.2.22\",\n    )\n    ```",
+                "description": "Model representing a response to an info request.\n\nAttributes:\n    name: Service name.\n    service_version: Service version.\n    llama_stack_version: Llama Stack version.",
                 "examples": [
                     {
                         "llama_stack_version": "1.0.0",
@@ -13639,7 +13638,7 @@
                     "alive"
                 ],
                 "title": "LivenessResponse",
-                "description": "Model representing a response to a liveness request.\n\nAttributes:\n    alive: If app is alive.\n\nExample:\n    ```python\n    liveness_response = LivenessResponse(alive=True)\n    ```",
+                "description": "Model representing a response to a liveness request.\n\nAttributes:\n    alive: If app is alive.",
                 "examples": [
                     {
                         "alive": true
@@ -13726,7 +13725,7 @@
                 },
                 "type": "object",
                 "title": "MCPClientAuthOptionsResponse",
-                "description": "Response containing MCP servers that accept client-provided authorization.",
+                "description": "Response containing MCP servers that accept client-provided authorization.\n\nAttributes:\n    servers: MCP servers that declare client authentication headers.",
                 "examples": [
                     {
                         "servers": [
@@ -13821,7 +13820,7 @@
                     "message"
                 ],
                 "title": "MCPServerDeleteResponse",
-                "description": "Response for a successful MCP server deletion.",
+                "description": "Response for a successful MCP server deletion.\n\nAttributes:\n    name: Deleted MCP server name.\n    message: Status message.",
                 "examples": [
                     {
                         "message": "MCP server 'test-mcp-server' unregistered successfully",
@@ -13879,7 +13878,7 @@
                 },
                 "type": "object",
                 "title": "MCPServerListResponse",
-                "description": "Response listing all registered MCP servers.",
+                "description": "Response listing all registered MCP servers.\n\nAttributes:\n    servers: All registered MCP servers (static and dynamic).",
                 "examples": [
                     {
                         "servers": [
@@ -14063,7 +14062,7 @@
                     "message"
                 ],
                 "title": "MCPServerRegistrationResponse",
-                "description": "Response for a successful MCP server registration.",
+                "description": "Response for a successful MCP server registration.\n\nAttributes:\n    name: Registered MCP server name.\n    url: Registered MCP server URL.\n    provider_id: MCP provider identification.\n    message: Status message.",
                 "examples": [
                     {
                         "message": "MCP server 'mcp-integration-tools' registered successfully",
@@ -16494,7 +16493,7 @@
                     "response"
                 ],
                 "title": "PromptDeleteResponse",
-                "description": "Result of deleting a stored prompt (always HTTP 200, like conversations v2).",
+                "description": "Result of deleting a stored prompt (always HTTP 200, like conversations v2).\n\nAttributes:\n    prompt_id: Prompt identifier that was passed to delete.\n    deleted: Whether the prompt was deleted successfully.\n    response: Human-readable response.",
                 "examples": [
                     {
                         "label": "deleted",
@@ -16573,7 +16572,7 @@
                     "version"
                 ],
                 "title": "PromptResourceResponse",
-                "description": "A stored prompt template as returned by Llama Stack.",
+                "description": "A stored prompt template as returned by Llama Stack.\n\nAttributes:\n    prompt_id: Prompt identifier from Llama Stack.\n    version: Version number for this prompt.\n    is_default: Whether this version is the default.\n    prompt: Prompt text with placeholders.\n    variables: Variable names used in the template.",
                 "examples": [
                     {
                         "is_default": true,
@@ -16711,7 +16710,7 @@
                 "additionalProperties": false,
                 "type": "object",
                 "title": "PromptsListResponse",
-                "description": "List of stored prompt templates returned by Llama Stack.",
+                "description": "List of stored prompt templates returned by Llama Stack.\n\nAttributes:\n    data: Prompt entries as returned by the Llama Stack list API.",
                 "examples": [
                     {
                         "data": [
@@ -17184,7 +17183,7 @@
                     "truncated": {
                         "type": "boolean",
                         "title": "Truncated",
-                        "description": "Deprecated:Whether conversation history was truncated",
+                        "description": "Deprecated: whether conversation history was truncated",
                         "default": false,
                         "examples": [
                             false,
@@ -17767,7 +17766,7 @@
                     "providers"
                 ],
                 "title": "ReadinessResponse",
-                "description": "Model representing response to a readiness request.\n\nAttributes:\n    ready: If service is ready.\n    reason: The reason for the readiness.\n    providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n    ```python\n    readiness_response = ReadinessResponse(\n        ready=False,\n        reason=\"Service is not ready\",\n        providers=[\n            ProviderHealthStatus(\n                provider_id=\"ollama\",\n                status=\"unhealthy\",\n                message=\"Server is unavailable\"\n            )\n        ]\n    )\n    ```",
+                "description": "Model representing a response to a readiness request.\n\nAttributes:\n    ready: If service is ready.\n    reason: The reason for the readiness.\n    providers: List of unhealthy providers in case of readiness failure.",
                 "examples": [
                     {
                         "providers": [],
@@ -19392,7 +19391,7 @@
                     "status"
                 ],
                 "title": "StatusResponse",
-                "description": "Model representing a response to a status request.\n\nAttributes:\n    functionality: The functionality of the service.\n    status: The status of the service.\n\nExample:\n    ```python\n    status_response = StatusResponse(\n        functionality=\"feedback\",\n        status={\"enabled\": True},\n    )\n    ```",
+                "description": "Model representing a response to a status request.\n\nAttributes:\n    functionality: The functionality of the service.\n    status: The status of the service.",
                 "examples": [
                     {
                         "functionality": "feedback",
@@ -19460,7 +19459,7 @@
                     "message"
                 ],
                 "title": "StreamingInterruptResponse",
-                "description": "Model representing a response to a streaming interrupt request.\n\nAttributes:\n    request_id: The streaming request ID targeted by the interrupt call.\n    interrupted: Whether an in-progress stream was interrupted.\n    message: Human-readable interruption status message.\n\nExample:\n    ```python\n    response = StreamingInterruptResponse(\n        request_id=\"123e4567-e89b-12d3-a456-426614174000\",\n        interrupted=True,\n        message=\"Streaming request interrupted\",\n    )\n    ```",
+                "description": "Model representing a response to a streaming interrupt request.\n\nAttributes:\n    request_id: The streaming request ID targeted by the interrupt call.\n    interrupted: Whether an in-progress stream was interrupted.\n    message: Human-readable interruption status message.",
                 "examples": [
                     {
                         "interrupted": true,
diff --git a/src/app/endpoints/authorized.py b/src/app/endpoints/authorized.py
index 63a3b5b06..175c42a1f 100644
--- a/src/app/endpoints/authorized.py
+++ b/src/app/endpoints/authorized.py
@@ -7,15 +7,13 @@
 from authentication import get_auth_dependency
 from authentication.interface import AuthTuple
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.responses import (
-    AuthorizedResponse,
-)
+from models.api.responses.successful import AuthorizedResponse
 
 logger = get_logger(__name__)
 router = APIRouter(tags=["authorized"])
diff --git a/src/app/endpoints/config.py b/src/app/endpoints/config.py
index a1190a959..21dea5097 100644
--- a/src/app/endpoints/config.py
+++ b/src/app/endpoints/config.py
@@ -9,17 +9,15 @@
 from authorization.middleware import authorize
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
+from models.api.responses.successful import ConfigurationResponse
 from models.config import Action
-from models.responses import (
-    ConfigurationResponse,
-)
 from utils.endpoints import check_configuration_loaded
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/conversations_v1.py b/src/app/endpoints/conversations_v1.py
index 9cde18542..937d6a9e8 100644
--- a/src/app/endpoints/conversations_v1.py
+++ b/src/app/endpoints/conversations_v1.py
@@ -16,8 +16,8 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     BadRequestResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -25,18 +25,18 @@
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action
-from models.database.conversations import (
-    UserConversation,
-)
-from models.requests import ConversationUpdateRequest
-from models.responses import (
+from models.api.responses.successful import (
     ConversationDeleteResponse,
-    ConversationDetails,
     ConversationResponse,
     ConversationsListResponse,
     ConversationUpdateResponse,
 )
+from models.common import ConversationDetails
+from models.config import Action
+from models.database.conversations import (
+    UserConversation,
+)
+from models.requests import ConversationUpdateRequest
 from utils.conversations import (
     build_conversation_turns_from_items,
     get_all_conversation_items,
diff --git a/src/app/endpoints/conversations_v2.py b/src/app/endpoints/conversations_v2.py
index 773ce4a0a..4109ee40d 100644
--- a/src/app/endpoints/conversations_v2.py
+++ b/src/app/endpoints/conversations_v2.py
@@ -8,8 +8,8 @@
 from authorization.middleware import authorize
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     BadRequestResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -17,17 +17,19 @@
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.cache_entry import CacheEntry
-from models.config import Action
-from models.requests import ConversationUpdateRequest
-from models.responses import (
+from models.api.responses.successful import (
     ConversationDeleteResponse,
     ConversationResponse,
     ConversationsListResponseV2,
-    ConversationTurn,
     ConversationUpdateResponse,
+)
+from models.cache_entry import CacheEntry
+from models.common import (
+    ConversationTurn,
     Message,
 )
+from models.config import Action
+from models.requests import ConversationUpdateRequest
 from utils.endpoints import check_configuration_loaded
 from utils.suid import check_suid
 
diff --git a/src/app/endpoints/feedback.py b/src/app/endpoints/feedback.py
index 8779a6189..94cdd6a42 100644
--- a/src/app/endpoints/feedback.py
+++ b/src/app/endpoints/feedback.py
@@ -13,21 +13,21 @@
 from authorization.middleware import authorize
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     NotFoundResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action
-from models.requests import FeedbackRequest, FeedbackStatusUpdateRequest
-from models.responses import (
+from models.api.responses.successful import (
     FeedbackResponse,
     FeedbackStatusUpdateResponse,
     StatusResponse,
 )
+from models.config import Action
+from models.requests import FeedbackRequest, FeedbackStatusUpdateRequest
 from utils.endpoints import check_configuration_loaded, retrieve_conversation
 from utils.suid import get_suid
 
diff --git a/src/app/endpoints/health.py b/src/app/endpoints/health.py
index 57e349990..d2f56efc7 100644
--- a/src/app/endpoints/health.py
+++ b/src/app/endpoints/health.py
@@ -17,18 +17,18 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action
-from models.responses import (
+from models.api.responses.successful import (
     LivenessResponse,
-    ProviderHealthStatus,
     ReadinessResponse,
 )
+from models.common import ProviderHealthStatus
+from models.config import Action
 
 logger = get_logger(__name__)
 router = APIRouter(tags=["health"])
diff --git a/src/app/endpoints/info.py b/src/app/endpoints/info.py
index a58fdd1ec..2acd89b03 100644
--- a/src/app/endpoints/info.py
+++ b/src/app/endpoints/info.py
@@ -11,16 +11,14 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
+from models.api.responses.successful import InfoResponse
 from models.config import Action
-from models.responses import (
-    InfoResponse,
-)
 from version import __version__
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/mcp_auth.py b/src/app/endpoints/mcp_auth.py
index 85ca8ac48..62aea7615 100644
--- a/src/app/endpoints/mcp_auth.py
+++ b/src/app/endpoints/mcp_auth.py
@@ -10,18 +10,16 @@
 from authorization.middleware import authorize
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
+from models.api.responses.successful import MCPClientAuthOptionsResponse
+from models.common import MCPServerAuthInfo
 from models.config import Action
-from models.responses import (
-    MCPClientAuthOptionsResponse,
-    MCPServerAuthInfo,
-)
 from utils.endpoints import check_configuration_loaded
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/mcp_servers.py b/src/app/endpoints/mcp_servers.py
index 90b524760..5f34bea38 100644
--- a/src/app/endpoints/mcp_servers.py
+++ b/src/app/endpoints/mcp_servers.py
@@ -11,8 +11,8 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ConflictResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -20,14 +20,14 @@
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action, ModelContextProtocolServer
-from models.requests import MCPServerRegistrationRequest
-from models.responses import (
+from models.api.responses.successful import (
     MCPServerDeleteResponse,
-    MCPServerInfo,
     MCPServerListResponse,
     MCPServerRegistrationResponse,
 )
+from models.common import MCPServerInfo
+from models.config import Action, ModelContextProtocolServer
+from models.requests import MCPServerRegistrationRequest
 from utils.endpoints import check_configuration_loaded
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/metrics.py b/src/app/endpoints/metrics.py
index 134f88944..c33a6866c 100644
--- a/src/app/endpoints/metrics.py
+++ b/src/app/endpoints/metrics.py
@@ -13,8 +13,8 @@
 from authentication.interface import AuthTuple
 from authorization.middleware import authorize
 from metrics.utils import setup_model_metrics
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py
index 59b6e4178..ddd093214 100644
--- a/src/app/endpoints/models.py
+++ b/src/app/endpoints/models.py
@@ -12,18 +12,16 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
+from models.api.responses.successful import ModelsResponse
 from models.config import Action
 from models.requests import ModelFilter
-from models.responses import (
-    ModelsResponse,
-)
 from utils.endpoints import check_configuration_loaded
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/prompts.py b/src/app/endpoints/prompts.py
index 0c66d4768..956bb3029 100644
--- a/src/app/endpoints/prompts.py
+++ b/src/app/endpoints/prompts.py
@@ -13,8 +13,8 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     BadRequestResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -22,13 +22,13 @@
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action
-from models.requests import PromptCreateRequest, PromptUpdateRequest
-from models.responses import (
+from models.api.responses.successful import (
     PromptDeleteResponse,
     PromptResourceResponse,
     PromptsListResponse,
 )
+from models.config import Action
+from models.requests import PromptCreateRequest, PromptUpdateRequest
 from utils.endpoints import check_configuration_loaded
 from utils.query import handle_known_apistatus_errors
 from utils.suid import check_suid_prompt
diff --git a/src/app/endpoints/providers.py b/src/app/endpoints/providers.py
index b2060de86..0d7592ae0 100644
--- a/src/app/endpoints/providers.py
+++ b/src/app/endpoints/providers.py
@@ -13,19 +13,19 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     NotFoundResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action
-from models.responses import (
+from models.api.responses.successful import (
     ProviderResponse,
     ProvidersListResponse,
 )
+from models.config import Action
 from utils.endpoints import check_configuration_loaded
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
index 096feef5e..8a04a4d2a 100644
--- a/src/app/endpoints/query.py
+++ b/src/app/endpoints/query.py
@@ -24,8 +24,8 @@
 from configuration import configuration
 from constants import ENDPOINT_PATH_QUERY
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     NotFoundResponse,
@@ -35,12 +35,12 @@
     UnauthorizedResponse,
     UnprocessableEntityResponse,
 )
+from models.api.responses.successful import QueryResponse
+from models.common.moderation import ShieldModerationResult
 from models.common.responses.responses_api_params import ResponsesApiParams
+from models.common.turn_summary import TurnSummary
 from models.config import Action
 from models.requests import QueryRequest
-from models.responses import (
-    QueryResponse,
-)
 from utils.conversations import append_turn_items_to_conversation
 from utils.endpoints import (
     check_configuration_loaded,
@@ -68,10 +68,6 @@
 )
 from utils.shields import run_shield_moderation, validate_shield_ids_override
 from utils.suid import normalize_conversation_id
-from utils.types import (
-    ShieldModerationResult,
-    TurnSummary,
-)
 from utils.vector_search import build_rag_context
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/rags.py b/src/app/endpoints/rags.py
index 955a4b447..c60c6db64 100644
--- a/src/app/endpoints/rags.py
+++ b/src/app/endpoints/rags.py
@@ -12,19 +12,19 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     NotFoundResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action, ByokRag
-from models.responses import (
+from models.api.responses.successful import (
     RAGInfoResponse,
     RAGListResponse,
 )
+from models.config import Action, ByokRag
 from utils.endpoints import check_configuration_loaded
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py
index 34f819199..6c3757de4 100644
--- a/src/app/endpoints/responses.py
+++ b/src/app/endpoints/responses.py
@@ -38,8 +38,8 @@
 from configuration import configuration
 from constants import ENDPOINT_PATH_RESPONSES, SUBSTITUTED_INSTRUCTIONS_PLACEHOLDER
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH
+from models.api.responses.error import (
     ConflictResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -50,13 +50,13 @@
     UnauthorizedResponse,
     UnprocessableEntityResponse,
 )
+from models.api.responses.successful import ResponsesResponse
+from models.common.moderation import ShieldModerationBlocked
 from models.common.responses.responses_api_params import ResponsesApiParams
 from models.common.responses.responses_context import ResponsesContext
+from models.common.turn_summary import TurnSummary
 from models.config import Action
 from models.requests import ResponsesRequest
-from models.responses import (
-    ResponsesResponse,
-)
 from observability import ResponsesEventData, build_responses_event, send_splunk_event
 from utils.conversations import append_turn_items_to_conversation
 from utils.endpoints import (
@@ -100,10 +100,6 @@
     normalize_conversation_id,
 )
 from utils.tool_formatter import translate_vector_store_ids_to_user_facing
-from utils.types import (
-    ShieldModerationBlocked,
-    TurnSummary,
-)
 from utils.vector_search import (
     append_inline_rag_context_to_responses_input,
     build_rag_context,
diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py
index 72d7958c3..08555a4d5 100644
--- a/src/app/endpoints/rlsapi_v1.py
+++ b/src/app/endpoints/rlsapi_v1.py
@@ -26,8 +26,8 @@
 from constants import ENDPOINT_PATH_INFER
 from log import get_logger
 from metrics import recording
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     NotFoundResponse,
diff --git a/src/app/endpoints/root.py b/src/app/endpoints/root.py
index e966a5cef..5b88f8b87 100644
--- a/src/app/endpoints/root.py
+++ b/src/app/endpoints/root.py
@@ -9,8 +9,8 @@
 from authentication.interface import AuthTuple
 from authorization.middleware import authorize
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
diff --git a/src/app/endpoints/shields.py b/src/app/endpoints/shields.py
index 779a73ee9..480e02d50 100644
--- a/src/app/endpoints/shields.py
+++ b/src/app/endpoints/shields.py
@@ -12,17 +12,15 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
+from models.api.responses.successful import ShieldsResponse
 from models.config import Action
-from models.responses import (
-    ShieldsResponse,
-)
 from utils.endpoints import check_configuration_loaded
 
 logger = get_logger(__name__)
diff --git a/src/app/endpoints/stream_interrupt.py b/src/app/endpoints/stream_interrupt.py
index d4ee6239a..7c58b21ad 100644
--- a/src/app/endpoints/stream_interrupt.py
+++ b/src/app/endpoints/stream_interrupt.py
@@ -7,18 +7,16 @@
 from authentication import get_auth_dependency
 from authentication.interface import AuthTuple
 from authorization.middleware import authorize
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     NotFoundResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
+from models.api.responses.successful import StreamingInterruptResponse
 from models.config import Action
 from models.requests import StreamingInterruptRequest
-from models.responses import (
-    StreamingInterruptResponse,
-)
 from utils.stream_interrupts import (
     CancelStreamResult,
     StreamInterruptRegistry,
diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
index 1ac7a73d4..faccdc920 100644
--- a/src/app/endpoints/streaming_query.py
+++ b/src/app/endpoints/streaming_query.py
@@ -60,8 +60,8 @@
 )
 from log import get_logger
 from metrics import recording
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH
+from models.api.responses.error import (
     AbstractErrorResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -72,13 +72,12 @@
     UnauthorizedResponse,
     UnprocessableEntityResponse,
 )
+from models.api.responses.successful import StreamingQueryResponse
 from models.common.responses.responses_api_params import ResponsesApiParams
+from models.common.turn_summary import ReferencedDocument, TurnSummary
 from models.config import Action
 from models.context import ResponseGeneratorContext
 from models.requests import QueryRequest
-from models.responses import (
-    StreamingQueryResponse,
-)
 from utils.conversations import append_turn_items_to_conversation
 from utils.endpoints import (
     check_configuration_loaded,
@@ -119,7 +118,6 @@
 from utils.stream_interrupts import get_stream_interrupt_registry
 from utils.suid import get_suid, normalize_conversation_id
 from utils.token_counter import TokenCounter
-from utils.types import ReferencedDocument, TurnSummary
 from utils.vector_search import build_rag_context
 
 logger = get_logger(__name__)
@@ -832,7 +830,8 @@ async def response_generator(  # pylint: disable=too-many-branches,too-many-stat
         # Completed response - capture final text and response object
         elif event_type == "response.completed":
             latest_response_object = cast(
-                OpenAIResponseObject, getattr(chunk, "response")  # noqa: B009
+                OpenAIResponseObject,
+                getattr(chunk, "response"),  # noqa: B009
             )
             turn_summary.llm_response = turn_summary.llm_response or "".join(text_parts)
             yield stream_event(
@@ -848,7 +847,8 @@ async def response_generator(  # pylint: disable=too-many-branches,too-many-stat
         # Incomplete or failed response - emit error
         elif event_type in ("response.incomplete", "response.failed"):
             latest_response_object = cast(
-                OpenAIResponseObject, getattr(chunk, "response")  # noqa: B009
+                OpenAIResponseObject,
+                getattr(chunk, "response"),  # noqa: B009
             )
             error_message = (
                 latest_response_object.error.message
diff --git a/src/app/endpoints/tools.py b/src/app/endpoints/tools.py
index eac339e36..222e1fc7a 100644
--- a/src/app/endpoints/tools.py
+++ b/src/app/endpoints/tools.py
@@ -11,17 +11,15 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
+from models.api.responses.successful import ToolsResponse
 from models.config import Action
-from models.responses import (
-    ToolsResponse,
-)
 from utils.endpoints import check_configuration_loaded
 from utils.mcp_headers import (
     McpHeaders,
diff --git a/src/app/endpoints/vector_stores.py b/src/app/endpoints/vector_stores.py
index 34f3090ea..0d24c9c6a 100644
--- a/src/app/endpoints/vector_stores.py
+++ b/src/app/endpoints/vector_stores.py
@@ -22,8 +22,8 @@
 from configuration import configuration
 from constants import DEFAULT_MAX_FILE_UPLOAD_SIZE
 from log import get_logger
-from models.api.responses import (
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
+from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
+from models.api.responses.error import (
     FileTooLargeResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -31,13 +31,7 @@
     ServiceUnavailableResponse,
     UnauthorizedResponse,
 )
-from models.config import Action
-from models.requests import (
-    VectorStoreCreateRequest,
-    VectorStoreFileCreateRequest,
-    VectorStoreUpdateRequest,
-)
-from models.responses import (
+from models.api.responses.successful import (
     FileResponse,
     VectorStoreDeleteResponse,
     VectorStoreFileDeleteResponse,
@@ -46,6 +40,12 @@
     VectorStoreResponse,
     VectorStoresListResponse,
 )
+from models.config import Action
+from models.requests import (
+    VectorStoreCreateRequest,
+    VectorStoreFileCreateRequest,
+    VectorStoreUpdateRequest,
+)
 from utils.endpoints import check_configuration_loaded
 from utils.query import handle_known_apistatus_errors
 
diff --git a/src/app/main.py b/src/app/main.py
index 6b68c54bb..af42dd9f5 100644
--- a/src/app/main.py
+++ b/src/app/main.py
@@ -23,7 +23,7 @@
 from configuration import configuration
 from log import get_logger
 from metrics import recording
-from models.api.responses import InternalServerErrorResponse
+from models.api.responses.error import InternalServerErrorResponse
 from sentry import initialize_sentry
 from utils.common import register_mcp_servers_async
 from utils.llama_stack_version import check_llama_stack_version
diff --git a/src/authentication/jwk_token.py b/src/authentication/jwk_token.py
index 756cb593a..7cc15870b 100644
--- a/src/authentication/jwk_token.py
+++ b/src/authentication/jwk_token.py
@@ -22,7 +22,7 @@
     DEFAULT_VIRTUAL_PATH,
 )
 from log import get_logger
-from models.api.responses import UnauthorizedResponse
+from models.api.responses.error import UnauthorizedResponse
 from models.config import JwkConfiguration
 
 logger = get_logger(__name__)
diff --git a/src/authentication/k8s.py b/src/authentication/k8s.py
index e3ac0c9de..5d59d7844 100644
--- a/src/authentication/k8s.py
+++ b/src/authentication/k8s.py
@@ -14,7 +14,7 @@
 from configuration import configuration
 from constants import DEFAULT_VIRTUAL_PATH
 from log import get_logger
-from models.api.responses import (
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
diff --git a/src/authentication/utils.py b/src/authentication/utils.py
index 26da5c049..aad460c00 100644
--- a/src/authentication/utils.py
+++ b/src/authentication/utils.py
@@ -3,7 +3,7 @@
 from fastapi import HTTPException
 from starlette.datastructures import Headers
 
-from models.api.responses import UnauthorizedResponse
+from models.api.responses.error import UnauthorizedResponse
 
 
 def extract_user_token(headers: Headers) -> str:
diff --git a/src/authorization/middleware.py b/src/authorization/middleware.py
index f29fa7cc5..2aaa8d415 100644
--- a/src/authorization/middleware.py
+++ b/src/authorization/middleware.py
@@ -18,7 +18,7 @@
 )
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
 )
diff --git a/src/cache/cache.py b/src/cache/cache.py
index 461d94b6b..b0122ada9 100644
--- a/src/cache/cache.py
+++ b/src/cache/cache.py
@@ -3,7 +3,7 @@
 from abc import ABC, abstractmethod
 
 from models.cache_entry import CacheEntry
-from models.responses import ConversationData
+from models.common import ConversationData
 from utils.suid import check_suid
 
 
diff --git a/src/cache/in_memory_cache.py b/src/cache/in_memory_cache.py
index c6b017289..cf2b85a76 100644
--- a/src/cache/in_memory_cache.py
+++ b/src/cache/in_memory_cache.py
@@ -3,8 +3,8 @@
 from cache.cache import Cache
 from log import get_logger
 from models.cache_entry import CacheEntry
+from models.common import ConversationData
 from models.config import InMemoryCacheConfig
-from models.responses import ConversationData
 from utils.connection_decorator import connection
 
 logger = get_logger(__name__)
diff --git a/src/cache/noop_cache.py b/src/cache/noop_cache.py
index fb76f6679..e7426885b 100644
--- a/src/cache/noop_cache.py
+++ b/src/cache/noop_cache.py
@@ -3,7 +3,7 @@
 from cache.cache import Cache
 from log import get_logger
 from models.cache_entry import CacheEntry
-from models.responses import ConversationData
+from models.common import ConversationData
 from utils.connection_decorator import connection
 
 logger = get_logger(__name__)
diff --git a/src/cache/postgres_cache.py b/src/cache/postgres_cache.py
index 4503c9eb3..b51b06893 100644
--- a/src/cache/postgres_cache.py
+++ b/src/cache/postgres_cache.py
@@ -9,10 +9,14 @@
 from cache.cache_error import CacheError
 from log import get_logger
 from models.cache_entry import CacheEntry
+from models.common import ConversationData
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
 from models.config import PostgreSQLDatabaseConfiguration
-from models.responses import ConversationData
 from utils.connection_decorator import connection
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
 
 logger = get_logger(__name__)
 
@@ -166,8 +170,7 @@ def connect(self) -> None:
                 raise ValueError(f"Invalid namespace: {namespace}")
             if len(namespace) > 63:
                 raise ValueError(
-                    f"Invalid namespace: {namespace}. "
-                    "Maximum length is 63 characters."
+                    f"Invalid namespace: {namespace}. Maximum length is 63 characters."
                 )
         try:
             self.connection = psycopg2.connect(
@@ -305,8 +308,7 @@ def get(  # pylint: disable=R0914
                         ]
                     except (ValueError, TypeError) as e:
                         logger.warning(
-                            "Failed to deserialize tool_calls for "
-                            "conversation %s: %s",
+                            "Failed to deserialize tool_calls for conversation %s: %s",
                             conversation_id,
                             e,
                         )
diff --git a/src/cache/sqlite_cache.py b/src/cache/sqlite_cache.py
index 25aafd4a8..ee1c4c2e4 100644
--- a/src/cache/sqlite_cache.py
+++ b/src/cache/sqlite_cache.py
@@ -8,10 +8,14 @@
 from cache.cache_error import CacheError
 from log import get_logger
 from models.cache_entry import CacheEntry
+from models.common import ConversationData
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
 from models.config import SQLiteDatabaseConfiguration
-from models.responses import ConversationData
 from utils.connection_decorator import connection
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
 
 logger = get_logger(__name__)
 
@@ -357,8 +361,7 @@ def insert_or_append(
                 referenced_documents_json = json.dumps(docs_as_dicts)
             except (TypeError, ValueError) as e:
                 logger.warning(
-                    "Failed to serialize referenced_documents for "
-                    "conversation %s: %s",
+                    "Failed to serialize referenced_documents for conversation %s: %s",
                     conversation_id,
                     e,
                 )
diff --git a/src/client.py b/src/client.py
index 8ece5d374..a503c0094 100644
--- a/src/client.py
+++ b/src/client.py
@@ -13,7 +13,7 @@
 from configuration import configuration
 from llama_stack_configuration import YamlDumper, enrich_byok_rag, enrich_solr
 from log import get_logger
-from models.api.responses import ServiceUnavailableResponse
+from models.api.responses.error import ServiceUnavailableResponse
 from models.config import LlamaStackConfiguration
 from utils.types import Singleton
 
diff --git a/src/metrics/utils.py b/src/metrics/utils.py
index 0456c47e6..806e7a336 100644
--- a/src/metrics/utils.py
+++ b/src/metrics/utils.py
@@ -7,7 +7,7 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import ServiceUnavailableResponse
+from models.api.responses.error import ServiceUnavailableResponse
 from utils.common import run_once_async
 from utils.endpoints import check_configuration_loaded
 
diff --git a/src/models/api/README.md b/src/models/api/README.md
index 58243fa92..29e945e42 100644
--- a/src/models/api/README.md
+++ b/src/models/api/README.md
@@ -1,5 +1,7 @@
 # List of source files stored in `src/models/api` directory
 
 ## [__init__.py](__init__.py)
-Typed HTTP API models (OpenAPI-oriented) for FastAPI routes.
+Typed HTTP API models (OpenAPI-oriented) for FastAPI routes. Exposes the [`responses`](responses/README.md) subpackage.
 
+## [responses/](responses/README.md)
+HTTP response shapes (successful payloads, errors, and OpenAPI description constants).
diff --git a/src/models/api/responses/README.md b/src/models/api/responses/README.md
index dff4ff4c1..91ece582b 100644
--- a/src/models/api/responses/README.md
+++ b/src/models/api/responses/README.md
@@ -6,3 +6,8 @@ HTTP response models and shared OpenAPI description constants.
 ## [constants.py](constants.py)
 OpenAPI description strings and shared example-label lists for API responses.
 
+## [error/](error/README.md)
+Structured HTTP error response models for OpenAPI documentation.
+
+## [successful/](successful/README.md)
+Concrete successful HTTP response models grouped by domain (re-exported from `successful/__init__.py`).
diff --git a/src/models/api/responses/__init__.py b/src/models/api/responses/__init__.py
index 0ead1c15e..3a48064fe 100644
--- a/src/models/api/responses/__init__.py
+++ b/src/models/api/responses/__init__.py
@@ -1,65 +1,5 @@
 """HTTP response models and shared OpenAPI description constants."""
 
-from models.api.responses.constants import (
-    BAD_REQUEST_DESCRIPTION,
-    CONFLICT_DESCRIPTION,
-    FILE_UPLOAD_EXCEEDS_SIZE_LIMIT_DESCRIPTION,
-    FORBIDDEN_DESCRIPTION,
-    INTERNAL_SERVER_ERROR_DESCRIPTION,
-    INVALID_FEEDBACK_PATH_DESCRIPTION,
-    NOT_FOUND_DESCRIPTION,
-    PROMPT_TOO_LONG_DESCRIPTION,
-    QUOTA_EXCEEDED_DESCRIPTION,
-    SERVICE_UNAVAILABLE_DESCRIPTION,
-    SUCCESSFUL_RESPONSE_DESCRIPTION,
-    UNAUTHORIZED_DESCRIPTION,
-    UNAUTHORIZED_OPENAPI_EXAMPLES,
-    UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH,
-    UNPROCESSABLE_CONTENT_DESCRIPTION,
-)
-from models.api.responses.error import (
-    AbstractErrorResponse,
-    BadRequestResponse,
-    ConflictResponse,
-    DetailModel,
-    FileTooLargeResponse,
-    ForbiddenResponse,
-    InternalServerErrorResponse,
-    NotFoundResponse,
-    PromptTooLongResponse,
-    QuotaExceededResponse,
-    ServiceUnavailableResponse,
-    UnauthorizedResponse,
-    UnprocessableEntityResponse,
-)
+from models.api.responses import constants, error, successful
 
-__all__ = [
-    "BAD_REQUEST_DESCRIPTION",
-    "CONFLICT_DESCRIPTION",
-    "FILE_UPLOAD_EXCEEDS_SIZE_LIMIT_DESCRIPTION",
-    "FORBIDDEN_DESCRIPTION",
-    "INTERNAL_SERVER_ERROR_DESCRIPTION",
-    "INVALID_FEEDBACK_PATH_DESCRIPTION",
-    "NOT_FOUND_DESCRIPTION",
-    "PROMPT_TOO_LONG_DESCRIPTION",
-    "QUOTA_EXCEEDED_DESCRIPTION",
-    "SERVICE_UNAVAILABLE_DESCRIPTION",
-    "SUCCESSFUL_RESPONSE_DESCRIPTION",
-    "UNAUTHORIZED_DESCRIPTION",
-    "UNAUTHORIZED_OPENAPI_EXAMPLES",
-    "UNAUTHORIZED_OPENAPI_EXAMPLES_WITH_MCP_OAUTH",
-    "UNPROCESSABLE_CONTENT_DESCRIPTION",
-    "AbstractErrorResponse",
-    "BadRequestResponse",
-    "ConflictResponse",
-    "DetailModel",
-    "ForbiddenResponse",
-    "InternalServerErrorResponse",
-    "NotFoundResponse",
-    "PromptTooLongResponse",
-    "FileTooLargeResponse",
-    "QuotaExceededResponse",
-    "ServiceUnavailableResponse",
-    "UnauthorizedResponse",
-    "UnprocessableEntityResponse",
-]
+__all__ = ["error", "successful", "constants"]
diff --git a/src/models/api/responses/error/README.md b/src/models/api/responses/error/README.md
index 6dcaa4ca9..7525a98b4 100644
--- a/src/models/api/responses/error/README.md
+++ b/src/models/api/responses/error/README.md
@@ -35,4 +35,3 @@ OpenAPI-aligned error response models: HTTP 401 Unauthorized.
 
 ## [unprocessable_entity.py](unprocessable_entity.py)
 OpenAPI-aligned error response models: HTTP 422 Unprocessable Entity.
-
diff --git a/src/models/api/responses/successful/README.md b/src/models/api/responses/successful/README.md
new file mode 100644
index 000000000..bdcf4ac2c
--- /dev/null
+++ b/src/models/api/responses/successful/README.md
@@ -0,0 +1,37 @@
+# List of source files stored in `src/models/api/responses/successful` directory
+
+## [__init__.py](__init__.py)
+Concrete successful HTTP response models grouped by domain.
+
+## [bases.py](bases.py)
+Base classes for successful API response models.
+
+## [catalog.py](catalog.py)
+Successful responses for models, tools, shields, RAG, and providers.
+
+## [configuration.py](configuration.py)
+Successful response model for the configuration endpoint.
+
+## [conversations.py](conversations.py)
+Successful responses for conversation CRUD and listing.
+
+## [feedback.py](feedback.py)
+Successful responses for feedback and feedback status endpoints.
+
+## [mcp_servers.py](mcp_servers.py)
+Successful responses for MCP server registration and listing.
+
+## [probes.py](probes.py)
+Successful responses for service probes and related endpoints (info, readiness, liveness, status, auth).
+
+## [prompts.py](prompts.py)
+Successful responses for stored prompt templates.
+
+## [query.py](query.py)
+Successful response models for synchronous query and streaming query documentation.
+
+## [responses_openai.py](responses_openai.py)
+Successful response model for the OpenAI-compatible Responses API.
+
+## [vector_stores.py](vector_stores.py)
+Successful responses for vector stores and vector store files.
diff --git a/src/models/api/responses/successful/__init__.py b/src/models/api/responses/successful/__init__.py
new file mode 100644
index 000000000..d84bf2197
--- /dev/null
+++ b/src/models/api/responses/successful/__init__.py
@@ -0,0 +1,97 @@
+"""Concrete successful HTTP response models grouped by domain."""
+
+from models.api.responses.successful.catalog import (
+    ModelsResponse,
+    ProviderResponse,
+    ProvidersListResponse,
+    RAGInfoResponse,
+    RAGListResponse,
+    ShieldsResponse,
+    ToolsResponse,
+)
+from models.api.responses.successful.configuration import ConfigurationResponse
+from models.api.responses.successful.conversations import (
+    ConversationDeleteResponse,
+    ConversationResponse,
+    ConversationsListResponse,
+    ConversationsListResponseV2,
+    ConversationUpdateResponse,
+)
+from models.api.responses.successful.feedback import (
+    FeedbackResponse,
+    FeedbackStatusUpdateResponse,
+)
+from models.api.responses.successful.mcp_servers import (
+    MCPClientAuthOptionsResponse,
+    MCPServerDeleteResponse,
+    MCPServerListResponse,
+    MCPServerRegistrationResponse,
+)
+from models.api.responses.successful.probes import (
+    AuthorizedResponse,
+    InfoResponse,
+    LivenessResponse,
+    ReadinessResponse,
+    StatusResponse,
+)
+from models.api.responses.successful.prompts import (
+    PromptDeleteResponse,
+    PromptResourceResponse,
+    PromptsListResponse,
+)
+from models.api.responses.successful.query import (
+    QueryResponse,
+    StreamingInterruptResponse,
+    StreamingQueryResponse,
+)
+from models.api.responses.successful.responses_openai import ResponsesResponse
+from models.api.responses.successful.vector_stores import (
+    FileResponse,
+    VectorStoreDeleteResponse,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileResponse,
+    VectorStoreFilesListResponse,
+    VectorStoreResponse,
+    VectorStoresListResponse,
+)
+
+__all__ = [
+    "AuthorizedResponse",
+    "ConfigurationResponse",
+    "ConversationDeleteResponse",
+    "ConversationResponse",
+    "ConversationsListResponse",
+    "ConversationsListResponseV2",
+    "ConversationUpdateResponse",
+    "FeedbackResponse",
+    "FeedbackStatusUpdateResponse",
+    "FileResponse",
+    "InfoResponse",
+    "LivenessResponse",
+    "MCPClientAuthOptionsResponse",
+    "MCPServerDeleteResponse",
+    "MCPServerListResponse",
+    "MCPServerRegistrationResponse",
+    "ModelsResponse",
+    "PromptDeleteResponse",
+    "PromptResourceResponse",
+    "PromptsListResponse",
+    "ProviderResponse",
+    "ProvidersListResponse",
+    "QueryResponse",
+    "RAGInfoResponse",
+    "RAGListResponse",
+    "ReadinessResponse",
+    "ResponsesResponse",
+    "ShieldsResponse",
+    "StatusResponse",
+    "StreamingInterruptResponse",
+    "StreamingQueryResponse",
+    "ToolsResponse",
+    "VectorStoreDeleteResponse",
+    "VectorStoreFileDeleteResponse",
+    "VectorStoreFileResponse",
+    "VectorStoreFilesListResponse",
+    "VectorStoreResponse",
+    "VectorStoresListResponse",
+]
diff --git a/src/models/api/responses/successful/bases.py b/src/models/api/responses/successful/bases.py
new file mode 100644
index 000000000..c4355d25b
--- /dev/null
+++ b/src/models/api/responses/successful/bases.py
@@ -0,0 +1,84 @@
+"""Base classes for successful API response models."""
+
+from typing import Any, ClassVar
+
+from pydantic import BaseModel, Field, computed_field
+from pydantic_core import SchemaError
+
+from log import get_logger
+from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION
+
+logger = get_logger(__name__)
+
+
+class AbstractSuccessfulResponse(BaseModel):
+    """Base class for all successful response models."""
+
+    @classmethod
+    def openapi_response(cls) -> dict[str, Any]:
+        """Build the FastAPI response dict from the first schema example.
+
+        Raises:
+            SchemaError: If the model JSON schema declares no examples.
+        """
+        declared = cls.model_json_schema().get("examples")
+        if not declared:
+            raise SchemaError(f"Examples not found in {cls.__name__}")
+        return {
+            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
+            "model": cls,
+            "content": {"application/json": {"example": declared[0]}},
+        }
+
+
+class AbstractDeleteResponse(BaseModel):
+    """Base model for successful delete responses."""
+
+    deleted: bool = Field(
+        ...,
+        description="Whether the deletion was successful.",
+        examples=[True, False],
+    )
+    # Declared without a value here; ``response`` reads it from the instance.
+    resource_name: ClassVar[str]
+
+    @computed_field
+    def response(self) -> str:
+        """Human-readable outcome of the delete operation."""
+        if self.deleted:
+            return f"{self.resource_name} deleted successfully"
+        return f"{self.resource_name} not found"
+
+    @classmethod
+    def openapi_response(cls) -> dict[str, Any]:
+        """Return FastAPI response metadata keyed by each example's label.
+
+        Every schema example must be a mapping with ``label`` and ``value``;
+        the label names the OpenAPI example and the value is its payload.
+
+        Returns:
+            Dict with ``description``, ``model`` and ``content`` entries.
+
+        Raises:
+            SchemaError: If the schema has no examples, or an example lacks
+                a ``label`` or ``value`` key.
+        """
+        declared = cls.model_json_schema().get("examples")
+        if not declared:
+            raise SchemaError(f"Examples not found in {cls.__name__}")
+
+        by_label: dict[str, dict[str, Any]] = {}
+        # "label" is checked before "value", so it is the first one reported.
+        for index, entry in enumerate(declared):
+            for key in ("label", "value"):
+                if key not in entry:
+                    raise SchemaError(
+                        f"Example at index {index} in {cls.__name__} has no {key}"
+                    )
+            by_label[entry["label"]] = {"value": entry["value"]}
+
+        return {
+            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
+            "model": cls,
+            "content": {"application/json": {"examples": by_label}},
+        }
diff --git a/src/models/api/responses/successful/catalog.py b/src/models/api/responses/successful/catalog.py
new file mode 100644
index 000000000..3d357a724
--- /dev/null
+++ b/src/models/api/responses/successful/catalog.py
@@ -0,0 +1,258 @@
+"""Successful responses for models, tools, shields, RAG, and providers."""
+
+from typing import Any, Optional
+
+from pydantic import Field
+
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+
+
+class ModelsResponse(AbstractSuccessfulResponse):
+    """Model representing a response to models request."""
+
+    models: list[dict[str, Any]] = Field(
+        ...,
+        description="List of models available",
+    )
+
+    model_config = {  # examples[0] is the one openapi_response() returns
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "models": [
+                        {
+                            "identifier": "openai/gpt-4-turbo",
+                            "metadata": {},
+                            "api_model_type": "llm",
+                            "provider_id": "openai",
+                            "type": "model",
+                            "provider_resource_id": "gpt-4-turbo",
+                            "model_type": "llm",
+                        },
+                    ],
+                }
+            ]
+        }
+    }
+
+
+class ToolsResponse(AbstractSuccessfulResponse):
+    """Model representing a response to tools request."""
+
+    tools: list[dict[str, Any]] = Field(
+        description=(
+            "List of tools available from all configured MCP servers and built-in toolgroups"
+        ),
+    )
+
+    model_config = {  # examples[0] is the one openapi_response() returns
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "tools": [
+                        {
+                            "identifier": "filesystem_read",
+                            "description": "Read contents of a file from the filesystem",
+                            "parameters": [
+                                {
+                                    "name": "path",
+                                    "description": "Path to the file to read",
+                                    "parameter_type": "string",
+                                    "required": True,
+                                    "default": None,
+                                }
+                            ],
+                            "provider_id": "model-context-protocol",
+                            "toolgroup_id": "filesystem-tools",
+                            "server_source": "http://localhost:3000",
+                            "type": "tool",
+                        }
+                    ],
+                }
+            ]
+        }
+    }
+
+
+class ShieldsResponse(AbstractSuccessfulResponse):
+    """Model representing a response to shields request."""
+
+    shields: list[dict[str, Any]] = Field(
+        ...,
+        description="List of shields available",
+    )
+
+    model_config = {  # examples[0] is the one openapi_response() returns
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "shields": [
+                        {
+                            "identifier": "lightspeed_question_validity-shield",
+                            "provider_resource_id": "lightspeed_question_validity-shield",
+                            "provider_id": "lightspeed_question_validity",
+                            "type": "shield",
+                            "params": {},
+                        }
+                    ],
+                }
+            ]
+        }
+    }
+
+
+class RAGInfoResponse(AbstractSuccessfulResponse):
+    """Model representing a response with information about RAG DB."""
+
+    id: str = Field(
+        ..., description="Vector DB unique ID", examples=["vs_00000000_0000_0000"]
+    )
+    name: Optional[str] = Field(
+        None,
+        description="Human readable vector DB name",
+        examples=["Faiss Store with Knowledge base"],
+    )
+    created_at: int = Field(
+        ...,
+        description="When the vector store was created, represented as Unix time",
+        examples=[1763391371],
+    )
+    last_active_at: Optional[int] = Field(
+        None,
+        description="When the vector store was last active, represented as Unix time",
+        examples=[1763391371],
+    )
+    usage_bytes: int = Field(
+        ...,
+        description="Storage byte(s) used by this vector DB",
+        examples=[0],
+    )
+    expires_at: Optional[int] = Field(
+        None,
+        description="When the vector store expires, represented as Unix time",
+        examples=[1763391371],
+    )
+    object: str = Field(
+        ...,
+        description="Object type",
+        examples=["vector_store"],
+    )
+    status: str = Field(
+        ...,
+        description="Vector DB status",
+        examples=["completed"],
+    )
+
+    model_config = {  # examples[0] is the one openapi_response() returns
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3",
+                    "name": "Faiss Store with Knowledge base",
+                    "created_at": 1763391371,
+                    "last_active_at": 1763391371,
+                    "usage_bytes": 1024000,
+                    "expires_at": None,
+                    "object": "vector_store",
+                    "status": "completed",
+                }
+            ]
+        }
+    }
+
+
+class RAGListResponse(AbstractSuccessfulResponse):
+    """Model representing a response to list RAGs request."""
+
+    rags: list[str] = Field(
+        ...,
+        title="RAG list response",
+        description="List of RAG identifiers",
+        examples=[  # NOTE(review): field is list[str] but each example is a str
+            "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3",
+            "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3",
+        ],
+    )
+
+    model_config = {  # examples[0] is the one openapi_response() returns
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "rags": [
+                        "vs_00000000-cafe-babe-0000-000000000000",
+                        "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3",
+                        "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3",
+                    ]
+                }
+            ]
+        }
+    }
+
+
+class ProvidersListResponse(AbstractSuccessfulResponse):
+    """Model representing a response to providers request."""
+
+    providers: dict[str, list[dict[str, Any]]] = Field(
+        ...,
+        description="List of available API types and their corresponding providers",
+    )
+
+    model_config = {  # examples[0] is the one openapi_response() returns
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "providers": {
+                        "inference": [
+                            {
+                                "provider_id": "sentence-transformers",
+                                "provider_type": "inline::sentence-transformers",
+                            },
+                            {
+                                "provider_id": "openai",
+                                "provider_type": "remote::openai",
+                            },
+                        ],
+                        "agents": [
+                            {
+                                "provider_id": "meta-reference",
+                                "provider_type": "inline::meta-reference",
+                            },
+                        ],
+                    },
+                }
+            ]
+        }
+    }
+
+
+class ProviderResponse(AbstractSuccessfulResponse):
+    """Model representing a response to get specific provider request."""
+
+    api: str = Field(
+        ...,
+        description="The API this provider implements",
+    )
+    config: dict[str, Any] = Field(
+        ...,
+        description="Provider configuration parameters",
+    )
+    health: dict[str, Any] = Field(
+        ...,
+        description="Current health status of the provider",
+    )
+    provider_id: str = Field(..., description="Unique provider identifier")
+    provider_type: str = Field(..., description="Provider implementation type")
+
+    model_config = {  # examples[0] is the one openapi_response() returns
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "api": "inference",
+                    "config": {"api_key": "********"},
+                    "health": {"status": "OK", "message": "Healthy"},
+                    "provider_id": "openai",
+                    "provider_type": "remote::openai",
+                }
+            ]
+        }
+    }
diff --git a/src/models/api/responses/successful/configuration.py b/src/models/api/responses/successful/configuration.py
new file mode 100644
index 000000000..d41e8ff20
--- /dev/null
+++ b/src/models/api/responses/successful/configuration.py
@@ -0,0 +1,94 @@
+"""Successful response model for the configuration endpoint."""
+
+from pydantic import ConfigDict
+
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+from models.config import Configuration
+
+
+class ConfigurationResponse(AbstractSuccessfulResponse):
+    """Success response model for the config endpoint.
+
+    Attributes:
+        configuration: Parsed application configuration returned to the client.
+    """
+
+    configuration: Configuration
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "examples": [
+                {
+                    "configuration": {
+                        "name": "lightspeed-stack",
+                        "service": {
+                            "host": "localhost",
+                            "port": 8080,
+                            "auth_enabled": False,
+                            "workers": 1,
+                            "color_log": True,
+                            "access_log": True,
+                            "tls_config": {
+                                "tls_certificate_path": None,
+                                "tls_key_path": None,
+                                "tls_key_password": None,
+                            },
+                            "cors": {
+                                "allow_origins": ["*"],
+                                "allow_credentials": False,
+                                "allow_methods": ["*"],
+                                "allow_headers": ["*"],
+                            },
+                        },
+                        "llama_stack": {
+                            "url": "http://localhost:8321",
+                            "api_key": "*****",
+                            "use_as_library_client": False,
+                            "library_client_config_path": None,
+                        },
+                        "user_data_collection": {
+                            "feedback_enabled": True,
+                            "feedback_storage": "/tmp/data/feedback",
+                            "transcripts_enabled": False,
+                            "transcripts_storage": "/tmp/data/transcripts",
+                        },
+                        "database": {
+                            "sqlite": {"db_path": "/tmp/lightspeed-stack.db"},
+                            "postgres": None,
+                        },
+                        "mcp_servers": [
+                            {
+                                "name": "server1",
+                                "provider_id": "provider1",
+                                "url": "http://url.com:1",
+                            },
+                        ],
+                        "authentication": {
+                            "module": "noop",
+                            "skip_tls_verification": False,
+                        },
+                        "authorization": {"access_rules": []},
+                        "customization": None,
+                        "inference": {
+                            "default_model": "gpt-4-turbo",
+                            "default_provider": "openai",
+                        },
+                        "conversation_cache": {
+                            "type": None,
+                            "memory": None,
+                            "sqlite": None,
+                            "postgres": None,
+                        },
+                        "byok_rag": [],
+                        "quota_handlers": {
+                            "sqlite": None,
+                            "postgres": None,
+                            "limiters": [],
+                            "scheduler": {"period": 1},
+                            "enable_token_history": False,
+                        },
+                    }
+                }
+            ]
+        }
+    )
diff --git a/src/models/api/responses/successful/conversations.py b/src/models/api/responses/successful/conversations.py
new file mode 100644
index 000000000..c1bacae42
--- /dev/null
+++ b/src/models/api/responses/successful/conversations.py
@@ -0,0 +1,219 @@
+"""Successful responses for conversation CRUD and listing."""
+
+from typing import ClassVar
+
+from pydantic import Field, computed_field
+
+from log import get_logger
+from models.api.responses.successful.bases import (
+    AbstractDeleteResponse,
+    AbstractSuccessfulResponse,
+)
+from models.common.conversation import (
+    ConversationData,
+    ConversationDetails,
+    ConversationTurn,
+)
+
+logger = get_logger(__name__)
+
+
+class ConversationResponse(AbstractSuccessfulResponse):
+    """Model representing a response for retrieving a conversation.
+
+    Attributes:
+        conversation_id: The conversation ID (UUID).
+        chat_history: The chat history as a list of conversation turns.
+    """
+
+    conversation_id: str = Field(
+        ...,
+        description="Conversation ID (UUID)",
+        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
+    )
+
+    chat_history: list[ConversationTurn] = Field(
+        ...,
+        description="The simplified chat history as a list of conversation turns",
+        examples=[
+            {
+                "messages": [
+                    {"content": "Hello", "type": "user"},
+                    {"content": "Hi there!", "type": "assistant"},
+                ],
+                "tool_calls": [],
+                "tool_results": [],
+                "provider": "openai",
+                "model": "gpt-4o-mini",
+                "started_at": "2024-01-01T00:01:00Z",
+                "completed_at": "2024-01-01T00:01:05Z",
+            }
+        ],
+    )
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                    "chat_history": [
+                        {
+                            "messages": [
+                                {"content": "Hello", "type": "user"},
+                                {"content": "Hi there!", "type": "assistant"},
+                            ],
+                            "tool_calls": [],
+                            "tool_results": [],
+                            "provider": "openai",
+                            "model": "gpt-4o-mini",
+                            "started_at": "2024-01-01T00:01:00Z",
+                            "completed_at": "2024-01-01T00:01:05Z",
+                        }
+                    ],
+                }
+            ]
+        }
+    }
+
+
+class ConversationDeleteResponse(AbstractDeleteResponse):
+    """Response for deleting a conversation."""
+
+    resource_name: ClassVar[str] = "Conversation"
+    conversation_id: str = Field(
+        ...,
+        description="Conversation identifier that was passed to delete.",
+        examples=["123e4567-e89b-12d3-a456-426614174000"],
+    )
+
+    @computed_field(json_schema_extra={"deprecated": True})
+    def success(self) -> bool:
+        """Successful response flag."""
+        logger.warning("DEPRECATED: Will be removed in a future release.")
+        return True
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "label": "deleted",
+                    "value": {
+                        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                        "deleted": True,
+                        "response": "Conversation deleted successfully",
+                    },
+                },
+                {
+                    "label": "not found",
+                    "value": {
+                        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                        "deleted": False,
+                        "response": "Conversation not found",
+                    },
+                },
+            ]
+        }
+    }
+
+
+class ConversationsListResponse(AbstractSuccessfulResponse):
+    """Model representing a response for listing conversations of a user.
+
+    Attributes:
+        conversations: List of conversation details associated with the user.
+    """
+
+    conversations: list[ConversationDetails]
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "conversations": [
+                        {
+                            "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                            "created_at": "2024-01-01T00:00:00Z",
+                            "last_message_at": "2024-01-01T00:05:00Z",
+                            "message_count": 5,
+                            "last_used_model": "gemini/gemini-2.0-flash",
+                            "last_used_provider": "gemini",
+                            "topic_summary": "Openshift Microservices Deployment Strategies",
+                        },
+                        {
+                            "conversation_id": "456e7890-e12b-34d5-a678-901234567890",
+                            "created_at": "2024-01-01T01:00:00Z",
+                            "message_count": 2,
+                            "last_used_model": "gemini/gemini-2.5-flash",
+                            "last_used_provider": "gemini",
+                            "topic_summary": "RHDH Purpose Summary",
+                        },
+                    ]
+                }
+            ]
+        }
+    }
+
+
+class ConversationsListResponseV2(AbstractSuccessfulResponse):
+    """Model representing a response for listing conversations of a user.
+
+    Attributes:
+        conversations: List of conversation data associated with the user.
+    """
+
+    conversations: list[ConversationData]
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "conversations": [
+                        {
+                            "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                            "topic_summary": "Openshift Microservices Deployment Strategies",
+                            "last_message_timestamp": 1704067200.0,
+                        }
+                    ],
+                }
+            ]
+        }
+    }
+
+
+class ConversationUpdateResponse(AbstractSuccessfulResponse):
+    """Model representing a response for updating a conversation topic summary.
+
+    Attributes:
+        conversation_id: The conversation ID (UUID) that was updated.
+        success: Whether the update was successful.
+        message: A message about the update result.
+    """
+
+    conversation_id: str = Field(
+        ...,
+        description="The conversation ID (UUID) that was updated",
+        examples=["123e4567-e89b-12d3-a456-426614174000"],
+    )
+    success: bool = Field(
+        ...,
+        description="Whether the update was successful",
+        examples=[True],
+    )
+    message: str = Field(
+        ...,
+        description="A message about the update result",
+        examples=["Topic summary updated successfully"],
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                    "success": True,
+                    "message": "Topic summary updated successfully",
+                }
+            ]
+        }
+    }
diff --git a/src/models/api/responses/successful/feedback.py b/src/models/api/responses/successful/feedback.py
new file mode 100644
index 000000000..c6bc86113
--- /dev/null
+++ b/src/models/api/responses/successful/feedback.py
@@ -0,0 +1,58 @@
+"""Successful responses for feedback and feedback status endpoints."""
+
+from typing import Any
+
+from pydantic import Field
+
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+
+
+class FeedbackResponse(AbstractSuccessfulResponse):
+    """Model representing a response to a feedback request.
+
+    Attributes:
+        response: The response of the feedback request.
+    """
+
+    response: str = Field(
+        ...,
+        description="The response of the feedback request.",
+        examples=["feedback received"],
+    )
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "response": "feedback received",
+                }
+            ]
+        }
+    }
+
+
+class FeedbackStatusUpdateResponse(AbstractSuccessfulResponse):
+    """Model representing a response to a feedback status update request.
+
+    Attributes:
+        status: The previous and current status of the service and who updated it.
+    """
+
+    status: dict[str, Any]
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "status": {
+                        "previous_status": True,
+                        "updated_status": False,
+                        "updated_by": "user/test",
+                        "timestamp": "2023-03-15 12:34:56",
+                    },
+                }
+            ]
+        }
+    }
diff --git a/src/models/api/responses/successful/mcp_servers.py b/src/models/api/responses/successful/mcp_servers.py
new file mode 100644
index 000000000..b19e9b5af
--- /dev/null
+++ b/src/models/api/responses/successful/mcp_servers.py
@@ -0,0 +1,126 @@
+"""Successful responses for MCP server auth options, registration, listing, and deletion."""
+
+from pydantic import Field
+
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+from models.common.mcp import MCPServerAuthInfo, MCPServerInfo
+
+
+class MCPClientAuthOptionsResponse(AbstractSuccessfulResponse):
+    """Response containing MCP servers that accept client-provided authorization.
+
+    Attributes:
+        servers: MCP servers that declare client authentication headers.
+    """
+
+    servers: list[MCPServerAuthInfo] = Field(
+        default_factory=list,
+        description="List of MCP servers that accept client-provided authorization",
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "servers": [
+                        {
+                            "name": "github",
+                            "client_auth_headers": ["Authorization"],
+                        },
+                        {
+                            "name": "gitlab",
+                            "client_auth_headers": ["Authorization", "X-API-Key"],
+                        },
+                    ]
+                }
+            ]
+        }
+    }
+
+
+class MCPServerRegistrationResponse(AbstractSuccessfulResponse):
+    """Response for a successful MCP server registration.
+
+    Attributes:
+        name: Registered MCP server name.
+        url: Registered MCP server URL.
+        provider_id: MCP provider identification.
+        message: Status message.
+    """
+
+    name: str = Field(..., description="Registered MCP server name")
+    url: str = Field(..., description="Registered MCP server URL")
+    provider_id: str = Field(..., description="MCP provider identification")
+    message: str = Field(..., description="Status message")
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "name": "mcp-integration-tools",
+                    "url": "http://host.docker.internal:7008/api/mcp-actions/v1",
+                    "provider_id": "model-context-protocol",
+                    "message": "MCP server 'mcp-integration-tools' registered successfully",
+                }
+            ]
+        }
+    }
+
+
+class MCPServerListResponse(AbstractSuccessfulResponse):
+    """Response listing all registered MCP servers.
+
+    Attributes:
+        servers: All registered MCP servers (static and dynamic).
+    """
+
+    servers: list[MCPServerInfo] = Field(
+        default_factory=list,
+        description="List of all registered MCP servers (static and dynamic)",
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "servers": [
+                        {
+                            "name": "mcp-integration-tools",
+                            "url": "http://host.docker.internal:7008/api/mcp-actions/v1",
+                            "provider_id": "model-context-protocol",
+                            "source": "config",
+                        },
+                        {
+                            "name": "test-mcp-server",
+                            "url": "http://host.docker.internal:8888/mcp",
+                            "provider_id": "model-context-protocol",
+                            "source": "api",
+                        },
+                    ]
+                }
+            ]
+        }
+    }
+
+
+class MCPServerDeleteResponse(AbstractSuccessfulResponse):
+    """Response for a successful MCP server deletion.
+
+    Attributes:
+        name: Deleted MCP server name.
+        message: Status message.
+    """
+
+    name: str = Field(..., description="Deleted MCP server name")
+    message: str = Field(..., description="Status message")
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "name": "test-mcp-server",
+                    "message": "MCP server 'test-mcp-server' unregistered successfully",
+                }
+            ]
+        }
+    }
diff --git a/src/models/api/responses/successful/probes.py b/src/models/api/responses/successful/probes.py
new file mode 100644
index 000000000..2fe966843
--- /dev/null
+++ b/src/models/api/responses/successful/probes.py
@@ -0,0 +1,182 @@
+"""Successful probe-related API responses (info, readiness, liveness, status, auth)."""
+
+from pydantic import Field
+
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+from models.common.health import ProviderHealthStatus
+
+
+class InfoResponse(AbstractSuccessfulResponse):
+    """Model representing a response to an info request.
+
+    Attributes:
+        name: Service name.
+        service_version: Service version.
+        llama_stack_version: Llama Stack version.
+    """
+
+    name: str = Field(
+        description="Service name",
+        examples=["Lightspeed Stack"],
+    )
+
+    service_version: str = Field(
+        description="Service version",
+        examples=["0.1.0", "0.2.0", "1.0.0"],
+    )
+
+    llama_stack_version: str = Field(
+        description="Llama Stack version",
+        examples=["0.2.1", "0.2.2", "0.2.18", "0.2.21", "0.2.22"],
+    )
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "name": "Lightspeed Stack",
+                    "service_version": "1.0.0",
+                    "llama_stack_version": "1.0.0",
+                }
+            ]
+        }
+    }
+
+
+class ReadinessResponse(AbstractSuccessfulResponse):
+    """Model representing response to a readiness request.
+
+    Attributes:
+        ready: If service is ready.
+        reason: The reason for the readiness.
+        providers: List of unhealthy providers in case of readiness failure.
+    """
+
+    ready: bool = Field(
+        ...,
+        description="Flag indicating if service is ready",
+        examples=[True, False],
+    )
+
+    reason: str = Field(
+        ...,
+        description="The reason for the readiness",
+        examples=["Service is ready"],
+    )
+
+    providers: list[ProviderHealthStatus] = Field(
+        ...,
+        description="List of unhealthy providers in case of readiness failure.",
+        examples=[],
+    )
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "ready": True,
+                    "reason": "Service is ready",
+                    "providers": [],
+                }
+            ]
+        }
+    }
+
+
+class LivenessResponse(AbstractSuccessfulResponse):
+    """Model representing a response to a liveness request.
+
+    Attributes:
+        alive: If app is alive.
+    """
+
+    alive: bool = Field(
+        ...,
+        description="Flag indicating that the app is alive",
+        examples=[True, False],
+    )
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "alive": True,
+                }
+            ]
+        }
+    }
+
+
+class StatusResponse(AbstractSuccessfulResponse):
+    """Model representing a response to a status request.
+
+    Attributes:
+        functionality: The functionality of the service.
+        status: The status of the service.
+    """
+
+    functionality: str = Field(
+        ...,
+        description="The functionality of the service",
+        examples=["feedback"],
+    )
+
+    status: dict = Field(
+        ...,
+        description="The status of the service",
+        examples=[{"enabled": True}],
+    )
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "functionality": "feedback",
+                    "status": {"enabled": True},
+                }
+            ]
+        }
+    }
+
+
+class AuthorizedResponse(AbstractSuccessfulResponse):
+    """Model representing a response to an authorization request.
+
+    Attributes:
+        user_id: The ID of the logged in user.
+        username: The name of the logged in user.
+        skip_userid_check: Whether to skip the user ID check.
+    """
+
+    user_id: str = Field(
+        ...,
+        description="User ID, for example UUID",
+        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
+    )
+    username: str = Field(
+        ...,
+        description="User name",
+        examples=["John Doe", "Adam Smith"],
+    )
+    skip_userid_check: bool = Field(
+        ...,
+        description="Whether to skip the user ID check",
+        examples=[True, False],
+    )
+
+    # provides examples for /docs endpoint
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "user_id": "123e4567-e89b-12d3-a456-426614174000",
+                    "username": "user1",
+                    "skip_userid_check": False,
+                }
+            ]
+        }
+    }
diff --git a/src/models/api/responses/successful/prompts.py b/src/models/api/responses/successful/prompts.py
new file mode 100644
index 000000000..b06fd8977
--- /dev/null
+++ b/src/models/api/responses/successful/prompts.py
@@ -0,0 +1,119 @@
+"""Successful responses for stored prompt templates."""
+
+from typing import ClassVar, Optional
+
+from pydantic import Field
+
+from models.api.responses.successful.bases import (
+    AbstractDeleteResponse,
+    AbstractSuccessfulResponse,
+)
+
+
+class PromptResourceResponse(AbstractSuccessfulResponse):
+    """A stored prompt template as returned by Llama Stack.
+
+    Attributes:
+        prompt_id: Prompt identifier from Llama Stack.
+        version: Version number for this prompt.
+        is_default: Whether this version is the default.
+        prompt: Prompt text with placeholders.
+        variables: Variable names used in the template.
+    """
+
+    prompt_id: str = Field(..., description="Prompt identifier from Llama Stack")
+    version: int = Field(..., description="Version number for this prompt")
+    is_default: Optional[bool] = Field(
+        None, description="Whether this version is the default"
+    )
+    prompt: Optional[str] = Field(None, description="Prompt text with placeholders")
+    variables: Optional[list[str]] = Field(
+        None, description="Variable names used in the template"
+    )
+
+    model_config = {
+        "extra": "forbid",
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
+                    "version": 1,
+                    "is_default": True,
+                    "prompt": "Summarize: {{text}}",
+                    "variables": ["text"],
+                }
+            ]
+        },
+    }
+
+
+class PromptsListResponse(AbstractSuccessfulResponse):
+    """List of stored prompt templates returned by Llama Stack.
+
+    Attributes:
+        data: Prompt entries as returned by the Llama Stack list API.
+    """
+
+    data: list[PromptResourceResponse] = Field(
+        default_factory=list,
+        description="Prompt entries (as returned by Llama Stack list)",
+    )
+
+    model_config = {
+        "extra": "forbid",
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "data": [
+                        {
+                            "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
+                            "version": 1,
+                            "is_default": True,
+                            "prompt": "Summarize: {{text}}",
+                            "variables": ["text"],
+                        }
+                    ],
+                }
+            ]
+        },
+    }
+
+
+class PromptDeleteResponse(AbstractDeleteResponse):
+    """Result of deleting a stored prompt (always HTTP 200, like conversations v2).
+
+    Attributes:
+        prompt_id: Prompt identifier that was passed to delete.
+        deleted: Whether the prompt was deleted successfully
+        response: Human readable response
+    """
+
+    resource_name: ClassVar[str] = "Prompt"
+    prompt_id: str = Field(
+        ...,
+        description="Prompt identifier that was passed to delete.",
+        examples=["pmpt_0123456789abcdef0123456789abcdef01234567"],
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "label": "deleted",
+                    "value": {
+                        "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
+                        "deleted": True,
+                        "response": "Prompt deleted successfully",
+                    },
+                },
+                {
+                    "label": "not found",
+                    "value": {
+                        "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
+                        "deleted": False,
+                        "response": "Prompt not found",
+                    },
+                },
+            ]
+        }
+    }
diff --git a/src/models/api/responses/successful/query.py b/src/models/api/responses/successful/query.py
new file mode 100644
index 000000000..c59bac766
--- /dev/null
+++ b/src/models/api/responses/successful/query.py
@@ -0,0 +1,243 @@
+"""Successful response models for synchronous query and streaming query documentation."""
+
+from typing import Any, Optional
+
+from pydantic import Field
+from pydantic_core import SchemaError
+
+from constants import MEDIA_TYPE_EVENT_STREAM
+from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+from models.common.turn_summary import (
+    RAGChunk,
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
+
+
+class QueryResponse(AbstractSuccessfulResponse):
+    """Model representing LLM response to a query.
+
+    Attributes:
+        conversation_id: The optional conversation ID (UUID).
+        response: The response.
+        rag_chunks: Deprecated. List of RAG chunks used to generate the response.
+            This information is now available in tool_results under file_search_call type.
+        referenced_documents: The URLs and titles for the documents used to generate the response.
+        tool_calls: List of tool calls made during response generation.
+        tool_results: List of tool results.
+        truncated: Whether conversation history was truncated.
+        input_tokens: Number of tokens sent to LLM.
+        output_tokens: Number of tokens received from LLM.
+        available_quotas: Quota available as measured by all configured quota limiters.
+    """
+
+    conversation_id: Optional[str] = Field(
+        None,
+        description="The optional conversation ID (UUID)",
+        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
+    )
+
+    response: str = Field(
+        description="Response from LLM",
+        examples=[
+            "Kubernetes is an open-source container orchestration system for automating ..."
+        ],
+    )
+
+    rag_chunks: list[RAGChunk] = Field(
+        default_factory=list,
+        description="Deprecated: List of RAG chunks used to generate the response.",
+    )
+
+    referenced_documents: list[ReferencedDocument] = Field(
+        default_factory=list,
+        description="List of documents referenced in generating the response",
+        examples=[
+            [
+                {
+                    "doc_url": "https://docs.openshift.com/"
+                    "container-platform/4.15/operators/olm/index.html",
+                    "doc_title": "Operator Lifecycle Manager (OLM)",
+                }
+            ]
+        ],
+    )
+
+    truncated: bool = Field(
+        False,
+        description="Deprecated: whether conversation history was truncated",
+        examples=[False, True],
+    )
+
+    input_tokens: int = Field(
+        0,
+        description="Number of tokens sent to LLM",
+        examples=[150, 250, 500],
+    )
+
+    output_tokens: int = Field(
+        0,
+        description="Number of tokens received from LLM",
+        examples=[50, 100, 200],
+    )
+
+    available_quotas: dict[str, int] = Field(
+        default_factory=dict,
+        description="Quota available as measured by all configured quota limiters",
+        examples=[{"daily": 1000, "monthly": 50000}],
+    )
+
+    tool_calls: list[ToolCallSummary] = Field(
+        default_factory=list,
+        description="List of tool calls made during response generation",
+    )
+
+    tool_results: list[ToolResultSummary] = Field(
+        default_factory=list,
+        description="List of tool results",
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                    "response": "Operator Lifecycle Manager (OLM) helps users install...",
+                    "referenced_documents": [
+                        {
+                            "doc_url": "https://docs.openshift.com/container-platform/4.15/"
+                            "operators/understanding/olm/olm-understanding-olm.html",
+                            "doc_title": "Operator Lifecycle Manager concepts and resources",
+                        },
+                    ],
+                    "truncated": False,
+                    "input_tokens": 123,
+                    "output_tokens": 456,
+                    "available_quotas": {
+                        "UserQuotaLimiter": 998911,
+                        "ClusterQuotaLimiter": 998911,
+                    },
+                    "tool_calls": [
+                        {"name": "tool1", "args": {}, "id": "1", "type": "tool_call"}
+                    ],
+                    "tool_results": [
+                        {
+                            "id": "1",
+                            "status": "success",
+                            "content": "bla",
+                            "type": "tool_result",
+                            "round": 1,
+                        }
+                    ],
+                }
+            ]
+        }
+    }
+
+
+class StreamingQueryResponse(AbstractSuccessfulResponse):
+    """Documentation-only model for streaming query responses using Server-Sent Events (SSE)."""
+
+    @classmethod
+    def openapi_response(cls) -> dict[str, Any]:
+        """Generate FastAPI response dict for SSE streaming with examples.
+
+        Note: used for OpenAPI docs only; the endpoint returns a StreamingResponse,
+        not this model. Raises SchemaError if the model schema has no examples.
+        """
+        schema = cls.model_json_schema()
+        model_examples = schema.get("examples")
+        if not model_examples:
+            raise SchemaError(f"Examples not found in {cls.__name__}")
+        example_value = model_examples[0]
+        content = {
+            MEDIA_TYPE_EVENT_STREAM: {
+                "schema": {"type": "string"},
+                "example": example_value,
+            }
+        }
+
+        return {
+            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
+            "content": content,
+            # Note: No "model" key since we're not actually serializing this model
+        }
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                (
+                    'data: {"event": "start", "data": {'
+                    '"conversation_id": "123e4567-e89b-12d3-a456-426614174000", '
+                    '"request_id": "123e4567-e89b-12d3-a456-426614174001"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 0, "token": "No Violation"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 1, "token": ""}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 2, "token": "Hello"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 3, "token": "!"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 4, "token": " How"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 5, "token": " can"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 6, "token": " I"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 7, "token": " assist"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 8, "token": " you"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 9, "token": " today"}}\n\n'
+                    'data: {"event": "token", "data": {'
+                    '"id": 10, "token": "?"}}\n\n'
+                    'data: {"event": "turn_complete", "data": {'
+                    '"token": "Hello! How can I assist you today?"}}\n\n'
+                    'data: {"event": "end", "data": {'
+                    '"referenced_documents": [], '
+                    '"truncated": null, "input_tokens": 11, "output_tokens": 19}, '
+                    '"available_quotas": {}}\n\n'
+                ),
+            ]
+        }
+    }
+
+
+class StreamingInterruptResponse(AbstractSuccessfulResponse):
+    """Model representing a response to a streaming interrupt request.
+
+    Attributes:
+        request_id: The streaming request ID targeted by the interrupt call.
+        interrupted: Whether an in-progress stream was interrupted.
+        message: Human-readable interruption status message.
+    """
+
+    request_id: str = Field(
+        description="The streaming request ID targeted by the interrupt call",
+        examples=["123e4567-e89b-12d3-a456-426614174000"],
+    )
+
+    interrupted: bool = Field(
+        description="Whether an in-progress stream was interrupted",
+        examples=[True],
+    )
+
+    message: str = Field(
+        description="Human-readable interruption status message",
+        examples=["Streaming request interrupted"],
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "request_id": "123e4567-e89b-12d3-a456-426614174000",
+                    "interrupted": True,
+                    "message": "Streaming request interrupted",
+                }
+            ]
+        }
+    }
diff --git a/src/models/api/responses/successful/responses_openai.py b/src/models/api/responses/successful/responses_openai.py
new file mode 100644
index 000000000..30ed13fb0
--- /dev/null
+++ b/src/models/api/responses/successful/responses_openai.py
@@ -0,0 +1,210 @@
+"""Successful response model for the OpenAI-compatible Responses API."""
+
+from typing import Any, Literal, Optional, cast
+
+from llama_stack_api.openai_responses import (
+    OpenAIResponseError as Error,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseInputToolChoice as ToolChoice,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseOutput as Output,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponsePrompt as Prompt,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseReasoning as Reasoning,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseText as Text,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseTool as OutputTool,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseUsage as Usage,
+)
+
+from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+
+
+class ResponsesResponse(AbstractSuccessfulResponse):
+    """Model representing a response from the Responses API following LCORE specification.
+
+    Attributes:
+        created_at: Unix timestamp when the response was created.
+        completed_at: Unix timestamp when the response was completed, if applicable.
+        error: Error details if the response failed or was blocked.
+        id: Unique identifier for this response.
+        model: Model identifier in "provider/model" format used for generation.
+        object: Object type identifier, always "response".
+        output: List of structured output items containing messages, tool calls, and
+            other content. This is the primary response content.
+        parallel_tool_calls: Whether the model can make multiple tool calls in parallel.
+        previous_response_id: Identifier of the previous response in a multi-turn
+            conversation.
+        prompt: The input prompt object that was sent to the model.
+        status: Current status of the response (e.g., "completed", "blocked",
+            "in_progress").
+        temperature: Temperature parameter used for generation (controls randomness).
+        text: Text response configuration object used for OpenAI responses.
+        top_p: Top-p sampling parameter used for generation.
+        tools: List of tools available to the model during generation.
+        tool_choice: Tool selection strategy used (e.g., "auto", "required", "none").
+        truncation: Strategy used for handling content that exceeds context limits.
+        usage: Token usage statistics including input_tokens, output_tokens, and
+            total_tokens.
+        instructions: System instructions or guidelines provided to the model.
+        max_tool_calls: Maximum number of tool calls allowed in a single response.
+        reasoning: Reasoning configuration (effort level) used for the response.
+        max_output_tokens: Upper bound for tokens generated in the response.
+        safety_identifier: Safety/guardrail identifier applied to the request.
+        metadata: Additional metadata dictionary with custom key-value pairs.
+        store: Whether the response was stored.
+        conversation: Conversation ID linking this response to a conversation thread
+            (LCORE-specific).
+        available_quotas: Remaining token quotas for the user (LCORE-specific).
+        output_text: Aggregated text output from all output_text items in the
+            output array.
+    """
+
+    created_at: int
+    completed_at: Optional[int] = None
+    error: Optional[Error] = None
+    id: str
+    model: str
+    object: Literal["response"] = "response"
+    output: list[Output]
+    parallel_tool_calls: bool = True
+    previous_response_id: Optional[str] = None
+    prompt: Optional[Prompt] = None
+    status: str
+    temperature: Optional[float] = None
+    text: Optional[Text] = None
+    top_p: Optional[float] = None
+    tools: Optional[list[OutputTool]] = None
+    tool_choice: Optional[ToolChoice] = None
+    truncation: Optional[str] = None
+    usage: Optional[Usage] = None
+    instructions: Optional[str] = None
+    max_tool_calls: Optional[int] = None
+    reasoning: Optional[Reasoning] = None
+    max_output_tokens: Optional[int] = None
+    safety_identifier: Optional[str] = None
+    metadata: Optional[dict[str, str]] = None
+    store: Optional[bool] = None
+    # LCORE-specific attributes
+    conversation: Optional[str] = None
+    available_quotas: dict[str, int]
+    output_text: str
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "created_at": 1704067200,
+                    "completed_at": 1704067250,
+                    "id": "resp_abc123",
+                    "model": "openai/gpt-4-turbo",
+                    "object": "response",
+                    "output": [
+                        {
+                            "type": "message",
+                            "role": "assistant",
+                            "content": [
+                                {
+                                    "type": "output_text",
+                                    "text": (
+                                        "Kubernetes is an open-source container "
+                                        "orchestration system..."
+                                    ),
+                                }
+                            ],
+                        }
+                    ],
+                    "parallel_tool_calls": True,
+                    "status": "completed",
+                    "temperature": 0.7,
+                    "text": {"format": {"type": "text"}},
+                    "usage": {
+                        "input_tokens": 100,
+                        "output_tokens": 50,
+                        "total_tokens": 150,
+                        "input_tokens_details": {"cached_tokens": 0},
+                        "output_tokens_details": {"reasoning_tokens": 0},
+                    },
+                    "instructions": "You are a helpful assistant",
+                    "store": True,
+                    "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e",
+                    "available_quotas": {"daily": 1000, "monthly": 50000},
+                    "output_text": (
+                        "Kubernetes is an open-source container orchestration system..."
+                    ),
+                }
+            ],
+            "sse_example": (
+                "event: response.created\n"
+                'data: {"type":"response.created","sequence_number":0,'
+                '"response":{"id":"resp_abc","object":"response",'
+                '"created_at":1704067200,"status":"in_progress","model":"openai/gpt-4o-mini",'
+                '"output":[],"store":true,"text":{"format":{"type":"text"}},'
+                '"conversation":"0d21ba731f21f798dc9680125d5d6f49",'
+                '"available_quotas":{},"output_text":""}}\n\n'
+                "event: response.output_item.added\n"
+                'data: {"type":"response.output_item.added","sequence_number":1,'
+                '"response_id":"resp_abc","output_index":0,'
+                '"item":{"id":"msg_abc","type":"message","status":"in_progress",'
+                '"role":"assistant","content":[]}}\n\n'
+                "...\n\n"
+                "event: response.completed\n"
+                'data: {"type":"response.completed","sequence_number":30,'
+                '"response":{"id":"resp_abc","object":"response",'
+                '"created_at":1704067200,"status":"completed","model":"openai/gpt-4o-mini",'
+                '"output":[{"id":"msg_abc","type":"message","status":"completed",'
+                '"role":"assistant","content":[{"type":"output_text",'
+                '"text":"Hello! How can I help?","annotations":[]}]}],'
+                '"store":true,"text":{"format":{"type":"text"}},'
+                '"usage":{"input_tokens":10,"output_tokens":6,"total_tokens":16,'
+                '"input_tokens_details":{"cached_tokens":0},'
+                '"output_tokens_details":{"reasoning_tokens":0}},'
+                '"conversation":"0d21ba731f21f798dc9680125d5d6f49",'
+                '"available_quotas":{"daily":1000,"monthly":50000},'
+                '"output_text":"Hello! How can I help?"}}\n\n'
+                "data: [DONE]\n\n"
+            ),
+        }
+    }
+
+    @classmethod
+    def openapi_response(cls) -> dict[str, Any]:
+        """
+        Build OpenAPI response dict with application/json and text/event-stream.
+
+        Uses the first JSON example from the model schema (if any) and adds the
+        text/event-stream example from json_schema_extra["sse_example"].
+        """
+        schema = cls.model_json_schema()
+        model_examples = schema.get("examples", [])
+        json_example = model_examples[0] if model_examples else None
+
+        schema_extra = (
+            cast(dict[str, Any], dict(cls.model_config)).get("json_schema_extra") or {}
+        )
+        sse_example = schema_extra.get("sse_example", "")
+
+        content: dict[str, Any] = {
+            "application/json": {"example": json_example} if json_example else {},
+            "text/event-stream": {
+                "schema": {"type": "string"},
+                "example": sse_example,
+            },
+        }
+
+        return {
+            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
+            "model": cls,
+            "content": content,
+        }
diff --git a/src/models/api/responses/successful/vector_stores.py b/src/models/api/responses/successful/vector_stores.py
new file mode 100644
index 000000000..08c21ae0d
--- /dev/null
+++ b/src/models/api/responses/successful/vector_stores.py
@@ -0,0 +1,303 @@
+"""Successful responses for vector stores and vector store files."""
+
+from typing import Any, ClassVar, Optional
+
+from pydantic import Field
+
+from models.api.responses.successful.bases import (
+    AbstractDeleteResponse,
+    AbstractSuccessfulResponse,
+)
+
+
+class VectorStoreResponse(AbstractSuccessfulResponse):
+    """Response model containing a single vector store.
+
+    Attributes:
+        id: Vector store ID.
+        name: Vector store name.
+        created_at: Unix timestamp when created.
+        last_active_at: Unix timestamp of last activity.
+        expires_at: Optional Unix timestamp when it expires.
+        status: Vector store status.
+        usage_bytes: Storage usage in bytes.
+        metadata: Optional metadata dictionary for storing session information.
+    """
+
+    id: str = Field(..., description="Vector store ID")
+    name: str = Field(..., description="Vector store name")
+    created_at: int = Field(..., description="Unix timestamp when created")
+    last_active_at: Optional[int] = Field(
+        None, description="Unix timestamp of last activity"
+    )
+    expires_at: Optional[int] = Field(
+        None, description="Unix timestamp when it expires"
+    )
+    status: str = Field(..., description="Vector store status")
+    usage_bytes: int = Field(default=0, description="Storage usage in bytes")
+    metadata: Optional[dict[str, Any]] = Field(
+        None,
+        description="Metadata dictionary for storing session information",
+        examples=[
+            {"conversation_id": "conv_123", "document_ids": ["doc_456", "doc_789"]}
+        ],
+    )
+
+    model_config = {
+        "extra": "forbid",
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "id": "vs_abc123",
+                    "name": "customer_support_docs",
+                    "created_at": 1704067200,
+                    "last_active_at": 1704153600,
+                    "expires_at": None,
+                    "status": "active",
+                    "usage_bytes": 1048576,
+                    "metadata": {
+                        "conversation_id": "conv_123",
+                        "document_ids": ["doc_456", "doc_789"],
+                    },
+                }
+            ]
+        },
+    }
+
+
+class VectorStoresListResponse(AbstractSuccessfulResponse):
+    """Response model containing a list of vector stores.
+
+    Attributes:
+        data: List of vector store objects.
+        object: Object type (always "list").
+    """
+
+    data: list[VectorStoreResponse] = Field(
+        default_factory=list, description="List of vector stores"
+    )
+    object: str = Field(default="list", description="Object type")
+
+    model_config = {
+        "extra": "forbid",
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "data": [
+                        {
+                            "id": "vs_abc123",
+                            "name": "customer_support_docs",
+                            "created_at": 1704067200,
+                            "last_active_at": 1704153600,
+                            "expires_at": None,
+                            "status": "active",
+                            "usage_bytes": 1048576,
+                            "metadata": {"conversation_id": "conv_123"},
+                        },
+                        {
+                            "id": "vs_def456",
+                            "name": "product_documentation",
+                            "created_at": 1704070800,
+                            "last_active_at": 1704157200,
+                            "expires_at": None,
+                            "status": "active",
+                            "usage_bytes": 2097152,
+                            "metadata": None,
+                        },
+                    ],
+                    "object": "list",
+                }
+            ]
+        },
+    }
+
+
+class VectorStoreDeleteResponse(AbstractDeleteResponse):
+    """Result of deleting a vector store (always HTTP 200)."""
+
+    resource_name: ClassVar[str] = "Vector store"
+    vector_store_id: str = Field(
+        ...,
+        description="Vector store identifier that was passed to delete.",
+        examples=["vs_abc123"],
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "label": "deleted",
+                    "value": {
+                        "vector_store_id": "vs_abc123",
+                        "deleted": True,
+                        "response": "Vector store deleted successfully",
+                    },
+                },
+                {
+                    "label": "not found",
+                    "value": {
+                        "vector_store_id": "vs_abc123",
+                        "deleted": False,
+                        "response": "Vector store not found",
+                    },
+                },
+            ]
+        }
+    }
+
+
+class VectorStoreFileDeleteResponse(AbstractDeleteResponse):
+    """Result of deleting a file from a vector store (always HTTP 200)."""
+
+    resource_name: ClassVar[str] = "Vector store file"
+    file_id: str = Field(
+        ...,
+        description="File identifier that was passed to delete.",
+        examples=["file_abc123"],
+    )
+
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "label": "deleted",
+                    "value": {
+                        "file_id": "file_abc123",
+                        "deleted": True,
+                        "response": "Vector store file deleted successfully",
+                    },
+                },
+                {
+                    "label": "not found",
+                    "value": {
+                        "file_id": "file_abc123",
+                        "deleted": False,
+                        "response": "Vector store file not found",
+                    },
+                },
+            ]
+        }
+    }
+
+
+class FileResponse(AbstractSuccessfulResponse):
+    """Response model containing a file object.
+
+    Attributes:
+        id: File ID.
+        filename: File name.
+        bytes: File size in bytes.
+        created_at: Unix timestamp when created.
+        purpose: File purpose.
+        object: Object type (always "file").
+    """
+
+    id: str = Field(..., description="File ID")
+    filename: str = Field(..., description="File name")
+    bytes: int = Field(..., description="File size in bytes")
+    created_at: int = Field(..., description="Unix timestamp when created")
+    purpose: str = Field(default="assistants", description="File purpose")
+    object: str = Field(default="file", description="Object type")
+
+    model_config = {
+        "extra": "forbid",
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "id": "file_abc123",
+                    "filename": "documentation.pdf",
+                    "bytes": 524288,
+                    "created_at": 1704067200,
+                    "purpose": "assistants",
+                    "object": "file",
+                }
+            ]
+        },
+    }
+
+
+class VectorStoreFileResponse(AbstractSuccessfulResponse):
+    """Response model containing a vector store file object.
+
+    Attributes:
+        id: Vector store file ID.
+        vector_store_id: ID of the vector store.
+        status: File processing status.
+        attributes: Optional metadata key-value pairs.
+        last_error: Optional error message if processing failed.
+        object: Object type (always "vector_store.file").
+    """
+
+    id: str = Field(..., description="Vector store file ID")
+    vector_store_id: str = Field(..., description="ID of the vector store")
+    status: str = Field(..., description="File processing status")
+    attributes: Optional[dict[str, str | float | bool]] = Field(
+        None,
+        description=(
+            "Set of up to 16 key-value pairs for storing additional information. "
+            "Keys: strings (max 64 chars). Values: strings (max 512 chars), booleans, or numbers."
+        ),
+    )
+    last_error: Optional[str] = Field(
+        None, description="Error message if processing failed"
+    )
+    object: str = Field(default="vector_store.file", description="Object type")
+
+    model_config = {
+        "extra": "forbid",
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "id": "file_abc123",
+                    "vector_store_id": "vs_abc123",
+                    "status": "completed",
+                    "attributes": {"chunk_size": "512", "indexed": True},
+                    "last_error": None,
+                    "object": "vector_store.file",
+                }
+            ]
+        },
+    }
+
+
+class VectorStoreFilesListResponse(AbstractSuccessfulResponse):
+    """Response model containing a list of vector store files.
+
+    Attributes:
+        data: List of vector store file objects.
+        object: Object type (always "list").
+    """
+
+    data: list[VectorStoreFileResponse] = Field(
+        default_factory=list, description="List of vector store files"
+    )
+    object: str = Field(default="list", description="Object type")
+
+    model_config = {
+        "extra": "forbid",
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "data": [
+                        {
+                            "id": "file_abc123",
+                            "vector_store_id": "vs_abc123",
+                            "status": "completed",
+                            "attributes": {"chunk_size": "512"},
+                            "last_error": None,
+                            "object": "vector_store.file",
+                        },
+                        {
+                            "id": "file_def456",
+                            "vector_store_id": "vs_abc123",
+                            "status": "processing",
+                            "attributes": None,
+                            "last_error": None,
+                            "object": "vector_store.file",
+                        },
+                    ],
+                    "object": "list",
+                }
+            ]
+        },
+    }
diff --git a/src/models/cache_entry.py b/src/models/cache_entry.py
index ce4872ee3..f0768eb4a 100644
--- a/src/models/cache_entry.py
+++ b/src/models/cache_entry.py
@@ -4,7 +4,11 @@
 
 from pydantic import BaseModel
 
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
 
 
 class CacheEntry(BaseModel):
diff --git a/src/models/common/__init__.py b/src/models/common/__init__.py
new file mode 100644
index 000000000..017d9614a
--- /dev/null
+++ b/src/models/common/__init__.py
@@ -0,0 +1,49 @@
+"""Shared Pydantic models used across API layers (not response envelopes)."""
+
+from models.common.conversation import (
+    ConversationData,
+    ConversationDetails,
+    ConversationTurn,
+    Message,
+)
+from models.common.health import ProviderHealthStatus
+from models.common.mcp import MCPServerAuthInfo, MCPServerInfo
+from models.common.moderation import (
+    ShieldModerationBlocked,
+    ShieldModerationPassed,
+    ShieldModerationResult,
+)
+from models.common.responses.responses_conversation_context import (
+    ResponsesConversationContext,
+)
+from models.common.transcripts import Transcript, TranscriptMetadata
+from models.common.turn_summary import (
+    RAGChunk,
+    RAGContext,
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+    TurnSummary,
+)
+
+__all__ = [
+    "ConversationData",
+    "ConversationDetails",
+    "ConversationTurn",
+    "MCPServerAuthInfo",
+    "MCPServerInfo",
+    "Message",
+    "ProviderHealthStatus",
+    "RAGChunk",
+    "RAGContext",
+    "ReferencedDocument",
+    "ResponsesConversationContext",
+    "ShieldModerationBlocked",
+    "ShieldModerationPassed",
+    "ShieldModerationResult",
+    "ToolCallSummary",
+    "ToolResultSummary",
+    "Transcript",
+    "TranscriptMetadata",
+    "TurnSummary",
+]
diff --git a/src/models/common/conversation.py b/src/models/common/conversation.py
new file mode 100644
index 000000000..e02884a23
--- /dev/null
+++ b/src/models/common/conversation.py
@@ -0,0 +1,166 @@
+"""Conversation list rows, metadata, and simplified turn/message shapes for APIs."""
+
+from typing import Literal, Optional
+
+from pydantic import BaseModel, Field
+
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
+
+
+class ConversationData(BaseModel):
+    """Model representing conversation data returned by cache list operations.
+
+    Attributes:
+        conversation_id: The conversation ID
+        topic_summary: The topic summary for the conversation (can be None)
+        last_message_timestamp: The timestamp of the last message in the conversation
+    """
+
+    conversation_id: str
+    topic_summary: Optional[str]
+    last_message_timestamp: float
+
+
+class ConversationDetails(BaseModel):
+    """Model representing the details of a user conversation.
+
+    Attributes:
+        conversation_id: The conversation ID (UUID).
+        created_at: When the conversation was created.
+        last_message_at: When the last message was sent.
+        message_count: Number of user messages in the conversation.
+        last_used_model: The last model used for the conversation.
+        last_used_provider: The provider of the last used model.
+        topic_summary: The topic summary for the conversation.
+
+    Example:
+        ```python
+        conversation = ConversationDetails(
+            conversation_id="123e4567-e89b-12d3-a456-426614174000",
+            created_at="2024-01-01T00:00:00Z",
+            last_message_at="2024-01-01T00:05:00Z",
+            message_count=5,
+            last_used_model="gemini/gemini-2.0-flash",
+            last_used_provider="gemini",
+            topic_summary="Openshift Microservices Deployment Strategies",
+        )
+        ```
+    """
+
+    conversation_id: str = Field(
+        ...,
+        description="Conversation ID (UUID)",
+        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
+    )
+
+    created_at: Optional[str] = Field(
+        None,
+        description="When the conversation was created",
+        examples=["2024-01-01T01:00:00Z"],
+    )
+
+    last_message_at: Optional[str] = Field(
+        None,
+        description="When the last message was sent",
+        examples=["2024-01-01T01:00:00Z"],
+    )
+
+    message_count: Optional[int] = Field(
+        None,
+        description="Number of user messages in the conversation",
+        examples=[42],
+    )
+
+    last_used_model: Optional[str] = Field(
+        None,
+        description="Identification of the last model used for the conversation",
+        examples=["gpt-4-turbo", "gpt-3.5-turbo-0125"],
+    )
+
+    last_used_provider: Optional[str] = Field(
+        None,
+        description="Identification of the last provider used for the conversation",
+        examples=["openai", "gemini"],
+    )
+
+    topic_summary: Optional[str] = Field(
+        None,
+        description="Topic summary for the conversation",
+        examples=["Openshift Microservices Deployment Strategies"],
+    )
+
+
+class Message(BaseModel):
+    """Model representing a message in a conversation turn.
+
+    Attributes:
+        content: The message content.
+        type: The type of message.
+        referenced_documents: Optional list of documents referenced in an assistant response.
+    """
+
+    content: str = Field(
+        ...,
+        description="The message content",
+        examples=["Hello, how can I help you?"],
+    )
+    type: Literal["user", "assistant", "system", "developer"] = Field(
+        ...,
+        description="The type of message",
+        examples=["user", "assistant", "system", "developer"],
+    )
+    referenced_documents: Optional[list[ReferencedDocument]] = Field(
+        None,
+        description="List of documents referenced in the response (assistant messages only)",
+    )
+
+
+class ConversationTurn(BaseModel):
+    """Model representing a single conversation turn.
+
+    Attributes:
+        messages: List of messages in this turn.
+        tool_calls: List of tool calls made in this turn.
+        tool_results: List of tool results from this turn.
+        provider: Provider identifier used for this turn.
+        model: Model identifier used for this turn.
+        started_at: ISO 8601 timestamp when the turn started.
+        completed_at: ISO 8601 timestamp when the turn completed.
+    """
+
+    messages: list[Message] = Field(
+        default_factory=list,
+        description="List of messages in this turn",
+    )
+    tool_calls: list[ToolCallSummary] = Field(
+        default_factory=list,
+        description="List of tool calls made in this turn",
+    )
+    tool_results: list[ToolResultSummary] = Field(
+        default_factory=list,
+        description="List of tool results from this turn",
+    )
+    provider: str = Field(
+        ...,
+        description="Provider identifier used for this turn",
+        examples=["openai"],
+    )
+    model: str = Field(
+        ...,
+        description="Model identifier used for this turn",
+        examples=["gpt-4o-mini"],
+    )
+    started_at: str = Field(
+        ...,
+        description="ISO 8601 timestamp when the turn started",
+        examples=["2024-01-01T00:01:00Z"],
+    )
+    completed_at: str = Field(
+        ...,
+        description="ISO 8601 timestamp when the turn completed",
+        examples=["2024-01-01T00:01:05Z"],
+    )
diff --git a/src/models/common/health.py b/src/models/common/health.py
new file mode 100644
index 000000000..6ccb87f75
--- /dev/null
+++ b/src/models/common/health.py
@@ -0,0 +1,28 @@
+"""Health-related shared models for readiness and diagnostics."""
+
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class ProviderHealthStatus(BaseModel):
+    """Model representing the health status of a provider.
+
+    Attributes:
+        provider_id: The ID of the provider.
+        status: The health status ('ok', 'unhealthy', 'not_implemented').
+        message: Optional message about the health status.
+    """
+
+    provider_id: str = Field(
+        description="The ID of the provider",
+    )
+    status: str = Field(
+        description="The health status",
+        examples=["ok", "unhealthy", "not_implemented"],
+    )
+    message: Optional[str] = Field(
+        None,
+        description="Optional message about the health status",
+        examples=["All systems operational", "Llama Stack is unavailable"],
+    )
diff --git a/src/models/common/mcp.py b/src/models/common/mcp.py
new file mode 100644
index 000000000..6f671ddc0
--- /dev/null
+++ b/src/models/common/mcp.py
@@ -0,0 +1,33 @@
+"""MCP server metadata models shared by registration and list responses."""
+
+from pydantic import BaseModel, Field
+
+
+class MCPServerAuthInfo(BaseModel):
+    """Information about MCP server client authentication options."""
+
+    name: str = Field(..., description="MCP server name")
+    client_auth_headers: list[str] = Field(
+        ...,
+        description="List of authentication header names for client-provided tokens",
+    )
+
+
+class MCPServerInfo(BaseModel):
+    """Information about a registered MCP server.
+
+    Attributes:
+        name: Unique name of the MCP server.
+        url: URL of the MCP server endpoint.
+        provider_id: MCP provider identification.
+        source: Whether the server was registered statically (config) or dynamically (api).
+    """
+
+    name: str = Field(..., description="MCP server name")
+    url: str = Field(..., description="MCP server URL")
+    provider_id: str = Field(..., description="MCP provider identification")
+    source: str = Field(
+        ...,
+        description="How the server was registered: 'config' (static) or 'api' (dynamic)",
+        examples=["config", "api"],
+    )
diff --git a/src/models/common/moderation.py b/src/models/common/moderation.py
new file mode 100644
index 000000000..1e4f16368
--- /dev/null
+++ b/src/models/common/moderation.py
@@ -0,0 +1,29 @@
+"""Shield moderation outcomes for the responses pipeline."""
+
+from typing import Annotated, Literal
+
+from llama_stack_api.openai_responses import (
+    OpenAIResponseMessage as ResponseMessage,
+)
+from pydantic import BaseModel, Field
+
+
+class ShieldModerationPassed(BaseModel):
+    """Shield moderation passed; no refusal."""
+
+    decision: Literal["passed"] = "passed"
+
+
+class ShieldModerationBlocked(BaseModel):
+    """Shield moderation blocked the content; refusal details are present."""
+
+    decision: Literal["blocked"] = "blocked"
+    message: str
+    moderation_id: str
+    refusal_response: ResponseMessage
+
+
+ShieldModerationResult = Annotated[
+    ShieldModerationPassed | ShieldModerationBlocked,
+    Field(discriminator="decision"),
+]
diff --git a/src/models/common/responses/__init__.py b/src/models/common/responses/__init__.py
new file mode 100644
index 000000000..6cdf7e5c7
--- /dev/null
+++ b/src/models/common/responses/__init__.py
@@ -0,0 +1,21 @@
+"""Shared models for the OpenAI-compatible Responses API pipeline."""
+
+from models.common.responses.responses_api_params import ResponsesApiParams
+from models.common.responses.responses_context import ResponsesContext
+from models.common.responses.responses_conversation_context import (
+    ResponsesConversationContext,
+)
+from models.common.responses.types import (
+    IncludeParameter,
+    ResponseInput,
+    ResponseItem,
+)
+
+__all__ = [
+    "ResponseInput",
+    "ResponseItem",
+    "IncludeParameter",
+    "ResponsesApiParams",
+    "ResponsesContext",
+    "ResponsesConversationContext",
+]
diff --git a/src/models/common/responses/responses_api_params.py b/src/models/common/responses/responses_api_params.py
index 6767c392e..acb219c89 100644
--- a/src/models/common/responses/responses_api_params.py
+++ b/src/models/common/responses/responses_api_params.py
@@ -23,8 +23,8 @@
 )
 from pydantic import BaseModel, Field
 
+from models.common.responses.types import IncludeParameter, ResponseInput
 from utils.tool_formatter import translate_vector_store_ids_to_user_facing
-from utils.types import IncludeParameter, ResponseInput
 
 # Attribute names that are echoed back in the response.
 _ECHOED_FIELDS: Final[set[str]] = set(
@@ -161,7 +161,7 @@ def echoed_params(self, rag_id_mapping: Mapping[str, str]) -> dict[str, Any]:
         data = self.model_dump(include=_ECHOED_FIELDS)
         if self.tools is not None:
             tool_dicts: list[dict[str, Any]] = []
-            for t in self.tools:
+            for t in list(self.tools):
                 if t.type == "mcp":
                     validated = OutputToolMCP.model_validate(t.model_dump())
                     tool_dicts.append(validated.model_dump())
diff --git a/src/models/common/responses/responses_context.py b/src/models/common/responses/responses_context.py
index 3cb6e718f..8e61f0e77 100644
--- a/src/models/common/responses/responses_context.py
+++ b/src/models/common/responses/responses_context.py
@@ -7,7 +7,8 @@
 from llama_stack_client import AsyncLlamaStackClient
 from pydantic import BaseModel, ConfigDict, Field
 
-from utils.types import RAGContext, ShieldModerationResult
+from models.common.moderation import ShieldModerationResult
+from models.common.turn_summary import RAGContext
 
 
 class ResponsesContext(BaseModel):
diff --git a/src/models/common/responses/responses_conversation_context.py b/src/models/common/responses/responses_conversation_context.py
new file mode 100644
index 000000000..05229e9b5
--- /dev/null
+++ b/src/models/common/responses/responses_conversation_context.py
@@ -0,0 +1,33 @@
+"""Conversation resolution result model for the OpenAI-compatible responses endpoint."""
+
+from typing import Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from models.database.conversations import UserConversation
+
+
+class ResponsesConversationContext(BaseModel):
+    """Result of resolving conversation context for the responses endpoint.
+
+    Holds the conversation ID to use for the LLM, the optional user conversation
+    record, and the resolved generate_topic_summary flag. Caller assigns these
+    to the request in outer scope instead of mutating the request inside the
+    resolver.
+
+    Attributes:
+        conversation: Conversation ID in llama-stack format to use for the request.
+        user_conversation: Resolved user conversation record, or None for new conversations.
+        generate_topic_summary: Resolved value for request.generate_topic_summary.
+    """
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    conversation: str = Field(description="Conversation ID in llama-stack format")
+    user_conversation: Optional[UserConversation] = Field(
+        default=None,
+        description="Resolved user conversation record, or None for new conversations",
+    )
+    generate_topic_summary: bool = Field(
+        description="Resolved value for request.generate_topic_summary",
+    )
diff --git a/src/models/common/responses/types.py b/src/models/common/responses/types.py
new file mode 100644
index 000000000..992d5a4df
--- /dev/null
+++ b/src/models/common/responses/types.py
@@ -0,0 +1,55 @@
+"""Type aliases for OpenAI-compatible Responses API input shapes."""
+
+from typing import Literal
+
+from llama_stack_api.openai_responses import (
+    OpenAIResponseInputFunctionToolCallOutput as FunctionToolCallOutput,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseMCPApprovalRequest as McpApprovalRequest,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseMCPApprovalResponse as McpApprovalResponse,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseMessage as ResponseMessage,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseOutputMessageFileSearchToolCall as FileSearchToolCall,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseOutputMessageFunctionToolCall as FunctionToolCall,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseOutputMessageMCPCall as McpCall,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseOutputMessageMCPListTools as McpListTools,
+)
+from llama_stack_api.openai_responses import (
+    OpenAIResponseOutputMessageWebSearchToolCall as WebSearchToolCall,
+)
+
+type IncludeParameter = Literal[
+    "web_search_call.action.sources",
+    "code_interpreter_call.outputs",
+    "computer_call_output.output.image_url",
+    "file_search_call.results",
+    "message.input_image.image_url",
+    "message.output_text.logprobs",
+    "reasoning.encrypted_content",
+]
+
+type ResponseItem = (
+    ResponseMessage
+    | WebSearchToolCall
+    | FileSearchToolCall
+    | FunctionToolCallOutput
+    | McpCall
+    | McpListTools
+    | McpApprovalRequest
+    | FunctionToolCall
+    | McpApprovalResponse
+)
+
+type ResponseInput = str | list[ResponseItem]
diff --git a/src/models/common/transcripts.py b/src/models/common/transcripts.py
new file mode 100644
index 000000000..6ff648ba3
--- /dev/null
+++ b/src/models/common/transcripts.py
@@ -0,0 +1,31 @@
+"""Pydantic models for persisted query/response transcript entries."""
+
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field
+
+
+class TranscriptMetadata(BaseModel):
+    """Metadata for a transcript entry."""
+
+    provider: Optional[str] = None
+    model: str
+    query_provider: Optional[str] = None
+    query_model: Optional[str] = None
+    user_id: str
+    conversation_id: str
+    timestamp: str
+
+
+class Transcript(BaseModel):
+    """Model representing a transcript entry to be stored."""
+
+    metadata: TranscriptMetadata
+    redacted_query: str
+    query_is_valid: bool
+    llm_response: str
+    rag_chunks: list[dict[str, Any]] = Field(default_factory=list)
+    truncated: bool
+    attachments: list[dict[str, Any]] = Field(default_factory=list)
+    tool_calls: list[dict[str, Any]] = Field(default_factory=list)
+    tool_results: list[dict[str, Any]] = Field(default_factory=list)
diff --git a/src/models/common/turn_summary.py b/src/models/common/turn_summary.py
new file mode 100644
index 000000000..920a17c71
--- /dev/null
+++ b/src/models/common/turn_summary.py
@@ -0,0 +1,104 @@
+"""RAG context, chunks, document refs, tool summaries, and per-turn aggregation.
+
+Used on query and streaming paths.
+"""
+
+from typing import Any, Optional
+
+from pydantic import AnyUrl, BaseModel, Field
+
+from utils.token_counter import TokenCounter
+
+
+class RAGChunk(BaseModel):
+    """Model representing a RAG chunk used in the response."""
+
+    content: str = Field(description="The content of the chunk")
+    source: Optional[str] = Field(
+        default=None,
+        description="Index name identifying the knowledge source from configuration",
+    )
+    score: Optional[float] = Field(default=None, description="Relevance score")
+    attributes: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="Document metadata from the RAG provider (e.g., url, title, author)",
+    )
+
+
+class ReferencedDocument(BaseModel):
+    """Model representing a document referenced in generating a response.
+
+    Attributes:
+        doc_url: URL of the referenced doc.
+        doc_title: Title of the referenced doc.
+    """
+
+    doc_url: Optional[AnyUrl] = Field(
+        default=None, description="URL of the referenced document"
+    )
+
+    doc_title: Optional[str] = Field(
+        default=None, description="Title of the referenced document"
+    )
+
+    source: Optional[str] = Field(
+        default=None,
+        description="Index name identifying the knowledge source from configuration",
+    )
+
+
+class RAGContext(BaseModel):
+    """Result of building RAG context from all enabled pre-query RAG sources.
+
+    Attributes:
+        context_text: Formatted RAG context string for injection into the query.
+        rag_chunks: RAG chunks from pre-query sources (BYOK + Solr).
+        referenced_documents: Referenced documents from pre-query sources.
+    """
+
+    context_text: str = Field(default="", description="Formatted context for injection")
+    rag_chunks: list[RAGChunk] = Field(
+        default_factory=list,
+        description="RAG chunks from pre-query sources",
+    )
+    referenced_documents: list[ReferencedDocument] = Field(
+        default_factory=list,
+        description="Documents from pre-query sources",
+    )
+
+
+class ToolCallSummary(BaseModel):
+    """Model representing a tool call made during response generation (for tool_calls list)."""
+
+    id: str = Field(description="ID of the tool call")
+    name: str = Field(description="Name of the tool called")
+    args: dict[str, Any] = Field(
+        default_factory=dict, description="Arguments passed to the tool"
+    )
+    type: str = Field("tool_call", description="Type indicator for tool call")
+
+
+class ToolResultSummary(BaseModel):
+    """Model representing a result from a tool call (for tool_results list)."""
+
+    id: str = Field(
+        description="ID of the tool call/result, matches the corresponding tool call 'id'"
+    )
+    status: str = Field(
+        ..., description="Status of the tool execution (e.g., 'success')"
+    )
+    content: str = Field(..., description="Content/result returned from the tool")
+    type: str = Field("tool_result", description="Type indicator for tool result")
+    round: int = Field(..., description="Round number or step of tool execution")
+
+
+class TurnSummary(BaseModel):
+    """Summary of a turn in llama stack."""
+
+    id: str = Field(default="", description="ID of the response")
+    llm_response: str = ""
+    tool_calls: list[ToolCallSummary] = Field(default_factory=list)
+    tool_results: list[ToolResultSummary] = Field(default_factory=list)
+    rag_chunks: list[RAGChunk] = Field(default_factory=list)
+    referenced_documents: list[ReferencedDocument] = Field(default_factory=list)
+    token_usage: TokenCounter = Field(default_factory=TokenCounter)
diff --git a/src/models/context.py b/src/models/context.py
index 8df6f3692..c861b883a 100644
--- a/src/models/context.py
+++ b/src/models/context.py
@@ -4,8 +4,9 @@
 
 from llama_stack_client import AsyncLlamaStackClient
 
+from models.common.moderation import ShieldModerationResult
+from models.common.turn_summary import RAGContext
 from models.requests import QueryRequest
-from utils.types import RAGContext, ShieldModerationResult
 
 
 @dataclass
diff --git a/src/models/requests.py b/src/models/requests.py
index 69b084a46..524d02346 100644
--- a/src/models/requests.py
+++ b/src/models/requests.py
@@ -33,8 +33,8 @@
     SOLR_VECTOR_SEARCH_DEFAULT_MODE,
 )
 from log import get_logger
+from models.common.responses.types import IncludeParameter, ResponseInput
 from utils import suid
-from utils.types import IncludeParameter, ResponseInput
 
 logger = get_logger(__name__)
 
diff --git a/src/models/responses.py b/src/models/responses.py
deleted file mode 100644
index b89827645..000000000
--- a/src/models/responses.py
+++ /dev/null
@@ -1,2048 +0,0 @@
-# pylint: disable=too-many-lines
-
-"""Models for REST API responses."""
-
-from typing import Any, ClassVar, Literal, Optional, cast
-
-from llama_stack_api.openai_responses import (
-    OpenAIResponseError as Error,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseInputToolChoice as ToolChoice,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseOutput as Output,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponsePrompt as Prompt,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseReasoning as Reasoning,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseText as Text,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseTool as OutputTool,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseUsage as Usage,
-)
-from pydantic import BaseModel, Field, computed_field
-from pydantic_core import SchemaError
-
-from constants import MEDIA_TYPE_EVENT_STREAM
-from log import get_logger
-from models.api.responses.constants import SUCCESSFUL_RESPONSE_DESCRIPTION
-from models.config import Configuration
-from utils.types import RAGChunk, ReferencedDocument, ToolCallSummary, ToolResultSummary
-
-logger = get_logger(__name__)
-
-
-class AbstractSuccessfulResponse(BaseModel):
-    """Base class for all successful response models."""
-
-    @classmethod
-    def openapi_response(cls) -> dict[str, Any]:
-        """Generate FastAPI response dict with a single example from model_config."""
-        schema = cls.model_json_schema()
-        model_examples = schema.get("examples")
-        if not model_examples:
-            raise SchemaError(f"Examples not found in {cls.__name__}")
-        example_value = model_examples[0]
-        content = {"application/json": {"example": example_value}}
-
-        return {
-            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
-            "model": cls,
-            "content": content,
-        }
-
-
-class AbstractDeleteResponse(BaseModel):
-    """Base model for successful delete responses."""
-
-    deleted: bool = Field(
-        ...,
-        description="Whether the deletion was successful.",
-        examples=[True, False],
-    )
-    resource_name: ClassVar[str]
-
-    @computed_field
-    def response(self) -> str:
-        """Human-readable outcome of the delete operation."""
-        return (
-            f"{self.resource_name} deleted successfully"
-            if self.deleted
-            else f"{self.resource_name} not found"
-        )
-
-    @classmethod
-    def openapi_response(cls) -> dict[str, Any]:
-        """Build FastAPI/OpenAPI metadata with named application/json examples.
-
-        Returns:
-            A response dict with description, model, and content keys.
-
-        Raises:
-            SchemaError: If the model JSON schema has no examples list.
-        """
-        schema = cls.model_json_schema()
-        model_examples = schema.get("examples")
-        if not model_examples:
-            raise SchemaError(f"Examples not found in {cls.__name__}")
-
-        examples: dict[str, dict[str, Any]] = {}
-        for index, example in enumerate(model_examples):
-            if "label" not in example:
-                raise SchemaError(
-                    f"Example at index {index} in {cls.__name__} has no label"
-                )
-            if "value" not in example:
-                raise SchemaError(
-                    f"Example at index {index} in {cls.__name__} has no value"
-                )
-            examples[example["label"]] = {"value": example["value"]}
-
-        return {
-            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
-            "model": cls,
-            "content": {"application/json": {"examples": examples}},
-        }
-
-
-class ModelsResponse(AbstractSuccessfulResponse):
-    """Model representing a response to models request."""
-
-    models: list[dict[str, Any]] = Field(
-        ...,
-        description="List of models available",
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "models": [
-                        {
-                            "identifier": "openai/gpt-4-turbo",
-                            "metadata": {},
-                            "api_model_type": "llm",
-                            "provider_id": "openai",
-                            "type": "model",
-                            "provider_resource_id": "gpt-4-turbo",
-                            "model_type": "llm",
-                        },
-                    ],
-                }
-            ]
-        }
-    }
-
-
-class ToolsResponse(AbstractSuccessfulResponse):
-    """Model representing a response to tools request."""
-
-    tools: list[dict[str, Any]] = Field(
-        description=(
-            "List of tools available from all configured MCP servers and built-in toolgroups"
-        ),
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "tools": [
-                        {
-                            "identifier": "filesystem_read",
-                            "description": "Read contents of a file from the filesystem",
-                            "parameters": [
-                                {
-                                    "name": "path",
-                                    "description": "Path to the file to read",
-                                    "parameter_type": "string",
-                                    "required": True,
-                                    "default": None,
-                                }
-                            ],
-                            "provider_id": "model-context-protocol",
-                            "toolgroup_id": "filesystem-tools",
-                            "server_source": "http://localhost:3000",
-                            "type": "tool",
-                        }
-                    ],
-                }
-            ]
-        }
-    }
-
-
-class MCPServerAuthInfo(BaseModel):
-    """Information about MCP server client authentication options."""
-
-    name: str = Field(..., description="MCP server name")
-    client_auth_headers: list[str] = Field(
-        ...,
-        description="List of authentication header names for client-provided tokens",
-    )
-
-
-class MCPClientAuthOptionsResponse(AbstractSuccessfulResponse):
-    """Response containing MCP servers that accept client-provided authorization."""
-
-    servers: list[MCPServerAuthInfo] = Field(
-        default_factory=list,
-        description="List of MCP servers that accept client-provided authorization",
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "servers": [
-                        {
-                            "name": "github",
-                            "client_auth_headers": ["Authorization"],
-                        },
-                        {
-                            "name": "gitlab",
-                            "client_auth_headers": ["Authorization", "X-API-Key"],
-                        },
-                    ]
-                }
-            ]
-        }
-    }
-
-
-class MCPServerInfo(BaseModel):
-    """Information about a registered MCP server.
-
-    Attributes:
-        name: Unique name of the MCP server.
-        url: URL of the MCP server endpoint.
-        provider_id: MCP provider identification.
-        source: Whether the server was registered statically (config) or dynamically (api).
-    """
-
-    name: str = Field(..., description="MCP server name")
-    url: str = Field(..., description="MCP server URL")
-    provider_id: str = Field(..., description="MCP provider identification")
-    source: str = Field(
-        ...,
-        description="How the server was registered: 'config' (static) or 'api' (dynamic)",
-        examples=["config", "api"],
-    )
-
-
-class MCPServerRegistrationResponse(AbstractSuccessfulResponse):
-    """Response for a successful MCP server registration."""
-
-    name: str = Field(..., description="Registered MCP server name")
-    url: str = Field(..., description="Registered MCP server URL")
-    provider_id: str = Field(..., description="MCP provider identification")
-    message: str = Field(..., description="Status message")
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "name": "mcp-integration-tools",
-                    "url": "http://host.docker.internal:7008/api/mcp-actions/v1",
-                    "provider_id": "model-context-protocol",
-                    "message": "MCP server 'mcp-integration-tools' registered successfully",
-                }
-            ]
-        }
-    }
-
-
-class MCPServerListResponse(AbstractSuccessfulResponse):
-    """Response listing all registered MCP servers."""
-
-    servers: list[MCPServerInfo] = Field(
-        default_factory=list,
-        description="List of all registered MCP servers (static and dynamic)",
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "servers": [
-                        {
-                            "name": "mcp-integration-tools",
-                            "url": "http://host.docker.internal:7008/api/mcp-actions/v1",
-                            "provider_id": "model-context-protocol",
-                            "source": "config",
-                        },
-                        {
-                            "name": "test-mcp-server",
-                            "url": "http://host.docker.internal:8888/mcp",
-                            "provider_id": "model-context-protocol",
-                            "source": "api",
-                        },
-                    ]
-                }
-            ]
-        }
-    }
-
-
-class MCPServerDeleteResponse(AbstractSuccessfulResponse):
-    """Response for a successful MCP server deletion."""
-
-    name: str = Field(..., description="Deleted MCP server name")
-    message: str = Field(..., description="Status message")
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "name": "test-mcp-server",
-                    "message": "MCP server 'test-mcp-server' unregistered successfully",
-                }
-            ]
-        }
-    }
-
-
-class ShieldsResponse(AbstractSuccessfulResponse):
-    """Model representing a response to shields request."""
-
-    shields: list[dict[str, Any]] = Field(
-        ...,
-        description="List of shields available",
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "shields": [
-                        {
-                            "identifier": "lightspeed_question_validity-shield",
-                            "provider_resource_id": "lightspeed_question_validity-shield",
-                            "provider_id": "lightspeed_question_validity",
-                            "type": "shield",
-                            "params": {},
-                        }
-                    ],
-                }
-            ]
-        }
-    }
-
-
-class RAGInfoResponse(AbstractSuccessfulResponse):
-    """Model representing a response with information about RAG DB."""
-
-    id: str = Field(
-        ..., description="Vector DB unique ID", examples=["vs_00000000_0000_0000"]
-    )
-    name: Optional[str] = Field(
-        None,
-        description="Human readable vector DB name",
-        examples=["Faiss Store with Knowledge base"],
-    )
-    created_at: int = Field(
-        ...,
-        description="When the vector store was created, represented as Unix time",
-        examples=[1763391371],
-    )
-    last_active_at: Optional[int] = Field(
-        None,
-        description="When the vector store was last active, represented as Unix time",
-        examples=[1763391371],
-    )
-    usage_bytes: int = Field(
-        ...,
-        description="Storage byte(s) used by this vector DB",
-        examples=[0],
-    )
-    expires_at: Optional[int] = Field(
-        None,
-        description="When the vector store expires, represented as Unix time",
-        examples=[1763391371],
-    )
-    object: str = Field(
-        ...,
-        description="Object type",
-        examples=["vector_store"],
-    )
-    status: str = Field(
-        ...,
-        description="Vector DB status",
-        examples=["completed"],
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3",
-                    "name": "Faiss Store with Knowledge base",
-                    "created_at": 1763391371,
-                    "last_active_at": 1763391371,
-                    "usage_bytes": 1024000,
-                    "expires_at": None,
-                    "object": "vector_store",
-                    "status": "completed",
-                }
-            ]
-        }
-    }
-
-
-class RAGListResponse(AbstractSuccessfulResponse):
-    """Model representing a response to list RAGs request."""
-
-    rags: list[str] = Field(
-        ...,
-        title="RAG list response",
-        description="List of RAG identifiers",
-        examples=[
-            "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3",
-            "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3",
-        ],
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "rags": [
-                        "vs_00000000-cafe-babe-0000-000000000000",
-                        "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3",
-                        "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3",
-                    ]
-                }
-            ]
-        }
-    }
-
-
-class ProvidersListResponse(AbstractSuccessfulResponse):
-    """Model representing a response to providers request."""
-
-    providers: dict[str, list[dict[str, Any]]] = Field(
-        ...,
-        description="List of available API types and their corresponding providers",
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "providers": {
-                        "inference": [
-                            {
-                                "provider_id": "sentence-transformers",
-                                "provider_type": "inline::sentence-transformers",
-                            },
-                            {
-                                "provider_id": "openai",
-                                "provider_type": "remote::openai",
-                            },
-                        ],
-                        "agents": [
-                            {
-                                "provider_id": "meta-reference",
-                                "provider_type": "inline::meta-reference",
-                            },
-                        ],
-                    },
-                }
-            ]
-        }
-    }
-
-
-class ProviderResponse(AbstractSuccessfulResponse):
-    """Model representing a response to get specific provider request."""
-
-    api: str = Field(
-        ...,
-        description="The API this provider implements",
-    )
-    config: dict[str, Any] = Field(
-        ...,
-        description="Provider configuration parameters",
-    )
-    health: dict[str, Any] = Field(
-        ...,
-        description="Current health status of the provider",
-    )
-    provider_id: str = Field(..., description="Unique provider identifier")
-    provider_type: str = Field(..., description="Provider implementation type")
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "api": "inference",
-                    "config": {"api_key": "********"},
-                    "health": {"status": "OK", "message": "Healthy"},
-                    "provider_id": "openai",
-                    "provider_type": "remote::openai",
-                }
-            ]
-        }
-    }
-
-
-class ConversationData(BaseModel):
-    """Model representing conversation data returned by cache list operations.
-
-    Attributes:
-        conversation_id: The conversation ID
-        topic_summary: The topic summary for the conversation (can be None)
-        last_message_timestamp: The timestamp of the last message in the conversation
-    """
-
-    conversation_id: str
-    topic_summary: Optional[str]
-    last_message_timestamp: float
-
-
-class QueryResponse(AbstractSuccessfulResponse):
-    """Model representing LLM response to a query.
-
-    Attributes:
-        conversation_id: The optional conversation ID (UUID).
-        response: The response.
-        rag_chunks: Deprecated. List of RAG chunks used to generate the response.
-            This information is now available in tool_results under file_search_call type.
-        referenced_documents: The URLs and titles for the documents used to generate the response.
-        tool_calls: List of tool calls made during response generation.
-        tool_results: List of tool results.
-        truncated: Whether conversation history was truncated.
-        input_tokens: Number of tokens sent to LLM.
-        output_tokens: Number of tokens received from LLM.
-        available_quotas: Quota available as measured by all configured quota limiters.
-    """
-
-    conversation_id: Optional[str] = Field(
-        None,
-        description="The optional conversation ID (UUID)",
-        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
-    )
-
-    response: str = Field(
-        description="Response from LLM",
-        examples=[
-            "Kubernetes is an open-source container orchestration system for automating ..."
-        ],
-    )
-
-    rag_chunks: list[RAGChunk] = Field(
-        default_factory=list,
-        description="Deprecated: List of RAG chunks used to generate the response.",
-    )
-
-    referenced_documents: list[ReferencedDocument] = Field(
-        default_factory=list,
-        description="List of documents referenced in generating the response",
-        examples=[
-            [
-                {
-                    "doc_url": "https://docs.openshift.com/"
-                    "container-platform/4.15/operators/olm/index.html",
-                    "doc_title": "Operator Lifecycle Manager (OLM)",
-                }
-            ]
-        ],
-    )
-
-    truncated: bool = Field(
-        False,
-        description="Deprecated:Whether conversation history was truncated",
-        examples=[False, True],
-    )
-
-    input_tokens: int = Field(
-        0,
-        description="Number of tokens sent to LLM",
-        examples=[150, 250, 500],
-    )
-
-    output_tokens: int = Field(
-        0,
-        description="Number of tokens received from LLM",
-        examples=[50, 100, 200],
-    )
-
-    available_quotas: dict[str, int] = Field(
-        default_factory=dict,
-        description="Quota available as measured by all configured quota limiters",
-        examples=[{"daily": 1000, "monthly": 50000}],
-    )
-
-    tool_calls: list[ToolCallSummary] = Field(
-        default_factory=list,
-        description="List of tool calls made during response generation",
-    )
-
-    tool_results: list[ToolResultSummary] = Field(
-        default_factory=list,
-        description="List of tool results",
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                    "response": "Operator Lifecycle Manager (OLM) helps users install...",
-                    "referenced_documents": [
-                        {
-                            "doc_url": "https://docs.openshift.com/container-platform/4.15/"
-                            "operators/understanding/olm/olm-understanding-olm.html",
-                            "doc_title": "Operator Lifecycle Manager concepts and resources",
-                        },
-                    ],
-                    "truncated": False,
-                    "input_tokens": 123,
-                    "output_tokens": 456,
-                    "available_quotas": {
-                        "UserQuotaLimiter": 998911,
-                        "ClusterQuotaLimiter": 998911,
-                    },
-                    "tool_calls": [
-                        {"name": "tool1", "args": {}, "id": "1", "type": "tool_call"}
-                    ],
-                    "tool_results": [
-                        {
-                            "id": "1",
-                            "status": "success",
-                            "content": "bla",
-                            "type": "tool_result",
-                            "round": 1,
-                        }
-                    ],
-                }
-            ]
-        }
-    }
-
-
-class StreamingQueryResponse(AbstractSuccessfulResponse):
-    """Documentation-only model for streaming query responses using Server-Sent Events (SSE)."""
-
-    @classmethod
-    def openapi_response(cls) -> dict[str, Any]:
-        """Generate FastAPI response dict for SSE streaming with examples.
-
-        Note: This is used for OpenAPI documentation only. The actual endpoint
-        returns a StreamingResponse object, not this Pydantic model.
-        """
-        schema = cls.model_json_schema()
-        model_examples = schema.get("examples")
-        if not model_examples:
-            raise SchemaError(f"Examples not found in {cls.__name__}")
-        example_value = model_examples[0]
-        content = {
-            MEDIA_TYPE_EVENT_STREAM: {
-                "schema": {"type": "string", "format": MEDIA_TYPE_EVENT_STREAM},
-                "example": example_value,
-            }
-        }
-
-        return {
-            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
-            "content": content,
-            # Note: No "model" key since we're not actually serializing this model
-        }
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                (
-                    'data: {"event": "start", "data": {'
-                    '"conversation_id": "123e4567-e89b-12d3-a456-426614174000", '
-                    '"request_id": "123e4567-e89b-12d3-a456-426614174001"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 0, "token": "No Violation"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 1, "token": ""}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 2, "token": "Hello"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 3, "token": "!"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 4, "token": " How"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 5, "token": " can"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 6, "token": " I"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 7, "token": " assist"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 8, "token": " you"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 9, "token": " today"}}\n\n'
-                    'data: {"event": "token", "data": {'
-                    '"id": 10, "token": "?"}}\n\n'
-                    'data: {"event": "turn_complete", "data": {'
-                    '"token": "Hello! How can I assist you today?"}}\n\n'
-                    'data: {"event": "end", "data": {'
-                    '"referenced_documents": [], '
-                    '"truncated": null, "input_tokens": 11, "output_tokens": 19}, '
-                    '"available_quotas": {}}\n\n'
-                ),
-            ]
-        }
-    }
-
-
-class StreamingInterruptResponse(AbstractSuccessfulResponse):
-    """Model representing a response to a streaming interrupt request.
-
-    Attributes:
-        request_id: The streaming request ID targeted by the interrupt call.
-        interrupted: Whether an in-progress stream was interrupted.
-        message: Human-readable interruption status message.
-
-    Example:
-        ```python
-        response = StreamingInterruptResponse(
-            request_id="123e4567-e89b-12d3-a456-426614174000",
-            interrupted=True,
-            message="Streaming request interrupted",
-        )
-        ```
-    """
-
-    request_id: str = Field(
-        description="The streaming request ID targeted by the interrupt call",
-        examples=["123e4567-e89b-12d3-a456-426614174000"],
-    )
-
-    interrupted: bool = Field(
-        description="Whether an in-progress stream was interrupted",
-        examples=[True],
-    )
-
-    message: str = Field(
-        description="Human-readable interruption status message",
-        examples=["Streaming request interrupted"],
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "request_id": "123e4567-e89b-12d3-a456-426614174000",
-                    "interrupted": True,
-                    "message": "Streaming request interrupted",
-                }
-            ]
-        }
-    }
-
-
-class InfoResponse(AbstractSuccessfulResponse):
-    """Model representing a response to an info request.
-
-    Attributes:
-        name: Service name.
-        service_version: Service version.
-        llama_stack_version: Llama Stack version.
-
-    Example:
-        ```python
-        info_response = InfoResponse(
-            name="Lightspeed Stack",
-            service_version="1.0.0",
-            llama_stack_version="0.2.22",
-        )
-        ```
-    """
-
-    name: str = Field(
-        description="Service name",
-        examples=["Lightspeed Stack"],
-    )
-
-    service_version: str = Field(
-        description="Service version",
-        examples=["0.1.0", "0.2.0", "1.0.0"],
-    )
-
-    llama_stack_version: str = Field(
-        description="Llama Stack version",
-        examples=["0.2.1", "0.2.2", "0.2.18", "0.2.21", "0.2.22"],
-    )
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "name": "Lightspeed Stack",
-                    "service_version": "1.0.0",
-                    "llama_stack_version": "1.0.0",
-                }
-            ]
-        }
-    }
-
-
-class ProviderHealthStatus(BaseModel):
-    """Model representing the health status of a provider.
-
-    Attributes:
-        provider_id: The ID of the provider.
-        status: The health status ('ok', 'unhealthy', 'not_implemented').
-        message: Optional message about the health status.
-    """
-
-    provider_id: str = Field(
-        description="The ID of the provider",
-    )
-    status: str = Field(
-        description="The health status",
-        examples=["ok", "unhealthy", "not_implemented"],
-    )
-    message: Optional[str] = Field(
-        None,
-        description="Optional message about the health status",
-        examples=["All systems operational", "Llama Stack is unavailable"],
-    )
-
-
-class ReadinessResponse(AbstractSuccessfulResponse):
-    """Model representing response to a readiness request.
-
-    Attributes:
-        ready: If service is ready.
-        reason: The reason for the readiness.
-        providers: List of unhealthy providers in case of readiness failure.
-
-    Example:
-        ```python
-        readiness_response = ReadinessResponse(
-            ready=False,
-            reason="Service is not ready",
-            providers=[
-                ProviderHealthStatus(
-                    provider_id="ollama",
-                    status="unhealthy",
-                    message="Server is unavailable"
-                )
-            ]
-        )
-        ```
-    """
-
-    ready: bool = Field(
-        ...,
-        description="Flag indicating if service is ready",
-        examples=[True, False],
-    )
-
-    reason: str = Field(
-        ...,
-        description="The reason for the readiness",
-        examples=["Service is ready"],
-    )
-
-    providers: list[ProviderHealthStatus] = Field(
-        ...,
-        description="List of unhealthy providers in case of readiness failure.",
-        examples=[],
-    )
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "ready": True,
-                    "reason": "Service is ready",
-                    "providers": [],
-                }
-            ]
-        }
-    }
-
-
-class LivenessResponse(AbstractSuccessfulResponse):
-    """Model representing a response to a liveness request.
-
-    Attributes:
-        alive: If app is alive.
-
-    Example:
-        ```python
-        liveness_response = LivenessResponse(alive=True)
-        ```
-    """
-
-    alive: bool = Field(
-        ...,
-        description="Flag indicating that the app is alive",
-        examples=[True, False],
-    )
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "alive": True,
-                }
-            ]
-        }
-    }
-
-
-class FeedbackResponse(AbstractSuccessfulResponse):
-    """Model representing a response to a feedback request.
-
-    Attributes:
-        response: The response of the feedback request.
-
-    Example:
-        ```python
-        feedback_response = FeedbackResponse(response="feedback received")
-        ```
-    """
-
-    response: str = Field(
-        ...,
-        description="The response of the feedback request.",
-        examples=["feedback received"],
-    )
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "response": "feedback received",
-                }
-            ]
-        }
-    }
-
-
-class StatusResponse(AbstractSuccessfulResponse):
-    """Model representing a response to a status request.
-
-    Attributes:
-        functionality: The functionality of the service.
-        status: The status of the service.
-
-    Example:
-        ```python
-        status_response = StatusResponse(
-            functionality="feedback",
-            status={"enabled": True},
-        )
-        ```
-    """
-
-    functionality: str = Field(
-        ...,
-        description="The functionality of the service",
-        examples=["feedback"],
-    )
-
-    status: dict = Field(
-        ...,
-        description="The status of the service",
-        examples=[{"enabled": True}],
-    )
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "functionality": "feedback",
-                    "status": {"enabled": True},
-                }
-            ]
-        }
-    }
-
-
-class AuthorizedResponse(AbstractSuccessfulResponse):
-    """Model representing a response to an authorization request.
-
-    Attributes:
-        user_id: The ID of the logged in user.
-        username: The name of the logged in user.
-        skip_userid_check: Whether to skip the user ID check.
-    """
-
-    user_id: str = Field(
-        ...,
-        description="User ID, for example UUID",
-        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
-    )
-    username: str = Field(
-        ...,
-        description="User name",
-        examples=["John Doe", "Adam Smith"],
-    )
-    skip_userid_check: bool = Field(
-        ...,
-        description="Whether to skip the user ID check",
-        examples=[True, False],
-    )
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "user_id": "123e4567-e89b-12d3-a456-426614174000",
-                    "username": "user1",
-                    "skip_userid_check": False,
-                }
-            ]
-        }
-    }
-
-
-class Message(BaseModel):
-    """Model representing a message in a conversation turn.
-
-    Attributes:
-        content: The message content.
-        type: The type of message.
-        referenced_documents: Optional list of documents referenced in an assistant response.
-    """
-
-    content: str = Field(
-        ...,
-        description="The message content",
-        examples=["Hello, how can I help you?"],
-    )
-    type: Literal["user", "assistant", "system", "developer"] = Field(
-        ...,
-        description="The type of message",
-        examples=["user", "assistant", "system", "developer"],
-    )
-    referenced_documents: Optional[list[ReferencedDocument]] = Field(
-        None,
-        description="List of documents referenced in the response (assistant messages only)",
-    )
-
-
-class ConversationTurn(BaseModel):
-    """Model representing a single conversation turn.
-
-    Attributes:
-        messages: List of messages in this turn.
-        tool_calls: List of tool calls made in this turn.
-        tool_results: List of tool results from this turn.
-        provider: Provider identifier used for this turn.
-        model: Model identifier used for this turn.
-        started_at: ISO 8601 timestamp when the turn started.
-        completed_at: ISO 8601 timestamp when the turn completed.
-    """
-
-    messages: list[Message] = Field(
-        default_factory=list,
-        description="List of messages in this turn",
-    )
-    tool_calls: list[ToolCallSummary] = Field(
-        default_factory=list,
-        description="List of tool calls made in this turn",
-    )
-    tool_results: list[ToolResultSummary] = Field(
-        default_factory=list,
-        description="List of tool results from this turn",
-    )
-    provider: str = Field(
-        ...,
-        description="Provider identifier used for this turn",
-        examples=["openai"],
-    )
-    model: str = Field(
-        ...,
-        description="Model identifier used for this turn",
-        examples=["gpt-4o-mini"],
-    )
-    started_at: str = Field(
-        ...,
-        description="ISO 8601 timestamp when the turn started",
-        examples=["2024-01-01T00:01:00Z"],
-    )
-    completed_at: str = Field(
-        ...,
-        description="ISO 8601 timestamp when the turn completed",
-        examples=["2024-01-01T00:01:05Z"],
-    )
-
-
-class ConversationResponse(AbstractSuccessfulResponse):
-    """Model representing a response for retrieving a conversation.
-
-    Attributes:
-        conversation_id: The conversation ID (UUID).
-        chat_history: The chat history as a list of conversation turns.
-    """
-
-    conversation_id: str = Field(
-        ...,
-        description="Conversation ID (UUID)",
-        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
-    )
-
-    chat_history: list[ConversationTurn] = Field(
-        ...,
-        description="The simplified chat history as a list of conversation turns",
-        examples=[
-            {
-                "messages": [
-                    {"content": "Hello", "type": "user"},
-                    {"content": "Hi there!", "type": "assistant"},
-                ],
-                "tool_calls": [],
-                "tool_results": [],
-                "provider": "openai",
-                "model": "gpt-4o-mini",
-                "started_at": "2024-01-01T00:01:00Z",
-                "completed_at": "2024-01-01T00:01:05Z",
-            }
-        ],
-    )
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                    "chat_history": [
-                        {
-                            "messages": [
-                                {"content": "Hello", "type": "user"},
-                                {"content": "Hi there!", "type": "assistant"},
-                            ],
-                            "tool_calls": [],
-                            "tool_results": [],
-                            "provider": "openai",
-                            "model": "gpt-4o-mini",
-                            "started_at": "2024-01-01T00:01:00Z",
-                            "completed_at": "2024-01-01T00:01:05Z",
-                        }
-                    ],
-                }
-            ]
-        }
-    }
-
-
-class ConversationDeleteResponse(AbstractDeleteResponse):
-    """Response for deleting a conversation."""
-
-    resource_name: ClassVar[str] = "Conversation"
-    conversation_id: str = Field(
-        ...,
-        description="Conversation identifier that was passed to delete.",
-        examples=["123e4567-e89b-12d3-a456-426614174000"],
-    )
-
-    @computed_field(json_schema_extra={"deprecated": True})
-    def success(self) -> bool:
-        """Successful response flag."""
-        logger.warning("DEPRECATED: Will be removed in a future release.")
-        return True
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "label": "deleted",
-                    "value": {
-                        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                        "deleted": True,
-                        "response": "Conversation deleted successfully",
-                    },
-                },
-                {
-                    "label": "not found",
-                    "value": {
-                        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                        "deleted": False,
-                        "response": "Conversation not found",
-                    },
-                },
-            ]
-        }
-    }
-
-
-class ConversationDetails(BaseModel):
-    """Model representing the details of a user conversation.
-
-    Attributes:
-        conversation_id: The conversation ID (UUID).
-        created_at: When the conversation was created.
-        last_message_at: When the last message was sent.
-        message_count: Number of user messages in the conversation.
-        last_used_model: The last model used for the conversation.
-        last_used_provider: The provider of the last used model.
-        topic_summary: The topic summary for the conversation.
-
-    Example:
-        ```python
-        conversation = ConversationDetails(
-            conversation_id="123e4567-e89b-12d3-a456-426614174000",
-            created_at="2024-01-01T00:00:00Z",
-            last_message_at="2024-01-01T00:05:00Z",
-            message_count=5,
-            last_used_model="gemini/gemini-2.0-flash",
-            last_used_provider="gemini",
-            topic_summary="Openshift Microservices Deployment Strategies",
-        )
-        ```
-    """
-
-    conversation_id: str = Field(
-        ...,
-        description="Conversation ID (UUID)",
-        examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"],
-    )
-
-    created_at: Optional[str] = Field(
-        None,
-        description="When the conversation was created",
-        examples=["2024-01-01T01:00:00Z"],
-    )
-
-    last_message_at: Optional[str] = Field(
-        None,
-        description="When the last message was sent",
-        examples=["2024-01-01T01:00:00Z"],
-    )
-
-    message_count: Optional[int] = Field(
-        None,
-        description="Number of user messages in the conversation",
-        examples=[42],
-    )
-
-    last_used_model: Optional[str] = Field(
-        None,
-        description="Identification of the last model used for the conversation",
-        examples=["gpt-4-turbo", "gpt-3.5-turbo-0125"],
-    )
-
-    last_used_provider: Optional[str] = Field(
-        None,
-        description="Identification of the last provider used for the conversation",
-        examples=["openai", "gemini"],
-    )
-
-    topic_summary: Optional[str] = Field(
-        None,
-        description="Topic summary for the conversation",
-        examples=["Openshift Microservices Deployment Strategies"],
-    )
-
-
-class ConversationsListResponse(AbstractSuccessfulResponse):
-    """Model representing a response for listing conversations of a user.
-
-    Attributes:
-        conversations: List of conversation details associated with the user.
-    """
-
-    conversations: list[ConversationDetails]
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "conversations": [
-                        {
-                            "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                            "created_at": "2024-01-01T00:00:00Z",
-                            "last_message_at": "2024-01-01T00:05:00Z",
-                            "message_count": 5,
-                            "last_used_model": "gemini/gemini-2.0-flash",
-                            "last_used_provider": "gemini",
-                            "topic_summary": "Openshift Microservices Deployment Strategies",
-                        },
-                        {
-                            "conversation_id": "456e7890-e12b-34d5-a678-901234567890",
-                            "created_at": "2024-01-01T01:00:00Z",
-                            "message_count": 2,
-                            "last_used_model": "gemini/gemini-2.5-flash",
-                            "last_used_provider": "gemini",
-                            "topic_summary": "RHDH Purpose Summary",
-                        },
-                    ]
-                }
-            ]
-        }
-    }
-
-
-class ConversationsListResponseV2(AbstractSuccessfulResponse):
-    """Model representing a response for listing conversations of a user.
-
-    Attributes:
-        conversations: List of conversation data associated with the user.
-    """
-
-    conversations: list[ConversationData]
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "conversations": [
-                        {
-                            "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                            "topic_summary": "Openshift Microservices Deployment Strategies",
-                            "last_message_timestamp": 1704067200.0,
-                        }
-                    ],
-                }
-            ]
-        }
-    }
-
-
-class FeedbackStatusUpdateResponse(AbstractSuccessfulResponse):
-    """
-    Model representing a response to a feedback status update request.
-
-    Attributes:
-        status: The previous and current status of the service and who updated it.
-
-    Example:
-        ```python
-        status_response = StatusResponse(
-            status={
-                "previous_status": true,
-                "updated_status": false,
-                "updated_by": "user/test",
-                "timestamp": "2023-03-15 12:34:56"
-            },
-        )
-        ```
-    """
-
-    status: dict
-
-    # provides examples for /docs endpoint
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "status": {
-                        "previous_status": True,
-                        "updated_status": False,
-                        "updated_by": "user/test",
-                        "timestamp": "2023-03-15 12:34:56",
-                    },
-                }
-            ]
-        }
-    }
-
-
-class ConversationUpdateResponse(AbstractSuccessfulResponse):
-    """Model representing a response for updating a conversation topic summary.
-
-    Attributes:
-        conversation_id: The conversation ID (UUID) that was updated.
-        success: Whether the update was successful.
-        message: A message about the update result.
-
-    Example:
-        ```python
-        update_response = ConversationUpdateResponse(
-            conversation_id="123e4567-e89b-12d3-a456-426614174000",
-            success=True,
-            message="Topic summary updated successfully",
-        )
-        ```
-    """
-
-    conversation_id: str = Field(
-        ...,
-        description="The conversation ID (UUID) that was updated",
-        examples=["123e4567-e89b-12d3-a456-426614174000"],
-    )
-    success: bool = Field(
-        ...,
-        description="Whether the update was successful",
-        examples=[True],
-    )
-    message: str = Field(
-        ...,
-        description="A message about the update result",
-        examples=["Topic summary updated successfully"],
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                    "success": True,
-                    "message": "Topic summary updated successfully",
-                }
-            ]
-        }
-    }
-
-
-class ConfigurationResponse(AbstractSuccessfulResponse):
-    """Success response model for the config endpoint."""
-
-    configuration: Configuration
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "configuration": {
-                        "name": "lightspeed-stack",
-                        "service": {
-                            "host": "localhost",
-                            "port": 8080,
-                            "auth_enabled": False,
-                            "workers": 1,
-                            "color_log": True,
-                            "access_log": True,
-                            "tls_config": {
-                                "tls_certificate_path": None,
-                                "tls_key_path": None,
-                                "tls_key_password": None,
-                            },
-                            "cors": {
-                                "allow_origins": ["*"],
-                                "allow_credentials": False,
-                                "allow_methods": ["*"],
-                                "allow_headers": ["*"],
-                            },
-                        },
-                        "llama_stack": {
-                            "url": "http://localhost:8321",
-                            "api_key": "*****",
-                            "use_as_library_client": False,
-                            "library_client_config_path": None,
-                        },
-                        "user_data_collection": {
-                            "feedback_enabled": True,
-                            "feedback_storage": "/tmp/data/feedback",
-                            "transcripts_enabled": False,
-                            "transcripts_storage": "/tmp/data/transcripts",
-                        },
-                        "database": {
-                            "sqlite": {"db_path": "/tmp/lightspeed-stack.db"},
-                            "postgres": None,
-                        },
-                        "mcp_servers": [
-                            {
-                                "name": "server1",
-                                "provider_id": "provider1",
-                                "url": "http://url.com:1",
-                            },
-                        ],
-                        "authentication": {
-                            "module": "noop",
-                            "skip_tls_verification": False,
-                        },
-                        "authorization": {"access_rules": []},
-                        "customization": None,
-                        "inference": {
-                            "default_model": "gpt-4-turbo",
-                            "default_provider": "openai",
-                        },
-                        "conversation_cache": {
-                            "type": None,
-                            "memory": None,
-                            "sqlite": None,
-                            "postgres": None,
-                        },
-                        "byok_rag": [],
-                        "quota_handlers": {
-                            "sqlite": None,
-                            "postgres": None,
-                            "limiters": [],
-                            "scheduler": {"period": 1},
-                            "enable_token_history": False,
-                        },
-                    }
-                }
-            ]
-        }
-    }
-
-
-class ResponsesResponse(AbstractSuccessfulResponse):
-    """Model representing a response from the Responses API following LCORE specification.
-
-    Attributes:
-        created_at: Unix timestamp when the response was created.
-        completed_at: Unix timestamp when the response was completed, if applicable.
-        error: Error details if the response failed or was blocked.
-        id: Unique identifier for this response.
-        model: Model identifier in "provider/model" format used for generation.
-        object: Object type identifier, always "response".
-        output: List of structured output items containing messages, tool calls, and
-            other content. This is the primary response content.
-        parallel_tool_calls: Whether the model can make multiple tool calls in parallel.
-        previous_response_id: Identifier of the previous response in a multi-turn
-            conversation.
-        prompt: The input prompt object that was sent to the model.
-        status: Current status of the response (e.g., "completed", "blocked",
-            "in_progress").
-        temperature: Temperature parameter used for generation (controls randomness).
-        text: Text response configuration object used for OpenAI responses.
-        top_p: Top-p sampling parameter used for generation.
-        tools: List of tools available to the model during generation.
-        tool_choice: Tool selection strategy used (e.g., "auto", "required", "none").
-        truncation: Strategy used for handling content that exceeds context limits.
-        usage: Token usage statistics including input_tokens, output_tokens, and
-            total_tokens.
-        instructions: System instructions or guidelines provided to the model.
-        max_tool_calls: Maximum number of tool calls allowed in a single response.
-        reasoning: Reasoning configuration (effort level) used for the response.
-        max_output_tokens: Upper bound for tokens generated in the response.
-        safety_identifier: Safety/guardrail identifier applied to the request.
-        metadata: Additional metadata dictionary with custom key-value pairs.
-        store: Whether the response was stored.
-        conversation: Conversation ID linking this response to a conversation thread
-            (LCORE-specific).
-        available_quotas: Remaining token quotas for the user (LCORE-specific).
-        output_text: Aggregated text output from all output_text items in the
-            output array.
-    """
-
-    created_at: int
-    completed_at: Optional[int] = None
-    error: Optional[Error] = None
-    id: str
-    model: str
-    object: Literal["response"] = "response"
-    output: list[Output]
-    parallel_tool_calls: bool = True
-    previous_response_id: Optional[str] = None
-    prompt: Optional[Prompt] = None
-    status: str
-    temperature: Optional[float] = None
-    text: Optional[Text] = None
-    top_p: Optional[float] = None
-    tools: Optional[list[OutputTool]] = None
-    tool_choice: Optional[ToolChoice] = None
-    truncation: Optional[str] = None
-    usage: Optional[Usage] = None
-    instructions: Optional[str] = None
-    max_tool_calls: Optional[int] = None
-    reasoning: Optional[Reasoning] = None
-    max_output_tokens: Optional[int] = None
-    safety_identifier: Optional[str] = None
-    metadata: Optional[dict[str, str]] = None
-    store: Optional[bool] = None
-    # LCORE-specific attributes
-    conversation: Optional[str] = None
-    available_quotas: dict[str, int]
-    output_text: str
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "created_at": 1704067200,
-                    "completed_at": 1704067250,
-                    "id": "resp_abc123",
-                    "model": "openai/gpt-4-turbo",
-                    "object": "response",
-                    "output": [
-                        {
-                            "type": "message",
-                            "role": "assistant",
-                            "content": [
-                                {
-                                    "type": "output_text",
-                                    "text": (
-                                        "Kubernetes is an open-source container "
-                                        "orchestration system..."
-                                    ),
-                                }
-                            ],
-                        }
-                    ],
-                    "parallel_tool_calls": True,
-                    "status": "completed",
-                    "temperature": 0.7,
-                    "text": {"format": {"type": "text"}},
-                    "usage": {
-                        "input_tokens": 100,
-                        "output_tokens": 50,
-                        "total_tokens": 150,
-                        "input_tokens_details": {"cached_tokens": 0},
-                        "output_tokens_details": {"reasoning_tokens": 0},
-                    },
-                    "instructions": "You are a helpful assistant",
-                    "store": True,
-                    "conversation": "0d21ba731f21f798dc9680125d5d6f493e4a7ab79f25670e",
-                    "available_quotas": {"daily": 1000, "monthly": 50000},
-                    "output_text": (
-                        "Kubernetes is an open-source container "
-                        "orchestration system..."
-                    ),
-                }
-            ],
-            "sse_example": (
-                "event: response.created\n"
-                'data: {"type":"response.created","sequence_number":0,'
-                '"response":{"id":"resp_abc","object":"response",'
-                '"created_at":1704067200,"status":"in_progress","model":"openai/gpt-4o-mini",'
-                '"output":[],"store":true,"text":{"format":{"type":"text"}},'
-                '"conversation":"0d21ba731f21f798dc9680125d5d6f49",'
-                '"available_quotas":{},"output_text":""}}\n\n'
-                "event: response.output_item.added\n"
-                'data: {"type":"response.output_item.added","sequence_number":1,'
-                '"response_id":"resp_abc","output_index":0,'
-                '"item":{"id":"msg_abc","type":"message","status":"in_progress",'
-                '"role":"assistant","content":[]}}\n\n'
-                "...\n\n"
-                "event: response.completed\n"
-                'data: {"type":"response.completed","sequence_number":30,'
-                '"response":{"id":"resp_abc","object":"response",'
-                '"created_at":1704067200,"status":"completed","model":"openai/gpt-4o-mini",'
-                '"output":[{"id":"msg_abc","type":"message","status":"completed",'
-                '"role":"assistant","content":[{"type":"output_text",'
-                '"text":"Hello! How can I help?","annotations":[]}]}],'
-                '"store":true,"text":{"format":{"type":"text"}},'
-                '"usage":{"input_tokens":10,"output_tokens":6,"total_tokens":16,'
-                '"input_tokens_details":{"cached_tokens":0},'
-                '"output_tokens_details":{"reasoning_tokens":0}},'
-                '"conversation":"0d21ba731f21f798dc9680125d5d6f49",'
-                '"available_quotas":{"daily":1000,"monthly":50000},'
-                '"output_text":"Hello! How can I help?"}}\n\n'
-                "data: [DONE]\n\n"
-            ),
-        }
-    }
-
-    @classmethod
-    def openapi_response(cls) -> dict[str, Any]:
-        """
-        Build OpenAPI response dict with application/json and text/event-stream.
-
-        Uses the single JSON example from the model schema and adds
-        text/event-stream example from json_schema_extra.sse_example.
-        """
-        schema = cls.model_json_schema()
-        model_examples = schema.get("examples", [])
-        json_example = model_examples[0] if model_examples else None
-
-        schema_extra = (
-            cast(dict[str, Any], dict(cls.model_config)).get("json_schema_extra") or {}
-        )
-        sse_example = schema_extra.get("sse_example", "")
-
-        content: dict[str, Any] = {
-            "application/json": {"example": json_example} if json_example else {},
-            "text/event-stream": {
-                "schema": {"type": "string"},
-                "example": sse_example,
-            },
-        }
-
-        return {
-            "description": SUCCESSFUL_RESPONSE_DESCRIPTION,
-            "model": cls,
-            "content": content,
-        }
-
-
-class VectorStoreResponse(AbstractSuccessfulResponse):
-    """Response model containing a single vector store.
-
-    Attributes:
-        id: Vector store ID.
-        name: Vector store name.
-        created_at: Unix timestamp when created.
-        last_active_at: Unix timestamp of last activity.
-        expires_at: Optional Unix timestamp when it expires.
-        status: Vector store status.
-        usage_bytes: Storage usage in bytes.
-        metadata: Optional metadata dictionary for storing session information.
-    """
-
-    id: str = Field(..., description="Vector store ID")
-    name: str = Field(..., description="Vector store name")
-    created_at: int = Field(..., description="Unix timestamp when created")
-    last_active_at: Optional[int] = Field(
-        None, description="Unix timestamp of last activity"
-    )
-    expires_at: Optional[int] = Field(
-        None, description="Unix timestamp when it expires"
-    )
-    status: str = Field(..., description="Vector store status")
-    usage_bytes: int = Field(default=0, description="Storage usage in bytes")
-    metadata: Optional[dict[str, Any]] = Field(
-        None,
-        description="Metadata dictionary for storing session information",
-        examples=[
-            {"conversation_id": "conv_123", "document_ids": ["doc_456", "doc_789"]}
-        ],
-    )
-
-    model_config = {
-        "extra": "forbid",
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "id": "vs_abc123",
-                    "name": "customer_support_docs",
-                    "created_at": 1704067200,
-                    "last_active_at": 1704153600,
-                    "expires_at": None,
-                    "status": "active",
-                    "usage_bytes": 1048576,
-                    "metadata": {
-                        "conversation_id": "conv_123",
-                        "document_ids": ["doc_456", "doc_789"],
-                    },
-                }
-            ]
-        },
-    }
-
-
-class VectorStoresListResponse(AbstractSuccessfulResponse):
-    """Response model containing a list of vector stores.
-
-    Attributes:
-        data: List of vector store objects.
-        object: Object type (always "list").
-    """
-
-    data: list[VectorStoreResponse] = Field(
-        default_factory=list, description="List of vector stores"
-    )
-    object: str = Field(default="list", description="Object type")
-
-    model_config = {
-        "extra": "forbid",
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "data": [
-                        {
-                            "id": "vs_abc123",
-                            "name": "customer_support_docs",
-                            "created_at": 1704067200,
-                            "last_active_at": 1704153600,
-                            "expires_at": None,
-                            "status": "active",
-                            "usage_bytes": 1048576,
-                            "metadata": {"conversation_id": "conv_123"},
-                        },
-                        {
-                            "id": "vs_def456",
-                            "name": "product_documentation",
-                            "created_at": 1704070800,
-                            "last_active_at": 1704157200,
-                            "expires_at": None,
-                            "status": "active",
-                            "usage_bytes": 2097152,
-                            "metadata": None,
-                        },
-                    ],
-                    "object": "list",
-                }
-            ]
-        },
-    }
-
-
-class VectorStoreDeleteResponse(AbstractDeleteResponse):
-    """Result of deleting a vector store (always HTTP 200)."""
-
-    resource_name: ClassVar[str] = "Vector store"
-    vector_store_id: str = Field(
-        ...,
-        description="Vector store identifier that was passed to delete.",
-        examples=["vs_abc123"],
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "label": "deleted",
-                    "value": {
-                        "vector_store_id": "vs_abc123",
-                        "deleted": True,
-                        "response": "Vector store deleted successfully",
-                    },
-                },
-                {
-                    "label": "not found",
-                    "value": {
-                        "vector_store_id": "vs_abc123",
-                        "deleted": False,
-                        "response": "Vector store not found",
-                    },
-                },
-            ]
-        }
-    }
-
-
-class VectorStoreFileDeleteResponse(AbstractDeleteResponse):
-    """Result of deleting a file from a vector store (always HTTP 200)."""
-
-    resource_name: ClassVar[str] = "Vector store file"
-    file_id: str = Field(
-        ...,
-        description="File identifier that was passed to delete.",
-        examples=["file_abc123"],
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "label": "deleted",
-                    "value": {
-                        "file_id": "file_abc123",
-                        "deleted": True,
-                        "response": "Vector store file deleted successfully",
-                    },
-                },
-                {
-                    "label": "not found",
-                    "value": {
-                        "file_id": "file_abc123",
-                        "deleted": False,
-                        "response": "Vector store file not found",
-                    },
-                },
-            ]
-        }
-    }
-
-
-class PromptResourceResponse(AbstractSuccessfulResponse):
-    """A stored prompt template as returned by Llama Stack."""
-
-    prompt_id: str = Field(..., description="Prompt identifier from Llama Stack")
-    version: int = Field(..., description="Version number for this prompt")
-    is_default: Optional[bool] = Field(
-        None, description="Whether this version is the default"
-    )
-    prompt: Optional[str] = Field(None, description="Prompt text with placeholders")
-    variables: Optional[list[str]] = Field(
-        None, description="Variable names used in the template"
-    )
-
-    model_config = {
-        "extra": "forbid",
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
-                    "version": 1,
-                    "is_default": True,
-                    "prompt": "Summarize: {{text}}",
-                    "variables": ["text"],
-                }
-            ]
-        },
-    }
-
-
-class PromptsListResponse(AbstractSuccessfulResponse):
-    """List of stored prompt templates returned by Llama Stack."""
-
-    data: list[PromptResourceResponse] = Field(
-        default_factory=list,
-        description="Prompt entries (as returned by Llama Stack list)",
-    )
-
-    model_config = {
-        "extra": "forbid",
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "data": [
-                        {
-                            "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
-                            "version": 1,
-                            "is_default": True,
-                            "prompt": "Summarize: {{text}}",
-                            "variables": ["text"],
-                        }
-                    ],
-                }
-            ]
-        },
-    }
-
-
-class PromptDeleteResponse(AbstractDeleteResponse):
-    """Result of deleting a stored prompt (always HTTP 200, like conversations v2)."""
-
-    resource_name: ClassVar[str] = "Prompt"
-    prompt_id: str = Field(
-        ...,
-        description="Prompt identifier that was passed to delete.",
-        examples=["pmpt_0123456789abcdef0123456789abcdef01234567"],
-    )
-
-    model_config = {
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "label": "deleted",
-                    "value": {
-                        "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
-                        "deleted": True,
-                        "response": "Prompt deleted successfully",
-                    },
-                },
-                {
-                    "label": "not found",
-                    "value": {
-                        "prompt_id": "pmpt_0123456789abcdef0123456789abcdef01234567",
-                        "deleted": False,
-                        "response": "Prompt not found",
-                    },
-                },
-            ]
-        }
-    }
-
-
-class FileResponse(AbstractSuccessfulResponse):
-    """Response model containing a file object.
-
-    Attributes:
-        id: File ID.
-        filename: File name.
-        bytes: File size in bytes.
-        created_at: Unix timestamp when created.
-        purpose: File purpose.
-        object: Object type (always "file").
-    """
-
-    id: str = Field(..., description="File ID")
-    filename: str = Field(..., description="File name")
-    bytes: int = Field(..., description="File size in bytes")
-    created_at: int = Field(..., description="Unix timestamp when created")
-    purpose: str = Field(default="assistants", description="File purpose")
-    object: str = Field(default="file", description="Object type")
-
-    model_config = {
-        "extra": "forbid",
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "id": "file_abc123",
-                    "filename": "documentation.pdf",
-                    "bytes": 524288,
-                    "created_at": 1704067200,
-                    "purpose": "assistants",
-                    "object": "file",
-                }
-            ]
-        },
-    }
-
-
-class VectorStoreFileResponse(AbstractSuccessfulResponse):
-    """Response model containing a vector store file object.
-
-    Attributes:
-        id: Vector store file ID.
-        vector_store_id: ID of the vector store.
-        status: File processing status.
-        attributes: Optional metadata key-value pairs.
-        last_error: Optional error message if processing failed.
-        object: Object type (always "vector_store.file").
-    """
-
-    id: str = Field(..., description="Vector store file ID")
-    vector_store_id: str = Field(..., description="ID of the vector store")
-    status: str = Field(..., description="File processing status")
-    attributes: Optional[dict[str, str | float | bool]] = Field(
-        None,
-        description=(
-            "Set of up to 16 key-value pairs for storing additional information. "
-            "Keys: strings (max 64 chars). Values: strings (max 512 chars), booleans, or numbers."
-        ),
-    )
-    last_error: Optional[str] = Field(
-        None, description="Error message if processing failed"
-    )
-    object: str = Field(default="vector_store.file", description="Object type")
-
-    model_config = {
-        "extra": "forbid",
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "id": "file_abc123",
-                    "vector_store_id": "vs_abc123",
-                    "status": "completed",
-                    "attributes": {"chunk_size": "512", "indexed": True},
-                    "last_error": None,
-                    "object": "vector_store.file",
-                }
-            ]
-        },
-    }
-
-
-class VectorStoreFilesListResponse(AbstractSuccessfulResponse):
-    """Response model containing a list of vector store files.
-
-    Attributes:
-        data: List of vector store file objects.
-        object: Object type (always "list").
-    """
-
-    data: list[VectorStoreFileResponse] = Field(
-        default_factory=list, description="List of vector store files"
-    )
-    object: str = Field(default="list", description="Object type")
-
-    model_config = {
-        "extra": "forbid",
-        "json_schema_extra": {
-            "examples": [
-                {
-                    "data": [
-                        {
-                            "id": "file_abc123",
-                            "vector_store_id": "vs_abc123",
-                            "status": "completed",
-                            "attributes": {"chunk_size": "512"},
-                            "last_error": None,
-                            "object": "vector_store.file",
-                        },
-                        {
-                            "id": "file_def456",
-                            "vector_store_id": "vs_abc123",
-                            "status": "processing",
-                            "attributes": None,
-                            "last_error": None,
-                            "object": "vector_store.file",
-                        },
-                    ],
-                    "object": "list",
-                }
-            ]
-        },
-    }
diff --git a/src/models/rlsapi/responses.py b/src/models/rlsapi/responses.py
index 502fe44b7..6d85dd514 100644
--- a/src/models/rlsapi/responses.py
+++ b/src/models/rlsapi/responses.py
@@ -4,14 +4,14 @@
 
 from pydantic import Field
 
-from models.config import ConfigurationBase
-from models.responses import (
-    AbstractSuccessfulResponse,
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+from models.common import (
     RAGChunk,
     ReferencedDocument,
     ToolCallSummary,
     ToolResultSummary,
 )
+from models.config import ConfigurationBase
 
 
 class RlsapiV1InferData(ConfigurationBase):
diff --git a/src/utils/conversations.py b/src/utils/conversations.py
index 130d75b01..ac2659688 100644
--- a/src/utils/conversations.py
+++ b/src/utils/conversations.py
@@ -41,17 +41,18 @@
 )
 
 from constants import DEFAULT_RAG_TOOL
-from models.api.responses import (
+from models.api.responses.error import (
     InternalServerErrorResponse,
     ServiceUnavailableResponse,
 )
-from models.database.conversations import UserTurn
-from models.responses import (
+from models.common.conversation import (
     ConversationTurn,
     Message,
 )
+from models.common.responses.types import ResponseInput
+from models.common.turn_summary import ToolCallSummary, ToolResultSummary
+from models.database.conversations import UserTurn
 from utils.responses import parse_arguments_string
-from utils.types import ResponseInput, ToolCallSummary, ToolResultSummary
 
 
 def _extract_text_from_content(content: str | list[Any]) -> str:
diff --git a/src/utils/endpoints.py b/src/utils/endpoints.py
index 5eba25258..a9d2a5754 100644
--- a/src/utils/endpoints.py
+++ b/src/utils/endpoints.py
@@ -11,15 +11,18 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import AppConfig, LogicError
 from log import get_logger
-from models.api.responses import (
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
     NotFoundResponse,
 )
+from models.common.responses.responses_conversation_context import (
+    ResponsesConversationContext,
+)
+from models.common.turn_summary import ReferencedDocument, TurnSummary
 from models.database.conversations import UserConversation, UserTurn
 from utils.responses import create_new_conversation
 from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id
-from utils.types import ReferencedDocument, ResponsesConversationContext, TurnSummary
 
 logger = get_logger(__name__)
 
diff --git a/src/utils/mcp_oauth_probe.py b/src/utils/mcp_oauth_probe.py
index dba2fa3fc..570e968eb 100644
--- a/src/utils/mcp_oauth_probe.py
+++ b/src/utils/mcp_oauth_probe.py
@@ -14,7 +14,7 @@
 import constants
 from configuration import AppConfig
 from log import get_logger
-from models.api.responses import UnauthorizedResponse
+from models.api.responses.error import UnauthorizedResponse
 from utils.mcp_headers import McpHeaders, build_mcp_headers
 
 logger = get_logger(__name__)
diff --git a/src/utils/prompts.py b/src/utils/prompts.py
index 9a51371e8..5822f7b19 100644
--- a/src/utils/prompts.py
+++ b/src/utils/prompts.py
@@ -6,7 +6,7 @@
 
 import constants
 from configuration import configuration
-from models.api.responses import UnprocessableEntityResponse
+from models.api.responses.error import UnprocessableEntityResponse
 
 
 def get_system_prompt(
diff --git a/src/utils/query.py b/src/utils/query.py
index c62145d60..c6b9b686d 100644
--- a/src/utils/query.py
+++ b/src/utils/query.py
@@ -25,7 +25,7 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
-from models.api.responses import (
+from models.api.responses.error import (
     AbstractErrorResponse,
     ForbiddenResponse,
     InternalServerErrorResponse,
@@ -35,6 +35,7 @@
     UnprocessableEntityResponse,
 )
 from models.cache_entry import CacheEntry
+from models.common.turn_summary import TurnSummary
 from models.config import Action
 from models.database.conversations import UserConversation, UserTurn
 from models.requests import Attachment, QueryRequest
@@ -46,7 +47,6 @@
     create_transcript_metadata,
     store_transcript,
 )
-from utils.types import TurnSummary
 
 logger = get_logger(__name__)
 
diff --git a/src/utils/quota.py b/src/utils/quota.py
index bef10b910..b66d9b022 100644
--- a/src/utils/quota.py
+++ b/src/utils/quota.py
@@ -7,7 +7,10 @@
 from fastapi import HTTPException
 
 from log import get_logger
-from models.api.responses import InternalServerErrorResponse, QuotaExceededResponse
+from models.api.responses.error import (
+    InternalServerErrorResponse,
+    QuotaExceededResponse,
+)
 from quota.quota_exceed_error import QuotaExceedError
 from quota.quota_limiter import QuotaLimiter
 from quota.token_usage_history import TokenUsageHistory
diff --git a/src/utils/responses.py b/src/utils/responses.py
index 3d97efdfd..4fa3f96b0 100644
--- a/src/utils/responses.py
+++ b/src/utils/responses.py
@@ -91,13 +91,21 @@
 from constants import DEFAULT_RAG_TOOL
 from log import get_logger
 from metrics import recording
-from models.api.responses import (
+from models.api.responses.error import (
     ConflictResponse,
     InternalServerErrorResponse,
     NotFoundResponse,
     ServiceUnavailableResponse,
 )
 from models.common.responses.responses_api_params import ResponsesApiParams
+from models.common.responses.types import ResponseInput, ResponseItem
+from models.common.turn_summary import (
+    RAGChunk,
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+    TurnSummary,
+)
 from models.config import ByokRag
 from models.database.conversations import UserConversation
 from models.requests import QueryRequest
@@ -114,15 +122,6 @@
 )
 from utils.suid import to_llama_stack_conversation_id
 from utils.token_counter import TokenCounter
-from utils.types import (
-    RAGChunk,
-    ReferencedDocument,
-    ResponseInput,
-    ResponseItem,
-    ToolCallSummary,
-    ToolResultSummary,
-    TurnSummary,
-)
 
 logger = get_logger(__name__)
 
diff --git a/src/utils/shields.py b/src/utils/shields.py
index 821727be8..56cd729eb 100644
--- a/src/utils/shields.py
+++ b/src/utils/shields.py
@@ -18,19 +18,19 @@
 from constants import DEFAULT_VIOLATION_MESSAGE
 from log import get_logger
 from metrics import recording
-from models.api.responses import (
+from models.api.responses.error import (
     InternalServerErrorResponse,
     NotFoundResponse,
     ServiceUnavailableResponse,
     UnprocessableEntityResponse,
 )
-from models.requests import QueryRequest
-from utils.query import handle_known_apistatus_errors
-from utils.types import (
+from models.common.moderation import (
     ShieldModerationBlocked,
     ShieldModerationPassed,
     ShieldModerationResult,
 )
+from models.requests import QueryRequest
+from utils.query import handle_known_apistatus_errors
 
 logger = get_logger(__name__)
 
diff --git a/src/utils/transcripts.py b/src/utils/transcripts.py
index bb7606b5d..ec2ef53d6 100644
--- a/src/utils/transcripts.py
+++ b/src/utils/transcripts.py
@@ -15,14 +15,11 @@
 
 from configuration import configuration
 from log import get_logger
-from models.api.responses import InternalServerErrorResponse
+from models.api.responses.error import InternalServerErrorResponse
+from models.common.transcripts import Transcript, TranscriptMetadata
+from models.common.turn_summary import TurnSummary
 from models.requests import Attachment
 from utils.suid import get_suid
-from utils.types import (
-    Transcript,
-    TranscriptMetadata,
-    TurnSummary,
-)
 
 logger = get_logger(__name__)
 
diff --git a/src/utils/types.py b/src/utils/types.py
index 48f0fc226..1e616a773 100644
--- a/src/utils/types.py
+++ b/src/utils/types.py
@@ -1,39 +1,8 @@
 """Common types for the project."""
 
-from typing import Annotated, Any, Literal, Optional
+from typing import Any
 
 from llama_stack_api import ImageContentItem, TextContentItem
-from llama_stack_api.openai_responses import (
-    OpenAIResponseInputFunctionToolCallOutput as FunctionToolCallOutput,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseMCPApprovalRequest as McpApprovalRequest,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseMCPApprovalResponse as McpApprovalResponse,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseMessage as ResponseMessage,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseOutputMessageFileSearchToolCall as FileSearchToolCall,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseOutputMessageFunctionToolCall as FunctionToolCall,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseOutputMessageMCPCall as McpCall,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseOutputMessageMCPListTools as McpListTools,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseOutputMessageWebSearchToolCall as WebSearchToolCall,
-)
-from pydantic import AnyUrl, BaseModel, ConfigDict, Field
-
-from models.database.conversations import UserConversation
-from utils.token_counter import TokenCounter
 
 type SingletonInstances = dict[type, Any]
 
@@ -80,193 +49,3 @@ def __call__(cls, *args: Any, **kwargs: Any) -> Any:
         if cls not in cls._instances:
             cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
         return cls._instances[cls]
-
-
-class ShieldModerationPassed(BaseModel):
-    """Shield moderation passed; no refusal."""
-
-    decision: Literal["passed"] = "passed"
-
-
-class ResponsesConversationContext(BaseModel):
-    """Result of resolving conversation context for the responses endpoint.
-
-    Holds the conversation ID to use for the LLM, the optional user conversation
-    record, and the resolved generate_topic_summary flag. Caller assigns these
-    to the request in outer scope instead of mutating the request inside the
-    resolver.
-
-    Attributes:
-        conversation: Conversation ID in llama-stack format to use for the request.
-        user_conversation: Resolved user conversation record, or None for new ones.
-        generate_topic_summary: Resolved value for request.generate_topic_summary.
-    """
-
-    conversation: str = Field(description="Conversation ID in llama-stack format")
-    user_conversation: Optional[UserConversation] = Field(
-        default=None,
-        description="Resolved user conversation record, or None for new conversations",
-    )
-    generate_topic_summary: bool = Field(
-        description="Resolved value for request.generate_topic_summary",
-    )
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-
-class ShieldModerationBlocked(BaseModel):
-    """Shield moderation blocked the content; refusal details are present."""
-
-    decision: Literal["blocked"] = "blocked"
-    message: str
-    moderation_id: str
-    refusal_response: ResponseMessage
-
-
-ShieldModerationResult = Annotated[
-    ShieldModerationPassed | ShieldModerationBlocked,
-    Field(discriminator="decision"),
-]
-
-type IncludeParameter = Literal[
-    "web_search_call.action.sources",
-    "code_interpreter_call.outputs",
-    "computer_call_output.output.image_url",
-    "file_search_call.results",
-    "message.input_image.image_url",
-    "message.output_text.logprobs",
-    "reasoning.encrypted_content",
-]
-
-type ResponseItem = (
-    ResponseMessage
-    | WebSearchToolCall
-    | FileSearchToolCall
-    | FunctionToolCallOutput
-    | McpCall
-    | McpListTools
-    | McpApprovalRequest
-    | FunctionToolCall
-    | McpApprovalResponse
-)
-
-type ResponseInput = str | list[ResponseItem]
-
-
-class ToolCallSummary(BaseModel):
-    """Model representing a tool call made during response generation (for tool_calls list)."""
-
-    id: str = Field(description="ID of the tool call")
-    name: str = Field(description="Name of the tool called")
-    args: dict[str, Any] = Field(
-        default_factory=dict, description="Arguments passed to the tool"
-    )
-    type: str = Field("tool_call", description="Type indicator for tool call")
-
-
-class ToolResultSummary(BaseModel):
-    """Model representing a result from a tool call (for tool_results list)."""
-
-    id: str = Field(
-        description="ID of the tool call/result, matches the corresponding tool call 'id'"
-    )
-    status: str = Field(
-        ..., description="Status of the tool execution (e.g., 'success')"
-    )
-    content: str = Field(..., description="Content/result returned from the tool")
-    type: str = Field("tool_result", description="Type indicator for tool result")
-    round: int = Field(..., description="Round number or step of tool execution")
-
-
-class RAGChunk(BaseModel):
-    """Model representing a RAG chunk used in the response."""
-
-    content: str = Field(description="The content of the chunk")
-    source: Optional[str] = Field(
-        default=None,
-        description="Index name identifying the knowledge source from configuration",
-    )
-    score: Optional[float] = Field(default=None, description="Relevance score")
-    attributes: Optional[dict[str, Any]] = Field(
-        default=None,
-        description="Document metadata from the RAG provider (e.g., url, title, author)",
-    )
-
-
-class ReferencedDocument(BaseModel):
-    """Model representing a document referenced in generating a response.
-
-    Attributes:
-        doc_url: Url to the referenced doc.
-        doc_title: Title of the referenced doc.
-    """
-
-    doc_url: Optional[AnyUrl] = Field(
-        default=None, description="URL of the referenced document"
-    )
-
-    doc_title: Optional[str] = Field(
-        default=None, description="Title of the referenced document"
-    )
-
-    source: Optional[str] = Field(
-        default=None,
-        description="Index name identifying the knowledge source from configuration",
-    )
-
-
-class RAGContext(BaseModel):
-    """Result of building RAG context from all enabled pre-query RAG sources.
-
-    Attributes:
-        context_text: Formatted RAG context string for injection into the query.
-        rag_chunks: RAG chunks from pre-query sources (BYOK + Solr).
-        referenced_documents: Referenced documents from pre-query sources.
-    """
-
-    context_text: str = Field(default="", description="Formatted context for injection")
-    rag_chunks: list[RAGChunk] = Field(
-        default_factory=list,
-        description="RAG chunks from pre-query sources",
-    )
-    referenced_documents: list[ReferencedDocument] = Field(
-        default_factory=list,
-        description="Documents from pre-query sources",
-    )
-
-
-class TurnSummary(BaseModel):
-    """Summary of a turn in llama stack."""
-
-    id: str = Field(default="", description="ID of the response")
-    llm_response: str = ""
-    tool_calls: list[ToolCallSummary] = Field(default_factory=list)
-    tool_results: list[ToolResultSummary] = Field(default_factory=list)
-    rag_chunks: list[RAGChunk] = Field(default_factory=list)
-    referenced_documents: list[ReferencedDocument] = Field(default_factory=list)
-    token_usage: TokenCounter = Field(default_factory=TokenCounter)
-
-
-class TranscriptMetadata(BaseModel):
-    """Metadata for a transcript entry."""
-
-    provider: Optional[str] = None
-    model: str
-    query_provider: Optional[str] = None
-    query_model: Optional[str] = None
-    user_id: str
-    conversation_id: str
-    timestamp: str
-
-
-class Transcript(BaseModel):
-    """Model representing a transcript entry to be stored."""
-
-    metadata: TranscriptMetadata
-    redacted_query: str
-    query_is_valid: bool
-    llm_response: str
-    rag_chunks: list[dict[str, Any]] = Field(default_factory=list)
-    truncated: bool
-    attachments: list[dict[str, Any]] = Field(default_factory=list)
-    tool_calls: list[dict[str, Any]] = Field(default_factory=list)
-    tool_results: list[dict[str, Any]] = Field(default_factory=list)
diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py
index 4b2e20448..a12d0ce71 100644
--- a/src/utils/vector_search.py
+++ b/src/utils/vector_search.py
@@ -18,10 +18,10 @@
 import constants
 from configuration import configuration
 from log import get_logger
+from models.common.responses.types import ResponseInput
+from models.common.turn_summary import RAGChunk, RAGContext, ReferencedDocument
 from models.requests import SolrVectorSearchRequest
-from models.responses import ReferencedDocument
 from utils.responses import resolve_vector_store_ids
-from utils.types import RAGChunk, RAGContext, ResponseInput
 
 logger = get_logger(__name__)
 
diff --git a/tests/integration/endpoints/test_query_byok_integration.py b/tests/integration/endpoints/test_query_byok_integration.py
index 56030c107..650d840a7 100644
--- a/tests/integration/endpoints/test_query_byok_integration.py
+++ b/tests/integration/endpoints/test_query_byok_integration.py
@@ -15,8 +15,8 @@
 from app.endpoints.query import query_endpoint_handler
 from authentication.interface import AuthTuple
 from configuration import AppConfig
+from models.api.responses.successful import QueryResponse
 from models.requests import QueryRequest
-from models.responses import QueryResponse
 
 # ---------------------------------------------------------------------------
 # Helpers
diff --git a/tests/integration/endpoints/test_rlsapi_v1_integration.py b/tests/integration/endpoints/test_rlsapi_v1_integration.py
index b04adb5dd..25be6f125 100644
--- a/tests/integration/endpoints/test_rlsapi_v1_integration.py
+++ b/tests/integration/endpoints/test_rlsapi_v1_integration.py
@@ -21,6 +21,7 @@
 from app.endpoints.rlsapi_v1 import infer_endpoint
 from authentication.interface import AuthTuple
 from configuration import AppConfig
+from models.common.moderation import ShieldModerationPassed
 from models.rlsapi.requests import (
     RlsapiV1Attachment,
     RlsapiV1CLA,
@@ -32,7 +33,6 @@
 from models.rlsapi.responses import RlsapiV1InferResponse
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 from utils.suid import check_suid
-from utils.types import ShieldModerationPassed
 from version import __version__
 
 # ==========================================
diff --git a/tests/integration/test_middleware_integration.py b/tests/integration/test_middleware_integration.py
index 3c5126aae..62e337673 100644
--- a/tests/integration/test_middleware_integration.py
+++ b/tests/integration/test_middleware_integration.py
@@ -4,7 +4,7 @@
 from fastapi.testclient import TestClient
 
 from configuration import configuration
-from models.api.responses import InternalServerErrorResponse
+from models.api.responses.error import InternalServerErrorResponse
 
 
 class TestGlobalExceptionMiddlewareIntegration:  # pylint: disable=too-few-public-methods
diff --git a/tests/unit/app/endpoints/test_a2a.py b/tests/unit/app/endpoints/test_a2a.py
index 337eb1ea0..6138d2568 100644
--- a/tests/unit/app/endpoints/test_a2a.py
+++ b/tests/unit/app/endpoints/test_a2a.py
@@ -353,7 +353,8 @@ class TestGetLightspeedAgentCard:
     """Tests for the agent card generation."""
 
     def test_get_agent_card_with_config(
-        self, setup_configuration: AppConfig  # pylint: disable=unused-argument
+        self,
+        setup_configuration: AppConfig,  # pylint: disable=unused-argument
     ) -> None:
         """Test getting agent card with full configuration."""
         agent_card = get_lightspeed_agent_card()
diff --git a/tests/unit/app/endpoints/test_config.py b/tests/unit/app/endpoints/test_config.py
index cd108ec2d..2cd39fa5c 100644
--- a/tests/unit/app/endpoints/test_config.py
+++ b/tests/unit/app/endpoints/test_config.py
@@ -33,7 +33,8 @@ async def test_config_endpoint_handler_configuration_not_loaded(
 
     with pytest.raises(HTTPException) as exc_info:
         await config_endpoint_handler(
-            auth=auth, request=request  # pyright:ignore[reportArgumentType]
+            auth=auth,
+            request=request,  # pyright:ignore[reportArgumentType]
         )
     assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
 
@@ -66,7 +67,8 @@ async def test_config_endpoint_handler_configuration_loaded(
     auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")
 
     response = await config_endpoint_handler(
-        auth=auth, request=request  # pyright:ignore[reportArgumentType]
+        auth=auth,
+        request=request,  # pyright:ignore[reportArgumentType]
     )
     assert response is not None
     assert response.configuration == minimal_config.configuration
diff --git a/tests/unit/app/endpoints/test_conversations.py b/tests/unit/app/endpoints/test_conversations.py
index 3f7b86e10..09af41bb4 100644
--- a/tests/unit/app/endpoints/test_conversations.py
+++ b/tests/unit/app/endpoints/test_conversations.py
@@ -19,19 +19,19 @@
     update_conversation_endpoint_handler,
 )
 from configuration import AppConfig
-from models.api.responses import (
+from models.api.responses.error import (
     ForbiddenResponse,
     InternalServerErrorResponse,
 )
-from models.config import Action
-from models.database.conversations import UserConversation, UserTurn
-from models.requests import ConversationUpdateRequest
-from models.responses import (
+from models.api.responses.successful import (
     ConversationDeleteResponse,
     ConversationResponse,
     ConversationsListResponse,
     ConversationUpdateResponse,
 )
+from models.config import Action
+from models.database.conversations import UserConversation, UserTurn
+from models.requests import ConversationUpdateRequest
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 from utils.conversations import build_conversation_turns_from_items
 
diff --git a/tests/unit/app/endpoints/test_conversations_v2.py b/tests/unit/app/endpoints/test_conversations_v2.py
index 59d2face9..10c19222b 100644
--- a/tests/unit/app/endpoints/test_conversations_v2.py
+++ b/tests/unit/app/endpoints/test_conversations_v2.py
@@ -20,14 +20,16 @@
     update_conversation_endpoint_handler,
 )
 from configuration import AppConfig
+from models.api.responses.successful import ConversationUpdateResponse
 from models.cache_entry import CacheEntry
-from models.requests import ConversationUpdateRequest
-from models.responses import (
-    ConversationData,
-    ConversationUpdateResponse,
+from models.common import ConversationData
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
 )
+from models.requests import ConversationUpdateRequest
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
 
 MOCK_AUTH = ("mock_user_id", "mock_username", False, "mock_token")
 VALID_CONVERSATION_ID = "123e4567-e89b-12d3-a456-426614174000"
diff --git a/tests/unit/app/endpoints/test_health.py b/tests/unit/app/endpoints/test_health.py
index a6a746f06..82ddd78d1 100644
--- a/tests/unit/app/endpoints/test_health.py
+++ b/tests/unit/app/endpoints/test_health.py
@@ -14,7 +14,8 @@
     readiness_probe_get_method,
 )
 from authentication.interface import AuthTuple
-from models.responses import ProviderHealthStatus, ReadinessResponse
+from models.api.responses.successful import ReadinessResponse
+from models.common import ProviderHealthStatus
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 
 
diff --git a/tests/unit/app/endpoints/test_mcp_auth.py b/tests/unit/app/endpoints/test_mcp_auth.py
index 6df771501..0e06c3688 100644
--- a/tests/unit/app/endpoints/test_mcp_auth.py
+++ b/tests/unit/app/endpoints/test_mcp_auth.py
@@ -10,6 +10,7 @@
 from app.endpoints import mcp_auth
 from authentication.interface import AuthTuple
 from configuration import AppConfig
+from models.api.responses.successful import MCPClientAuthOptionsResponse
 from models.config import (
     Configuration,
     LlamaStackConfiguration,
@@ -17,7 +18,6 @@
     ServiceConfiguration,
     UserDataCollection,
 )
-from models.responses import MCPClientAuthOptionsResponse
 
 # Shared mock auth tuple with 4 fields as expected by the application
 MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token")
diff --git a/tests/unit/app/endpoints/test_mcp_servers.py b/tests/unit/app/endpoints/test_mcp_servers.py
index 9623c8625..5c34ee043 100644
--- a/tests/unit/app/endpoints/test_mcp_servers.py
+++ b/tests/unit/app/endpoints/test_mcp_servers.py
@@ -14,6 +14,11 @@
 from app.endpoints import mcp_servers
 from authentication.interface import AuthTuple
 from configuration import AppConfig
+from models.api.responses.successful import (
+    MCPServerDeleteResponse,
+    MCPServerListResponse,
+    MCPServerRegistrationResponse,
+)
 from models.config import (
     Configuration,
     CORSConfiguration,
@@ -24,11 +29,6 @@
     UserDataCollection,
 )
 from models.requests import MCPServerRegistrationRequest
-from models.responses import (
-    MCPServerDeleteResponse,
-    MCPServerListResponse,
-    MCPServerRegistrationResponse,
-)
 
 MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token")
 
diff --git a/tests/unit/app/endpoints/test_prompts.py b/tests/unit/app/endpoints/test_prompts.py
index 8387a5473..a02503978 100644
--- a/tests/unit/app/endpoints/test_prompts.py
+++ b/tests/unit/app/endpoints/test_prompts.py
@@ -17,8 +17,8 @@
 )
 from authentication.interface import AuthTuple
 from configuration import AppConfig
+from models.api.responses.successful import PromptDeleteResponse
 from models.requests import PromptCreateRequest, PromptUpdateRequest
-from models.responses import PromptDeleteResponse
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 
 MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token")
diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py
index 58a955289..9458aadf5 100644
--- a/tests/unit/app/endpoints/test_query.py
+++ b/tests/unit/app/endpoints/test_query.py
@@ -12,20 +12,20 @@
 
 from app.endpoints.query import query_endpoint_handler, retrieve_response
 from configuration import AppConfig
+from models.api.responses.successful import QueryResponse
+from models.common.moderation import ShieldModerationPassed
 from models.common.responses.responses_api_params import ResponsesApiParams
-from models.database.conversations import UserConversation
-from models.requests import Attachment, QueryRequest
-from models.responses import QueryResponse
-from utils.token_counter import TokenCounter
-from utils.types import (
+from models.common.turn_summary import (
     RAGChunk,
     RAGContext,
     ReferencedDocument,
-    ShieldModerationPassed,
     ToolCallSummary,
     ToolResultSummary,
     TurnSummary,
 )
+from models.database.conversations import UserConversation
+from models.requests import Attachment, QueryRequest
+from utils.token_counter import TokenCounter
 
 # User ID must be proper UUID
 MOCK_AUTH = (
diff --git a/tests/unit/app/endpoints/test_responses.py b/tests/unit/app/endpoints/test_responses.py
index 53ffc3d00..d4c0d631a 100644
--- a/tests/unit/app/endpoints/test_responses.py
+++ b/tests/unit/app/endpoints/test_responses.py
@@ -26,19 +26,17 @@
 )
 from configuration import AppConfig
 from constants import DEFAULT_SYSTEM_PROMPT, SUBSTITUTED_INSTRUCTIONS_PLACEHOLDER
+from models.api.responses.successful import ResponsesResponse
+from models.common.moderation import ShieldModerationBlocked, ShieldModerationPassed
 from models.common.responses.responses_api_params import ResponsesApiParams
 from models.common.responses.responses_context import ResponsesContext
+from models.common.responses.responses_conversation_context import (
+    ResponsesConversationContext,
+)
+from models.common.turn_summary import RAGContext, TurnSummary
 from models.config import Action, ModelContextProtocolServer
 from models.database.conversations import UserConversation
 from models.requests import ResponsesRequest
-from models.responses import ResponsesResponse
-from utils.types import (
-    RAGContext,
-    ResponsesConversationContext,
-    ShieldModerationBlocked,
-    ShieldModerationPassed,
-    TurnSummary,
-)
 
 MOCK_AUTH = (
     "00000001-0001-0001-0001-000000000001",
diff --git a/tests/unit/app/endpoints/test_responses_splunk.py b/tests/unit/app/endpoints/test_responses_splunk.py
index 7e5c34bb0..4822ab556 100644
--- a/tests/unit/app/endpoints/test_responses_splunk.py
+++ b/tests/unit/app/endpoints/test_responses_splunk.py
@@ -22,10 +22,10 @@
     handle_streaming_response,
 )
 from configuration import AppConfig
+from models.common.turn_summary import RAGContext, TurnSummary
 from models.requests import ResponsesRequest
 from observability.formats.responses import ResponsesEventData
 from tests.unit.app.endpoints.test_responses import build_api_params_and_context
-from utils.types import RAGContext, TurnSummary
 
 MODULE = "app.endpoints.responses"
 MOCK_AUTH = (
diff --git a/tests/unit/app/endpoints/test_rlsapi_v1.py b/tests/unit/app/endpoints/test_rlsapi_v1.py
index 87ab91000..38227cf4c 100644
--- a/tests/unit/app/endpoints/test_rlsapi_v1.py
+++ b/tests/unit/app/endpoints/test_rlsapi_v1.py
@@ -35,7 +35,8 @@
 from authentication.interface import AuthTuple
 from authentication.rh_identity import RHIdentityData
 from configuration import AppConfig
-from models.api.responses import ServiceUnavailableResponse
+from models.api.responses.error import ServiceUnavailableResponse
+from models.common.moderation import ShieldModerationBlocked, ShieldModerationPassed
 from models.rlsapi.requests import (
     RlsapiV1Attachment,
     RlsapiV1Context,
@@ -47,7 +48,6 @@
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 from utils.rh_identity import get_rh_identity_context
 from utils.suid import check_suid
-from utils.types import ShieldModerationBlocked, ShieldModerationPassed
 
 MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token")
 
diff --git a/tests/unit/app/endpoints/test_shields.py b/tests/unit/app/endpoints/test_shields.py
index d9e260093..354e2817d 100644
--- a/tests/unit/app/endpoints/test_shields.py
+++ b/tests/unit/app/endpoints/test_shields.py
@@ -10,7 +10,7 @@
 from app.endpoints.shields import shields_endpoint_handler
 from authentication.interface import AuthTuple
 from configuration import AppConfig
-from models.responses import ShieldsResponse
+from models.api.responses.successful import ShieldsResponse
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 
 
diff --git a/tests/unit/app/endpoints/test_stream_interrupt.py b/tests/unit/app/endpoints/test_stream_interrupt.py
index 8a767ee36..dccdb592a 100644
--- a/tests/unit/app/endpoints/test_stream_interrupt.py
+++ b/tests/unit/app/endpoints/test_stream_interrupt.py
@@ -7,8 +7,8 @@
 from fastapi import HTTPException
 
 from app.endpoints.stream_interrupt import stream_interrupt_endpoint_handler
+from models.api.responses.successful import StreamingInterruptResponse
 from models.requests import StreamingInterruptRequest
-from models.responses import StreamingInterruptResponse
 from utils.stream_interrupts import StreamInterruptRegistry
 
 REQUEST_ID_SUCCESS = "123e4567-e89b-12d3-a456-426614174000"
diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py
index 30c52c341..d29fb6c50 100644
--- a/tests/unit/app/endpoints/test_streaming_query.py
+++ b/tests/unit/app/endpoints/test_streaming_query.py
@@ -64,20 +64,20 @@
     MEDIA_TYPE_JSON,
     MEDIA_TYPE_TEXT,
 )
-from models.api.responses import InternalServerErrorResponse
+from models.api.responses.error import InternalServerErrorResponse
+from models.common.moderation import ShieldModerationPassed
 from models.common.responses.responses_api_params import ResponsesApiParams
-from models.config import Action
-from models.context import ResponseGeneratorContext
-from models.requests import Attachment, QueryRequest
-from utils.stream_interrupts import StreamInterruptRegistry
-from utils.token_counter import TokenCounter
-from utils.types import (
+from models.common.turn_summary import (
     RAGChunk,
     RAGContext,
     ReferencedDocument,
-    ShieldModerationPassed,
     TurnSummary,
 )
+from models.config import Action
+from models.context import ResponseGeneratorContext
+from models.requests import Attachment, QueryRequest
+from utils.stream_interrupts import StreamInterruptRegistry
+from utils.token_counter import TokenCounter
 
 MOCK_AUTH_STREAMING = (
     "00000001-0001-0001-0001-000000000001",
diff --git a/tests/unit/app/endpoints/test_tools.py b/tests/unit/app/endpoints/test_tools.py
index f6c0fa754..436fda75a 100644
--- a/tests/unit/app/endpoints/test_tools.py
+++ b/tests/unit/app/endpoints/test_tools.py
@@ -16,6 +16,7 @@
 from app.endpoints.tools import _input_schema_to_parameters
 from authentication.interface import AuthTuple
 from configuration import AppConfig
+from models.api.responses.successful import ToolsResponse
 from models.config import (
     Configuration,
     CORSConfiguration,
@@ -25,7 +26,6 @@
     TLSConfiguration,
     UserDataCollection,
 )
-from models.responses import ToolsResponse
 
 # Shared mock auth tuple with 4 fields as expected by the application
 MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token")
diff --git a/tests/unit/app/test_main_middleware.py b/tests/unit/app/test_main_middleware.py
index ea2ab9265..f0b76885a 100644
--- a/tests/unit/app/test_main_middleware.py
+++ b/tests/unit/app/test_main_middleware.py
@@ -10,7 +10,7 @@
 from starlette.types import Message, Receive, Scope, Send
 
 from app.main import GlobalExceptionMiddleware, RestApiMetricsMiddleware
-from models.api.responses import InternalServerErrorResponse
+from models.api.responses.error import InternalServerErrorResponse
 
 
 def _make_scope(path: str = "/test", root_path: str = "") -> Scope:
diff --git a/tests/unit/cache/test_postgres_cache.py b/tests/unit/cache/test_postgres_cache.py
index b01490919..bf2855de3 100644
--- a/tests/unit/cache/test_postgres_cache.py
+++ b/tests/unit/cache/test_postgres_cache.py
@@ -11,10 +11,14 @@
 from cache.cache_error import CacheError
 from cache.postgres_cache import PostgresCache
 from models.cache_entry import CacheEntry
+from models.common import ConversationData
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
 from models.config import PostgreSQLDatabaseConfiguration
-from models.responses import ConversationData
 from utils import suid
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
 
 USER_ID_1 = suid.get_suid()
 USER_ID_2 = suid.get_suid()
diff --git a/tests/unit/cache/test_sqlite_cache.py b/tests/unit/cache/test_sqlite_cache.py
index e4de441b3..3c87e677e 100644
--- a/tests/unit/cache/test_sqlite_cache.py
+++ b/tests/unit/cache/test_sqlite_cache.py
@@ -10,10 +10,14 @@
 from cache.cache_error import CacheError
 from cache.sqlite_cache import SQLiteCache
 from models.cache_entry import CacheEntry
+from models.common import ConversationData
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
 from models.config import SQLiteDatabaseConfiguration
-from models.responses import ConversationData
 from utils import suid
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
 
 USER_ID_1 = suid.get_suid()
 USER_ID_2 = suid.get_suid()
diff --git a/tests/unit/models/responses/test_authorized_response.py b/tests/unit/models/responses/test_authorized_response.py
index 3a8675078..c200d2dc1 100644
--- a/tests/unit/models/responses/test_authorized_response.py
+++ b/tests/unit/models/responses/test_authorized_response.py
@@ -3,7 +3,7 @@
 import pytest
 from pydantic import ValidationError
 
-from models.responses import AuthorizedResponse
+from models.api.responses.successful import AuthorizedResponse
 
 
 class TestAuthorizedResponse:
diff --git a/tests/unit/models/responses/test_error_responses.py b/tests/unit/models/responses/test_error_responses.py
index aa99aebe6..602ccab94 100644
--- a/tests/unit/models/responses/test_error_responses.py
+++ b/tests/unit/models/responses/test_error_responses.py
@@ -6,7 +6,7 @@
 from fastapi import status
 from pydantic_core import SchemaError
 
-from models.api.responses import (
+from models.api.responses.constants import (
     BAD_REQUEST_DESCRIPTION,
     FORBIDDEN_DESCRIPTION,
     INTERNAL_SERVER_ERROR_DESCRIPTION,
@@ -16,6 +16,8 @@
     SERVICE_UNAVAILABLE_DESCRIPTION,
     UNAUTHORIZED_DESCRIPTION,
     UNPROCESSABLE_CONTENT_DESCRIPTION,
+)
+from models.api.responses.error import (
     AbstractErrorResponse,
     BadRequestResponse,
     DetailModel,
diff --git a/tests/unit/models/responses/test_query_response.py b/tests/unit/models/responses/test_query_response.py
index 95e7e0498..ce547ec1a 100644
--- a/tests/unit/models/responses/test_query_response.py
+++ b/tests/unit/models/responses/test_query_response.py
@@ -2,8 +2,12 @@
 
 from pydantic import AnyUrl
 
-from models.responses import QueryResponse
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
+from models.api.responses.successful import QueryResponse
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
 
 
 class TestQueryResponse:
diff --git a/tests/unit/models/responses/test_rag_chunk.py b/tests/unit/models/responses/test_rag_chunk.py
index 5e545f867..2586614da 100644
--- a/tests/unit/models/responses/test_rag_chunk.py
+++ b/tests/unit/models/responses/test_rag_chunk.py
@@ -2,8 +2,8 @@
 
 from pydantic import HttpUrl
 
-from models.responses import ReferencedDocument
-from utils.types import RAGChunk, RAGContext
+from models.common import ReferencedDocument
+from models.common.turn_summary import RAGChunk, RAGContext
 
 
 class TestRAGChunk:
diff --git a/tests/unit/models/responses/test_response_types.py b/tests/unit/models/responses/test_response_types.py
index 69743e1db..2616cb801 100644
--- a/tests/unit/models/responses/test_response_types.py
+++ b/tests/unit/models/responses/test_response_types.py
@@ -3,7 +3,11 @@
 import pytest
 from pydantic import ValidationError
 
-from models.responses import ConversationData, ConversationDetails, ProviderHealthStatus
+from models.common import (
+    ConversationData,
+    ConversationDetails,
+    ProviderHealthStatus,
+)
 
 
 class TestConversationDetails:
diff --git a/tests/unit/models/responses/test_successful_responses.py b/tests/unit/models/responses/test_successful_responses.py
index 4d5004148..bd86f9ceb 100644
--- a/tests/unit/models/responses/test_successful_responses.py
+++ b/tests/unit/models/responses/test_successful_responses.py
@@ -6,19 +6,10 @@
 from pydantic import AnyHttpUrl, AnyUrl, ConfigDict, ValidationError
 from pydantic_core import SchemaError
 
-from models.config import (
-    Configuration,
-    LlamaStackConfiguration,
-    ServiceConfiguration,
-    UserDataCollection,
-)
-from models.responses import (
-    AbstractSuccessfulResponse,
+from models.api.responses.successful import (
     AuthorizedResponse,
     ConfigurationResponse,
-    ConversationData,
     ConversationDeleteResponse,
-    ConversationDetails,
     ConversationResponse,
     ConversationsListResponse,
     ConversationsListResponseV2,
@@ -28,9 +19,7 @@
     InfoResponse,
     LivenessResponse,
     MCPClientAuthOptionsResponse,
-    MCPServerAuthInfo,
     ModelsResponse,
-    ProviderHealthStatus,
     ProviderResponse,
     ProvidersListResponse,
     QueryResponse,
@@ -42,7 +31,24 @@
     StreamingQueryResponse,
     ToolsResponse,
 )
-from utils.types import ReferencedDocument, ToolCallSummary, ToolResultSummary
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
+from models.common import (
+    ConversationData,
+    ConversationDetails,
+    MCPServerAuthInfo,
+    ProviderHealthStatus,
+)
+from models.common.turn_summary import (
+    ReferencedDocument,
+    ToolCallSummary,
+    ToolResultSummary,
+)
+from models.config import (
+    Configuration,
+    LlamaStackConfiguration,
+    ServiceConfiguration,
+    UserDataCollection,
+)
 
 
 class TestModelsResponse:
@@ -1108,7 +1114,6 @@ def test_openapi_response_structure(self) -> None:
 
         schema = content["schema"]
         assert schema["type"] == "string"
-        assert schema["format"] == "text/event-stream"
 
     def test_model_json_schema_has_examples(self) -> None:
         """Test that model_json_schema() includes examples.
diff --git a/tests/unit/models/responses/test_types.py b/tests/unit/models/responses/test_types.py
index 90e73a1f8..411248b97 100644
--- a/tests/unit/models/responses/test_types.py
+++ b/tests/unit/models/responses/test_types.py
@@ -3,7 +3,11 @@
 import pytest
 from pydantic import ValidationError
 
-from models.responses import ConversationData, ConversationDetails, ProviderHealthStatus
+from models.common import (
+    ConversationData,
+    ConversationDetails,
+    ProviderHealthStatus,
+)
 
 
 class TestConversationDetails:
diff --git a/tests/unit/models/rlsapi/test_responses.py b/tests/unit/models/rlsapi/test_responses.py
index f561b42f8..0511d5587 100644
--- a/tests/unit/models/rlsapi/test_responses.py
+++ b/tests/unit/models/rlsapi/test_responses.py
@@ -6,7 +6,7 @@
 import pytest
 from pydantic import BaseModel, ValidationError
 
-from models.responses import AbstractSuccessfulResponse
+from models.api.responses.successful.bases import AbstractSuccessfulResponse
 from models.rlsapi.responses import (
     RlsapiV1InferData,
     RlsapiV1InferResponse,
diff --git a/tests/unit/utils/test_conversations.py b/tests/unit/utils/test_conversations.py
index 2ddc65373..3003e2e35 100644
--- a/tests/unit/utils/test_conversations.py
+++ b/tests/unit/utils/test_conversations.py
@@ -10,6 +10,7 @@
 from pytest_mock import MockerFixture
 
 from constants import DEFAULT_RAG_TOOL
+from models.common.turn_summary import ToolCallSummary
 from models.database.conversations import UserTurn
 from utils.conversations import (
     _build_tool_call_summary_from_item,
@@ -18,7 +19,6 @@
     build_conversation_turns_from_items,
     get_all_conversation_items,
 )
-from utils.types import ToolCallSummary
 
 # Default conversation start time for tests
 DEFAULT_CONVERSATION_START_TIME = datetime.fromisoformat(
diff --git a/tests/unit/utils/test_endpoints.py b/tests/unit/utils/test_endpoints.py
index 03a88e8e1..2220d25d2 100644
--- a/tests/unit/utils/test_endpoints.py
+++ b/tests/unit/utils/test_endpoints.py
@@ -11,9 +11,12 @@
 from pytest_mock import MockerFixture
 from sqlalchemy.exc import SQLAlchemyError
 
+from models.common.responses.responses_conversation_context import (
+    ResponsesConversationContext,
+)
+from models.common.turn_summary import ReferencedDocument
 from models.database.conversations import UserConversation, UserTurn
 from utils import endpoints
-from utils.types import ReferencedDocument, ResponsesConversationContext
 
 
 @pytest.fixture(name="input_file")
diff --git a/tests/unit/utils/test_query.py b/tests/unit/utils/test_query.py
index f21f5189f..e13ec18ab 100644
--- a/tests/unit/utils/test_query.py
+++ b/tests/unit/utils/test_query.py
@@ -15,12 +15,13 @@
 
 from cache.cache_error import CacheError
 from configuration import AppConfig
-from models.api.responses import (
+from models.api.responses.error import (
     InternalServerErrorResponse,
     PromptTooLongResponse,
     QuotaExceededResponse,
 )
 from models.cache_entry import CacheEntry
+from models.common.turn_summary import TurnSummary
 from models.config import Action
 from models.database.conversations import UserConversation, UserTurn
 from models.requests import Attachment, QueryRequest
@@ -41,7 +42,6 @@
     validate_model_provider_override,
 )
 from utils.token_counter import TokenCounter
-from utils.types import TurnSummary
 
 
 @pytest.fixture(name="mock_config")
diff --git a/tests/unit/utils/test_shields.py b/tests/unit/utils/test_shields.py
index b11562704..9d4dd3c48 100644
--- a/tests/unit/utils/test_shields.py
+++ b/tests/unit/utils/test_shields.py
@@ -481,9 +481,10 @@ def test_raises_422_when_shield_ids_provided_and_override_disabled(
             validate_shield_ids_override(query_request, mock_config)
 
         assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
-        # pylint: disable=line-too-long
-        assert "Shield IDs customization is disabled" in exc_info.value.detail["response"]  # type: ignore
-        assert "disable_shield_ids_override" in exc_info.value.detail["cause"]  # type: ignore
+        detail = exc_info.value.detail
+        assert isinstance(detail, dict)
+        assert "Shield IDs customization is disabled" in detail["response"]
+        assert "disable_shield_ids_override" in detail["cause"]
 
     def test_raises_422_when_empty_list_shield_ids_and_override_disabled(
         self, mocker: MockerFixture
diff --git a/tests/unit/utils/test_transcripts.py b/tests/unit/utils/test_transcripts.py
index 10de4fe4c..aeeafe7a0 100644
--- a/tests/unit/utils/test_transcripts.py
+++ b/tests/unit/utils/test_transcripts.py
@@ -5,6 +5,7 @@
 from pytest_mock import MockerFixture
 
 from configuration import AppConfig
+from models.common.turn_summary import ToolCallSummary, ToolResultSummary, TurnSummary
 from models.requests import QueryRequest
 from utils.transcripts import (
     construct_transcripts_path,
@@ -12,7 +13,6 @@
     create_transcript_metadata,
     store_transcript,
 )
-from utils.types import ToolCallSummary, ToolResultSummary, TurnSummary
 
 
 def test_construct_transcripts_path(mocker: MockerFixture) -> None:
diff --git a/tests/unit/utils/test_types.py b/tests/unit/utils/test_types.py
index 6a62c5da5..8447054f0 100644
--- a/tests/unit/utils/test_types.py
+++ b/tests/unit/utils/test_types.py
@@ -11,12 +11,12 @@
 from pydantic import AnyUrl, ValidationError
 
 from models.common.responses.responses_api_params import ResponsesApiParams
-from utils.types import (
+from models.common.turn_summary import (
     ReferencedDocument,
     ToolCallSummary,
     ToolResultSummary,
-    content_to_str,
 )
+from utils.types import content_to_str
 
 
 class TestContentToStr:
diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py
index 2aafab0a7..64bc33b10 100644
--- a/tests/unit/utils/test_vector_search.py
+++ b/tests/unit/utils/test_vector_search.py
@@ -6,8 +6,8 @@
 
 import constants
 from configuration import AppConfig
+from models.common.turn_summary import RAGChunk
 from models.requests import SolrVectorSearchRequest
-from utils.types import RAGChunk
 from utils.vector_search import (
     _build_document_url,
     _build_query_params,