From 9750785ccee561280132a9e1f7bbf84a4097c634 Mon Sep 17 00:00:00 2001 From: Andrej Simurka Date: Sun, 23 Nov 2025 15:05:16 +0100 Subject: [PATCH 1/4] Added response models with examples --- src/models/responses.py | 1341 +++++++++++------ .../responses/test_authorized_response.py | 1 - .../models/responses/test_error_responses.py | 684 +++++++++ .../models/responses/test_status_response.py | 19 - .../responses/test_successful_responses.py | 984 ++++++++++++ .../responses/test_unauthorized_response.py | 21 - 6 files changed, 2533 insertions(+), 517 deletions(-) create mode 100644 tests/unit/models/responses/test_error_responses.py delete mode 100644 tests/unit/models/responses/test_status_response.py create mode 100644 tests/unit/models/responses/test_successful_responses.py delete mode 100644 tests/unit/models/responses/test_unauthorized_response.py diff --git a/src/models/responses.py b/src/models/responses.py index e05fc114..e98be0f7 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -2,92 +2,157 @@ """Models for REST API responses.""" -from typing import Any, Optional, Union +from typing import Any, ClassVar, Optional, Union +from fastapi import status from pydantic import AnyUrl, BaseModel, Field +from pydantic_core import SchemaError -from llama_stack_client.types import ProviderInfo +from quota.quota_exceed_error import QuotaExceedError +from models.config import Action, Configuration +BAD_REQUEST_DESCRIPTION = "Invalid request format" +UNAUTHORIZED_DESCRIPTION = "Unauthorized" +FORBIDDEN_DESCRIPTION = "Permission denied" +NOT_FOUND_DESCRIPTION = "Resource not found" +UNPROCESSABLE_CONTENT_DESCRIPTION = "Request validation failed" +INVALID_FEEDBACK_PATH_DESCRIPTION = "Invalid feedback storage path" +SERVICE_UNAVAILABLE_DESCRIPTION = "Service unavailable" +QUOTA_EXCEEDED_DESCRIPTION = "Quota limit exceeded" +INTERNAL_SERVER_ERROR_DESCRIPTION = "Internal server error" -class ModelsResponse(BaseModel): + +class RAGChunk(BaseModel): + """Model representing a RAG chunk used in the response.""" + + content: str = Field(description="The content of the chunk") + source: str | None = Field(None, description="Source document or URL") + score: float | None = Field(None, description="Relevance score") + + +class ToolCall(BaseModel): + """Model representing a tool call made during response generation.""" + + tool_name: str = Field(description="Name of the tool called") + arguments: dict[str, Any] = Field(description="Arguments passed to the tool") + result: dict[str, Any] | None = Field(None, description="Result from the tool") + + +class AbstractSuccessfulResponse(BaseModel): + """Base class for all successful response models.""" + + @classmethod + def openapi_response(cls) -> dict[str, Any]: + """Generate FastAPI response dict with a single example from model_config.""" + schema = cls.model_json_schema() + model_examples = schema.get("examples") + if not model_examples: + raise SchemaError(f"Examples not found in {cls.__name__}") + example_value = model_examples[0] + content = {"application/json": {"example": example_value}} + + return { + "description": "Successful response", + "model": cls, + "content": content, + } + + +class ModelsResponse(AbstractSuccessfulResponse): """Model representing a response to models request.""" models: list[dict[str, Any]] = Field( ..., description="List of models available", - examples=[ - { - "identifier": "openai/gpt-4-turbo", - "metadata": {}, - "api_model_type": "llm", - "provider_id": "openai", - "type": "model", - 
"provider_resource_id": "gpt-4-turbo", - "model_type": "llm", - }, - { - "identifier": "openai/gpt-3.5-turbo-0125", - "metadata": {}, - "api_model_type": "llm", - "provider_id": "openai", - "type": "model", - "provider_resource_id": "gpt-3.5-turbo-0125", - "model_type": "llm", - }, - ], ) + model_config = { + "json_schema_extra": { + "examples": [ + { + "models": [ + { + "identifier": "openai/gpt-4-turbo", + "metadata": {}, + "api_model_type": "llm", + "provider_id": "openai", + "type": "model", + "provider_resource_id": "gpt-4-turbo", + "model_type": "llm", + }, + ], + } + ] + } + } + -class ToolsResponse(BaseModel): +class ToolsResponse(AbstractSuccessfulResponse): """Model representing a response to tools request.""" tools: list[dict[str, Any]] = Field( description=( "List of tools available from all configured MCP servers and built-in toolgroups" ), - examples=[ - [ + ) + + model_config = { + "json_schema_extra": { + "examples": [ { - "identifier": "filesystem_read", - "description": "Read contents of a file from the filesystem", - "parameters": [ + "tools": [ { - "name": "path", - "description": "Path to the file to read", - "parameter_type": "string", - "required": True, - "default": None, + "identifier": "filesystem_read", + "description": "Read contents of a file from the filesystem", + "parameters": [ + { + "name": "path", + "description": "Path to the file to read", + "parameter_type": "string", + "required": True, + "default": None, + } + ], + "provider_id": "model-context-protocol", + "toolgroup_id": "filesystem-tools", + "server_source": "http://localhost:3000", + "type": "tool", } ], - "provider_id": "model-context-protocol", - "toolgroup_id": "filesystem-tools", - "server_source": "http://localhost:3000", - "type": "tool", } ] - ], - ) + } + } -class ShieldsResponse(BaseModel): +class ShieldsResponse(AbstractSuccessfulResponse): """Model representing a response to shields request.""" shields: list[dict[str, Any]] = Field( ..., description="List of shields available", - examples=[ - { - "identifier": "lightspeed_question_validity-shield", - "provider_resource_id": "lightspeed_question_validity-shield", - "provider_id": "lightspeed_question_validity", - "type": "shield", - "params": {}, - } - ], ) + model_config = { + "json_schema_extra": { + "examples": [ + { + "shields": [ + { + "identifier": "lightspeed_question_validity-shield", + "provider_resource_id": "lightspeed_question_validity-shield", + "provider_id": "lightspeed_question_validity", + "type": "shield", + "params": {}, + } + ], + } + ] + } + } + -class RAGInfoResponse(BaseModel): +class RAGInfoResponse(AbstractSuccessfulResponse): """Model representing a response with information about RAG DB.""" id: str = Field( @@ -129,8 +194,25 @@ class RAGInfoResponse(BaseModel): examples=["completed"], ) + model_config = { + "json_schema_extra": { + "examples": [ + { + "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "name": "Faiss Store with Knowledge base", + "created_at": 1763391371, + "last_active_at": 1763391371, + "usage_bytes": 1024000, + "expires_at": None, + "object": "vector_store", + "status": "completed", + } + ] + } + } + -class RAGListResponse(BaseModel): +class RAGListResponse(AbstractSuccessfulResponse): """Model representing a response to list RAGs request.""" rags: list[str] = Field( @@ -143,80 +225,88 @@ class RAGListResponse(BaseModel): ], ) + model_config = { + "json_schema_extra": { + "examples": [ + { + "rags": [ + "vs_00000000-cafe-babe-0000-000000000000", + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + 
"vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3", + ] + } + ] + } + } + -class ProvidersListResponse(BaseModel): +class ProvidersListResponse(AbstractSuccessfulResponse): """Model representing a response to providers request.""" providers: dict[str, list[dict[str, Any]]] = Field( ..., description="List of available API types and their corresponding providers", - examples=[ - { - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers", - }, - {"provider_id": "openai", "provider_type": "remote::openai"}, - ], - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference", - }, - ], - "datasetio": [ - { - "provider_id": "huggingface", - "provider_type": "remote::huggingface", - }, - {"provider_id": "localfs", "provider_type": "inline::localfs"}, - ], - }, - ], ) + model_config = { + "json_schema_extra": { + "examples": [ + { + "providers": { + "inference": [ + { + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers", + }, + { + "provider_id": "openai", + "provider_type": "remote::openai", + }, + ], + "agents": [ + { + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference", + }, + ], + }, + } + ] + } + } + -class ProviderResponse(ProviderInfo): +class ProviderResponse(AbstractSuccessfulResponse): """Model representing a response to get specific provider request.""" api: str = Field( ..., description="The API this provider implements", - example="inference", - ) # type: ignore + ) config: dict[str, Union[bool, float, str, list[Any], object, None]] = Field( ..., description="Provider configuration parameters", - example={"api_key": "********"}, - ) # type: ignore + ) health: dict[str, Union[bool, float, str, list[Any], object, None]] = Field( ..., description="Current health status of the provider", - example={"status": "OK", "message": "Healthy"}, - ) # type: ignore - provider_id: str = Field( - ..., description="Unique provider identifier", example="openai" - ) # type: ignore - provider_type: str = Field( - ..., description="Provider implementation type", example="remote::openai" - ) # type: ignore - - -class RAGChunk(BaseModel): - """Model representing a RAG chunk used in the response.""" - - content: str = Field(description="The content of the chunk") - source: Optional[str] = Field(None, description="Source document or URL") - score: Optional[float] = Field(None, description="Relevance score") - - -class ToolCall(BaseModel): - """Model representing a tool call made during response generation.""" + ) + provider_id: str = Field(..., description="Unique provider identifier") + provider_type: str = Field(..., description="Provider implementation type") - tool_name: str = Field(description="Name of the tool called") - arguments: dict[str, Any] = Field(description="Arguments passed to the tool") - result: Optional[dict[str, Any]] = Field(None, description="Result from the tool") + model_config = { + "json_schema_extra": { + "examples": [ + { + "api": "inference", + "config": {"api_key": "********"}, + "health": {"status": "OK", "message": "Healthy"}, + "provider_id": "openai", + "provider_type": "remote::openai", + } + ] + } + } class ConversationData(BaseModel): @@ -241,14 +331,12 @@ class ReferencedDocument(BaseModel): doc_title: Title of the referenced doc. 
""" - doc_url: Optional[AnyUrl] = Field( - None, description="URL of the referenced document" - ) + doc_url: AnyUrl | None = Field(None, description="URL of the referenced document") doc_title: str | None = Field(None, description="Title of the referenced document") -class QueryResponse(BaseModel): +class QueryResponse(AbstractSuccessfulResponse): """Model representing LLM response to a query. Attributes: @@ -263,7 +351,7 @@ class QueryResponse(BaseModel): available_quotas: Quota available as measured by all configured quota limiters. """ - conversation_id: Optional[str] = Field( + conversation_id: str | None = Field( None, description="The optional conversation ID (UUID)", examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], @@ -281,7 +369,7 @@ class QueryResponse(BaseModel): description="List of RAG chunks used to generate the response", ) - tool_calls: Optional[list[ToolCall]] = Field( + tool_calls: list[ToolCall] | None = Field( None, description="List of tool calls made during response generation", ) @@ -324,7 +412,6 @@ class QueryResponse(BaseModel): examples=[{"daily": 1000, "monthly": 50000}], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -362,22 +449,13 @@ class QueryResponse(BaseModel): } -class InfoResponse(BaseModel): +class InfoResponse(AbstractSuccessfulResponse): """Model representing a response to an info request. Attributes: name: Service name. service_version: Service version. llama_stack_version: Llama Stack version. - - Example: - ```python - info_response = InfoResponse( - name="Lightspeed Stack", - service_version="1.0.0", - llama_stack_version="0.2.22", - ) - ``` """ name: str = Field( @@ -395,7 +473,6 @@ class InfoResponse(BaseModel): examples=["0.2.1", "0.2.2", "0.2.18", "0.2.21", "0.2.22"], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -425,35 +502,20 @@ class ProviderHealthStatus(BaseModel): description="The health status", examples=["ok", "unhealthy", "not_implemented"], ) - message: Optional[str] = Field( + message: str | None = Field( None, description="Optional message about the health status", examples=["All systems operational", "Llama Stack is unavailable"], ) -class ReadinessResponse(BaseModel): +class ReadinessResponse(AbstractSuccessfulResponse): """Model representing response to a readiness request. Attributes: ready: If service is ready. reason: The reason for the readiness. providers: List of unhealthy providers in case of readiness failure. - - Example: - ```python - readiness_response = ReadinessResponse( - ready=False, - reason="Service is not ready", - providers=[ - ProviderHealthStatus( - provider_id="ollama", - status="unhealthy", - message="Server is unavailable" - ) - ] - ) - ``` """ ready: bool = Field( @@ -474,7 +536,6 @@ class ReadinessResponse(BaseModel): examples=[], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -488,16 +549,11 @@ class ReadinessResponse(BaseModel): } -class LivenessResponse(BaseModel): +class LivenessResponse(AbstractSuccessfulResponse): """Model representing a response to a liveness request. Attributes: alive: If app is alive. 
- - Example: - ```python - liveness_response = LivenessResponse(alive=True) - ``` """ alive: bool = Field( @@ -506,7 +562,6 @@ class LivenessResponse(BaseModel): examples=[True, False], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -518,55 +573,11 @@ class LivenessResponse(BaseModel): } -class NotAvailableResponse(BaseModel): - """Model representing error response for readiness endpoint.""" - - detail: dict[str, str] = Field( - ..., - description="Detailed information about readiness state", - examples=[ - { - "response": "Service is not ready", - "cause": "Index is not ready", - }, - { - "response": "Service is not ready", - "cause": "LLM is not ready", - }, - ], - ) - - # provides examples for /docs endpoint - model_config = { - "json_schema_extra": { - "examples": [ - { - "detail": { - "response": "Service is not ready", - "cause": "Index is not ready", - } - }, - { - "detail": { - "response": "Service is not ready", - "cause": "LLM is not ready", - }, - }, - ] - } - } - - -class FeedbackResponse(BaseModel): +class FeedbackResponse(AbstractSuccessfulResponse): """Model representing a response to a feedback request. Attributes: response: The response of the feedback request. - - Example: - ```python - feedback_response = FeedbackResponse(response="feedback received") - ``` """ response: str = Field( @@ -575,7 +586,6 @@ class FeedbackResponse(BaseModel): examples=["feedback received"], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -587,20 +597,12 @@ class FeedbackResponse(BaseModel): } -class StatusResponse(BaseModel): +class StatusResponse(AbstractSuccessfulResponse): """Model representing a response to a status request. Attributes: functionality: The functionality of the service. status: The status of the service. - - Example: - ```python - status_response = StatusResponse( - functionality="feedback", - status={"enabled": True}, - ) - ``` """ functionality: str = Field( @@ -615,7 +617,6 @@ class StatusResponse(BaseModel): examples=[{"enabled": True}], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -628,7 +629,7 @@ class StatusResponse(BaseModel): } -class AuthorizedResponse(BaseModel): +class AuthorizedResponse(AbstractSuccessfulResponse): """Model representing a response to an authorization request. Attributes: @@ -653,7 +654,6 @@ class AuthorizedResponse(BaseModel): examples=[True, False], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -667,29 +667,12 @@ class AuthorizedResponse(BaseModel): } -class ConversationResponse(BaseModel): +class ConversationResponse(AbstractSuccessfulResponse): """Model representing a response for retrieving a conversation. Attributes: conversation_id: The conversation ID (UUID). chat_history: The simplified chat history as a list of conversation turns. 
- - Example: - ```python - conversation_response = ConversationResponse( - conversation_id="123e4567-e89b-12d3-a456-426614174000", - chat_history=[ - { - "messages": [ - {"content": "Hello", "type": "user"}, - {"content": "Hi there!", "type": "assistant"} - ], - "started_at": "2024-01-01T00:01:00Z", - "completed_at": "2024-01-01T00:01:05Z" - } - ] - ) - ``` """ conversation_id: str = Field( @@ -713,7 +696,6 @@ class ConversationResponse(BaseModel): ], ) - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -735,41 +717,100 @@ class ConversationResponse(BaseModel): } -class ConversationDeleteResponse(BaseModel): +class ConversationDeleteResponse(AbstractSuccessfulResponse): """Model representing a response for deleting a conversation. Attributes: conversation_id: The conversation ID (UUID) that was deleted. success: Whether the deletion was successful. response: A message about the deletion result. - - Example: - ```python - delete_response = ConversationDeleteResponse( - conversation_id="123e4567-e89b-12d3-a456-426614174000", - success=True, - response="Conversation deleted successfully" - ) - ``` """ - conversation_id: str - success: bool - response: str + conversation_id: str = Field( + ..., + description="The conversation ID (UUID) that was deleted.", + examples=["123e4567-e89b-12d3-a456-426614174000"], + ) + success: bool = Field( + ..., description="Whether the deletion was successful.", examples=[True, False] + ) + response: str = Field( + ..., + description="A message about the deletion result.", + examples=[ + "Conversation deleted successfully", + "Conversation cannot be deleted", + ], + ) + + def __init__(self, *, deleted: bool, conversation_id: str) -> None: + """Initialize a ConversationDeleteResponse. + + Args: + deleted: Whether the conversation was successfully deleted. + conversation_id: The ID of the conversation that was deleted. 
+        """
+        response_msg = (
+            "Conversation deleted successfully"
+            if deleted
+            else "Conversation cannot be deleted"
+        )
+        super().__init__(
+            conversation_id=conversation_id,  # type: ignore[call-arg]
+            success=deleted,  # type: ignore[call-arg]
+            response=response_msg,  # type: ignore[call-arg]
+        )
 
-    # provides examples for /docs endpoint
     model_config = {
         "json_schema_extra": {
             "examples": [
                 {
-                    "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-                    "success": True,
-                    "response": "Conversation deleted successfully",
-                }
+                    "label": "deleted",
+                    "value": {
+                        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                        "success": True,
+                        "response": "Conversation deleted successfully",
+                    },
+                },
+                {
+                    "label": "not found",
+                    "value": {
+                        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                        "success": False,
+                        "response": "Conversation cannot be deleted",
+                    },
+                },
             ]
         }
     }
 
+    @classmethod
+    def openapi_response(cls) -> dict[str, Any]:
+        """Generate FastAPI response dict, using examples from model_config."""
+        schema = cls.model_json_schema()
+        model_examples = schema.get("examples", [])
+
+        named_examples: dict[str, Any] = {}
+
+        for ex in model_examples:
+            label = ex.get("label")
+            if label is None:
+                raise SchemaError(f"Example {ex} in {cls.__name__} has no label")
+
+            value = ex.get("value")
+            if value is None:
+                raise SchemaError(f"Example '{label}' in {cls.__name__} has no value")
+
+            named_examples[label] = {"value": value}
+
+        content = {"application/json": {"examples": named_examples or None}}
+
+        return {
+            "description": "Successful response",
+            "model": cls,
+            "content": content,
+        }
+
 
 class ConversationDetails(BaseModel):
     """Model representing the details of a user conversation.
@@ -782,19 +823,6 @@ class ConversationDetails(BaseModel):
         last_used_model: The last model used for the conversation.
         last_used_provider: The provider of the last used model.
        topic_summary: The topic summary for the conversation. 
- - Example: - ```python - conversation = ConversationDetails( - conversation_id="123e4567-e89b-12d3-a456-426614174000" - created_at="2024-01-01T00:00:00Z", - last_message_at="2024-01-01T00:05:00Z", - message_count=5, - last_used_model="gemini/gemini-2.0-flash", - last_used_provider="gemini", - topic_summary="Openshift Microservices Deployment Strategies", - ) - ``` """ conversation_id: str = Field( @@ -803,78 +831,52 @@ class ConversationDetails(BaseModel): examples=["c5260aec-4d82-4370-9fdf-05cf908b3f16"], ) - created_at: Optional[str] = Field( + created_at: str | None = Field( None, description="When the conversation was created", examples=["2024-01-01T01:00:00Z"], ) - last_message_at: Optional[str] = Field( + last_message_at: str | None = Field( None, description="When the last message was sent", examples=["2024-01-01T01:00:00Z"], ) - message_count: Optional[int] = Field( + message_count: int | None = Field( None, description="Number of user messages in the conversation", examples=[42], ) - last_used_model: Optional[str] = Field( + last_used_model: str | None = Field( None, description="Identification of the last model used for the conversation", examples=["gpt-4-turbo", "gpt-3.5-turbo-0125"], ) - last_used_provider: Optional[str] = Field( + last_used_provider: str | None = Field( None, description="Identification of the last provider used for the conversation", examples=["openai", "gemini"], ) - topic_summary: Optional[str] = Field( + topic_summary: str | None = Field( None, description="Topic summary for the conversation", examples=["Openshift Microservices Deployment Strategies"], ) -class ConversationsListResponse(BaseModel): +class ConversationsListResponse(AbstractSuccessfulResponse): """Model representing a response for listing conversations of a user. Attributes: conversations: List of conversation details associated with the user. - - Example: - ```python - conversations_list = ConversationsListResponse( - conversations=[ - ConversationDetails( - conversation_id="123e4567-e89b-12d3-a456-426614174000", - created_at="2024-01-01T00:00:00Z", - last_message_at="2024-01-01T00:05:00Z", - message_count=5, - last_used_model="gemini/gemini-2.0-flash", - last_used_provider="gemini", - topic_summary="Openshift Microservices Deployment Strategies", - ), - ConversationDetails( - conversation_id="456e7890-e12b-34d5-a678-901234567890" - created_at="2024-01-01T01:00:00Z", - message_count=2, - last_used_model="gemini/gemini-2.0-flash", - last_used_provider="gemini", - topic_summary="RHDH Purpose Summary", - ) - ] - ) - ``` """ conversations: list[ConversationDetails] - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -904,7 +906,7 @@ class ConversationsListResponse(BaseModel): } -class ConversationsListResponseV2(BaseModel): +class ConversationsListResponseV2(AbstractSuccessfulResponse): """Model representing a response for listing conversations of a user. 
Attributes: @@ -913,67 +915,33 @@ class ConversationsListResponseV2(BaseModel): conversations: list[ConversationData] - -class ErrorResponse(BaseModel): - """Model representing error response for query endpoint.""" - - detail: dict[str, str] = Field( - description="Error details", - examples=[ - { - "response": "Error while validation question", - "cause": "Failed to handle request to https://bam-api.res.ibm.com/v2/text", - }, - { - "response": "Error retrieving conversation history", - "cause": "Invalid conversation ID 1237-e89b-12d3-a456-426614174000", - }, - ], - ) - model_config = { "json_schema_extra": { "examples": [ { - "detail": { - "response": "Error while validation question", - "cause": "Failed to handle request to https://bam-api.res.ibm.com/v2/text", - }, - }, - { - "detail": { - "response": "Error retrieving conversation history", - "cause": "Invalid conversation ID 1237-e89b-12d3-a456-426614174000", - }, - }, + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "topic_summary": "Openshift Microservices Deployment Strategies", + "last_message_timestamp": 1704067200.0, + } + ], + } ] } } -class FeedbackStatusUpdateResponse(BaseModel): +class FeedbackStatusUpdateResponse(AbstractSuccessfulResponse): """ Model representing a response to a feedback status update request. Attributes: status: The previous and current status of the service and who updated it. - - Example: - ```python - status_response = StatusResponse( - status={ - "previous_status": true, - "updated_status": false, - "updated_by": "user/test", - "timestamp": "2023-03-15 12:34:56" - }, - ) - ``` """ status: dict - # provides examples for /docs endpoint model_config = { "json_schema_extra": { "examples": [ @@ -990,40 +958,124 @@ class FeedbackStatusUpdateResponse(BaseModel): } -class ConversationUpdateResponse(BaseModel): +class ConversationUpdateResponse(AbstractSuccessfulResponse): """Model representing a response for updating a conversation topic summary. Attributes: conversation_id: The conversation ID (UUID) that was updated. success: Whether the update was successful. message: A message about the update result. 
- - Example: - ```python - update_response = ConversationUpdateResponse( - conversation_id="123e4567-e89b-12d3-a456-426614174000", - success=True, - message="Topic summary updated successfully", - ) - ``` """ conversation_id: str = Field( ..., description="The conversation ID (UUID) that was updated", - examples=["123e4567-e89b-12d3-a456-426614174000"], ) success: bool = Field( ..., description="Whether the update was successful", - examples=[True], ) message: str = Field( ..., description="A message about the update result", - examples=["Topic summary updated successfully"], ) + model_config = { + "json_schema_extra": { + "examples": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "success": True, + "message": "Topic summary updated successfully", + } + ] + } + } + + +class ConfigurationResponse(AbstractSuccessfulResponse): + """Success response model for the config endpoint.""" + + configuration: Configuration + + model_config = { + "json_schema_extra": { + "examples": [ + { + "configuration": { + "name": "lightspeed-stack", + "service": { + "host": "localhost", + "port": 8080, + "auth_enabled": False, + "workers": 1, + "color_log": True, + "access_log": True, + "tls_config": { + "tls_certificate_path": None, + "tls_key_path": None, + "tls_key_password": None, + }, + "cors": { + "allow_origins": ["*"], + "allow_credentials": False, + "allow_methods": ["*"], + "allow_headers": ["*"], + }, + }, + "llama_stack": { + "url": "http://localhost:8321", + "api_key": "*****", + "use_as_library_client": False, + "library_client_config_path": None, + }, + "user_data_collection": { + "feedback_enabled": True, + "feedback_storage": "/tmp/data/feedback", + "transcripts_enabled": False, + "transcripts_storage": "/tmp/data/transcripts", + }, + "database": { + "sqlite": {"db_path": "/tmp/lightspeed-stack.db"}, + "postgres": None, + }, + "mcp_servers": [ + { + "name": "server1", + "provider_id": "provider1", + "url": "http://url.com:1", + }, + ], + "authentication": { + "module": "noop", + "skip_tls_verification": False, + }, + "authorization": {"access_rules": []}, + "customization": None, + "inference": { + "default_model": "gpt-4-turbo", + "default_provider": "openai", + }, + "conversation_cache": { + "type": None, + "memory": None, + "sqlite": None, + "postgres": None, + }, + "byok_rag": [], + "quota_handlers": { + "sqlite": None, + "postgres": None, + "limiters": [], + "scheduler": {"period": 1}, + "enable_token_history": False, + }, + } + } + ] + } + } + class DetailModel(BaseModel): """Nested detail model for error responses.""" @@ -1033,264 +1085,601 @@ class DetailModel(BaseModel): class AbstractErrorResponse(BaseModel): - """Base class for all error responses. + """ + Base class for error responses. - Contains a nested `detail` field. + Attributes: + status_code (int): HTTP status code for the error response. + detail (DetailModel): The detail model containing error summary and cause. """ + status_code: int detail: DetailModel - def dump_detail(self) -> dict: - """Return dict in FastAPI HTTPException format.""" - return self.detail.model_dump() + def __init__(self, *, response: str, cause: str, status_code: int): + """Initialize an AbstractErrorResponse. - -class BadRequestResponse(AbstractErrorResponse): - """400 Bad Request - Invalid resource identifier.""" - - def __init__(self, resource: str, resource_id: str): - """Initialize a BadRequestResponse for invalid resource identifiers.""" + Args: + response: Short summary of the error. 
+ cause: Detailed explanation of what caused the error. + status_code: HTTP status code for the error response. + """ super().__init__( - detail=DetailModel( - response="Invalid conversation ID format", - cause=f"{resource.title()} ID {resource_id} has invalid format", - ) + status_code=status_code, detail=DetailModel(response=response, cause=cause) ) - model_config = { - "json_schema_extra": { - "examples": [ - { - "detail": { - "response": "Invalid conversation ID format", - "cause": "Conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format", # pylint: disable=line-too-long - } - } - ] + @classmethod + def get_description(cls) -> str: + """Get the description from the class attribute or docstring.""" + return getattr(cls, "description", cls.__doc__ or "") + + @classmethod + def openapi_response(cls, examples: Optional[list[str]] = None) -> dict[str, Any]: + """Generate FastAPI response dict with examples from model_config.""" + schema = cls.model_json_schema() + model_examples = schema.get("examples", []) + + named_examples: dict[str, Any] = {} + for ex in model_examples: + label = ex.get("label", None) + if label is None: + raise SchemaError(f"Example {ex} in {cls.__name__} has no label") + if examples is None or label in examples: + detail = ex.get("detail") + if detail is not None: + named_examples[label] = {"value": {"detail": detail}} + + content: dict[str, Any] = { + "application/json": {"examples": named_examples or None} } - } + return { + "description": cls.get_description(), + "model": cls, + "content": content, + } -class AccessDeniedResponse(AbstractErrorResponse): - """403 Access Denied - User does not have permission to perform the action.""" - def __init__(self, user_id: str, resource: str, resource_id: str, action: str): - """Initialize an AccessDeniedResponse when user lacks permission for an action.""" - super().__init__( - detail=DetailModel( - response="Access denied", - cause=f"User {user_id} does not have permission to {action} {resource} with ID {resource_id}.", # pylint: disable=line-too-long - ) - ) +class BadRequestResponse(AbstractErrorResponse): + """400 Bad Request. Invalid resource identifier.""" + description: ClassVar[str] = BAD_REQUEST_DESCRIPTION model_config = { "json_schema_extra": { "examples": [ { + "label": "conversation_id", "detail": { - "response": "Access denied", - "cause": "User 6789 does not have permission to access conversation with ID 123e4567-e89b-12d3-a456-426614174000.", # pylint: disable=line-too-long - } + "response": "Invalid conversation ID format", + "cause": ( + "The conversation ID " + "123e4567-e89b-12d3-a456-426614174000 has invalid format." + ), + }, } ] } } + def __init__(self, *, resource: str, resource_id: str): + """Initialize a BadRequestResponse for invalid resource ID format. -class NotFoundResponse(AbstractErrorResponse): - """404 Not Found - Resource does not exist.""" - - def __init__(self, resource: str, resource_id: str): - """Initialize a NotFoundResponse when a resource cannot be located.""" + Args: + resource: The type of resource (e.g., "conversation", "provider"). + resource_id: The invalid resource ID. + """ + response = f"Invalid {resource} ID format" + cause = f"The {resource} ID {resource_id} has invalid format." 
super().__init__( - detail=DetailModel( - response=f"{resource.title()} not found", - cause=f"{resource.title()} with ID {resource_id} does not exist.", - ) + response=response, cause=cause, status_code=status.HTTP_400_BAD_REQUEST ) + +class UnauthorizedResponse(AbstractErrorResponse): + """401 Unauthorized - Missing or invalid credentials.""" + + description: ClassVar[str] = UNAUTHORIZED_DESCRIPTION model_config = { "json_schema_extra": { "examples": [ { + "label": "missing header", "detail": { - "response": "Conversation not found", - "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist.", # pylint: disable=line-too-long - } - } + "response": "Missing or invalid credentials provided by client", + "cause": "No Authorization header found", + }, + }, + { + "label": "missing token", + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "No token found in Authorization header", + }, + }, + { + "label": "expired token", + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "Token has expired", + }, + }, + { + "label": "invalid signature", + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "Invalid token signature", + }, + }, + { + "label": "invalid key", + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "Token signed by unknown key", + }, + }, + { + "label": "missing claim", + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "Token missing claim: user_id", + }, + }, + { + "label": "invalid k8s token", + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "Invalid or expired Kubernetes token", + }, + }, + { + "label": "invalid jwk token", + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "Authentication key server returned invalid data", + }, + }, ] } } - -class ServiceUnavailableResponse(AbstractErrorResponse): - """503 Backend Unavailable - Unable to reach backend service.""" - - def __init__(self, backend_name: str, cause: str): - """Initialize a ServiceUnavailableResponse when a backend service is unreachable.""" + def __init__(self, *, cause: str): + """Initialize UnauthorizedResponse.""" + response_msg = "Missing or invalid credentials provided by client" super().__init__( - detail=DetailModel( - response=f"Unable to connect to {backend_name}", cause=cause - ) + response=response_msg, cause=cause, status_code=status.HTTP_401_UNAUTHORIZED ) + +class ForbiddenResponse(AbstractErrorResponse): + """403 Forbidden. 
Access denied."""
+
+    description: ClassVar[str] = FORBIDDEN_DESCRIPTION
     model_config = {
         "json_schema_extra": {
             "examples": [
                 {
+                    "label": "conversation read",
                     "detail": {
-                        "response": "Unable to connect to Llama Stack",
-                        "cause": "Connection error while trying to reach Llama Stack API.",
-                    }
-                }
+                        "response": "User does not have permission to perform this action",
+                        "cause": (
+                            "User 6789 does not have permission to read conversation "
+                            "with ID 123e4567-e89b-12d3-a456-426614174000"
+                        ),
+                    },
+                },
+                {
+                    "label": "conversation delete",
+                    "detail": {
+                        "response": "User does not have permission to perform this action",
+                        "cause": (
+                            "User 6789 does not have permission to delete conversation "
+                            "with ID 123e4567-e89b-12d3-a456-426614174000"
+                        ),
+                    },
+                },
+                {
+                    "label": "endpoint",
+                    "detail": {
+                        "response": "User does not have permission to access this endpoint",
+                        "cause": "User 6789 is not authorized to access this endpoint.",
+                    },
+                },
+                {
+                    "label": "feedback",
+                    "detail": {
+                        "response": "Feedback is disabled",
+                        "cause": "Storing feedback is disabled.",
+                    },
+                },
+                {
+                    "label": "model override",
+                    "detail": {
+                        "response": (
+                            "This instance does not permit overriding model/provider in the "
+                            "query request (missing permission: MODEL_OVERRIDE). Please remove "
+                            "the model and provider fields from your request."
+                        ),
+                        "cause": (
+                            "User lacks model_override permission required "
+                            "to override model/provider."
+                        ),
+                    },
+                },
             ]
         }
     }
 
+    @classmethod
+    def conversation(
+        cls, action: str, resource_id: str, user_id: str
+    ) -> "ForbiddenResponse":
+        """Create a ForbiddenResponse for conversation access denied."""
+        response = "User does not have permission to perform this action"
+        cause = (
+            f"User {user_id} does not have permission to "
+            f"{action} conversation with ID {resource_id}"
+        )
+        return cls(response=response, cause=cause)
+
+    @classmethod
+    def endpoint(cls, user_id: str) -> "ForbiddenResponse":
+        """Create a ForbiddenResponse for endpoint access denied."""
+        response = "User does not have permission to access this endpoint"
+        cause = f"User {user_id} is not authorized to access this endpoint."
+        return cls(response=response, cause=cause)
+
+    @classmethod
+    def feedback_disabled(cls) -> "ForbiddenResponse":
+        """Create a ForbiddenResponse for disabled feedback."""
+        return cls(
+            response="Feedback is disabled",
+            cause="Storing feedback is disabled.",
+        )
+
+    @classmethod
+    def model_override(cls) -> "ForbiddenResponse":
+        """Create a ForbiddenResponse for model/provider override denied."""
+        return cls(
+            response=(
+                "This instance does not permit overriding model/provider in the "
+                "query request (missing permission: MODEL_OVERRIDE). Please remove "
+                "the model and provider fields from your request."
+            ),
+            cause=(
+                f"User lacks {Action.MODEL_OVERRIDE.value} permission required "
+                "to override model/provider." 
+ ), + ) - def __init__(self, user_id: str | None = None): - """Initialize an UnauthorizedResponse when authentication fails.""" - cause_msg = ( - f"User {user_id} is unauthorized" - if user_id - else "Missing or invalid credentials provided by client" + def __init__(self, *, response: str, cause: str): + """Initialize a ForbiddenResponse.""" + super().__init__( + response=response, cause=cause, status_code=status.HTTP_403_FORBIDDEN ) - super().__init__(detail=DetailModel(response="Unauthorized", cause=cause_msg)) + +class NotFoundResponse(AbstractErrorResponse): + """404 Not Found - Resource does not exist.""" + + description: ClassVar[str] = NOT_FOUND_DESCRIPTION model_config = { "json_schema_extra": { "examples": [ { + "label": "conversation", "detail": { - "response": "Unauthorized", - "cause": "Missing or invalid credentials provided by client", - } - } + "response": "Conversation not found", + "cause": ( + "Conversation with ID " + "123e4567-e89b-12d3-a456-426614174000 does not exist" + ), + }, + }, + { + "label": "provider", + "detail": { + "response": "Provider not found", + "cause": "Provider with ID openai does not exist", + }, + }, + { + "label": "model", + "detail": { + "response": "Model not found", + "cause": "Model with ID gpt-4-turbo is not configured", + }, + }, + { + "label": "rag", + "detail": { + "response": "Rag not found", + "cause": ( + "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist" + ), + }, + }, ] } } + def __init__(self, *, resource: str, resource_id: str): + """Initialize a NotFoundResponse for a missing resource. -class ForbiddenResponse(UnauthorizedResponse): - """403 Forbidden - User does not have access to this resource.""" - - def __init__(self, user_id: str, resource: str, resource_id: str): - """Initialize a ForbiddenResponse when user is authenticated but lacks resource access.""" - super().__init__(user_id=user_id) - self.detail = DetailModel( - response="Access denied", - cause=f"User {user_id} is not allowed to access {resource} with ID {resource_id}.", + Args: + resource: The type of resource that was not found (e.g., "conversation", "model"). + resource_id: The ID of the resource that was not found. + """ + response = f"{resource.title()} not found" + cause = f"{resource.title()} with ID {resource_id} does not exist" + super().__init__( + response=response, cause=cause, status_code=status.HTTP_404_NOT_FOUND ) + +class UnprocessableEntityResponse(AbstractErrorResponse): + """422 Unprocessable Entity - Request validation failed.""" + + description: ClassVar[str] = UNPROCESSABLE_CONTENT_DESCRIPTION model_config = { "json_schema_extra": { "examples": [ { + "label": "invalid format", "detail": { - "response": "Access denied", - "cause": "User 42 is not allowed to access conversation with ID 123e4567-e89b-12d3-a456-426614174000.", # pylint: disable=line-too-long - } - } + "response": "Invalid request format", + "cause": "Invalid request format. 
The request body could not be parsed.",
+                    },
+                },
+                {
+                    "label": "missing attributes",
+                    "detail": {
+                        "response": "Missing required attributes",
+                        "cause": "Missing required attributes: ['query', 'model', 'provider']",
+                    },
+                },
+                {
+                    "label": "invalid value",
+                    "detail": {
+                        "response": "Invalid attribute value",
+                        "cause": "Invalid attachment type: must be one of ['text/plain', "
+                        "'application/json', 'application/yaml', 'application/xml']",
+                    },
+                },
             ]
         }
     }
 
-
-class QuotaExceededResponse(AbstractErrorResponse):
-    """429 Too Many Requests - LLM quota exceeded."""
-
-    def __init__(
-        self,
-        user_id: str,
-        model_name: str,  # pylint: disable=unused-argument
-        limit: int,  # pylint: disable=unused-argument
-    ):
-        """Initialize a QuotaExceededResponse."""
+    def __init__(self, *, response: str, cause: str):
+        """Initialize UnprocessableEntityResponse."""
         super().__init__(
-            detail=DetailModel(
-                response="The quota has been exceeded",
-                cause=(f"User {user_id} has no available tokens."),
-            )
+            response=response,
+            cause=cause,
+            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
         )
 
-    # TODO(LCORE-837): add factories for custom cause creation
+
+class QuotaExceededResponse(AbstractErrorResponse):
+    """429 Too Many Requests - Quota limit exceeded."""
+
+    description: ClassVar[str] = QUOTA_EXCEEDED_DESCRIPTION
     model_config = {
         "json_schema_extra": {
             "examples": [
                 {
+                    "label": "model",
+                    "detail": {
+                        "response": "The model quota has been exceeded",
+                        "cause": "The token quota for model gpt-4-turbo has been exceeded.",
+                    },
+                },
+                {
+                    "label": "user none",
                     "detail": {
                         "response": "The quota has been exceeded",
                         "cause": "User 123 has no available tokens.",
-                    }
+                    },
                 },
                 {
+                    "label": "cluster none",
                     "detail": {
                         "response": "The quota has been exceeded",
                         "cause": "Cluster has no available tokens.",
-                    }
+                    },
                 },
                 {
+                    "label": "subject none",
                     "detail": {
                         "response": "The quota has been exceeded",
                         "cause": "Unknown subject 999 has no available tokens.",
-                    }
+                    },
                 },
                 {
+                    "label": "user insufficient",
                     "detail": {
                         "response": "The quota has been exceeded",
                         "cause": "User 123 has 5 tokens, but 10 tokens are needed.",
-                    }
+                    },
                 },
                 {
+                    "label": "cluster insufficient",
                     "detail": {
                         "response": "The quota has been exceeded",
                         "cause": "Cluster has 500 tokens, but 900 tokens are needed.",
-                    }
+                    },
                 },
                 {
+                    "label": "subject insufficient",
                     "detail": {
                         "response": "The quota has been exceeded",
                         "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.",
-                    }
+                    },
                 },
-                {
-                    "detail": {
-                        "response": "The model quota has been exceeded",
-                        "cause": "The token quota for model gpt-4-turbo has been exceeded.",
-                    }
-                },
             ]
         }
     }
 
+    @classmethod
+    def model(cls, model_name: str) -> "QuotaExceededResponse":
+        """Create a QuotaExceededResponse for model quota exceeded."""
+        response = "The model quota has been exceeded"
+        cause = f"The token quota for model {model_name} has been exceeded." 
+ return cls(response=response, cause=cause) + + @classmethod + def from_exception(cls, exc: QuotaExceedError) -> "QuotaExceededResponse": + """Create a QuotaExceededResponse from a QuotaExceedError exception.""" + response = "The quota has been exceeded" + cause = str(exc) + return cls(response=response, cause=cause) + + def __init__(self, *, response: str, cause: str) -> None: + """Initialize a QuotaExceededResponse.""" + super().__init__( + response=response, + cause=cause, + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + ) + + +class InternalServerErrorResponse(AbstractErrorResponse): + """500 Internal Server Error.""" + + description: ClassVar[str] = INTERNAL_SERVER_ERROR_DESCRIPTION + + model_config = { + "json_schema_extra": { + "examples": [ + { + "label": "internal", + "detail": { + "response": "Internal server error", + "cause": "An unexpected error occurred while processing the request.", + }, }, { + "label": "configuration", "detail": { - "response": "The model quota has been exceeded", - "cause": "The token quota for model gpt-4-turbo has been exceeded.", - } + "response": "Configuration is not loaded", + "cause": "Lightspeed Stack configuration has not been initialized.", + }, + }, + { + "label": "feedback storage", + "detail": { + "response": "Failed to store feedback", + "cause": "Failed to store feedback at directory: /path/example", + }, + }, + { + "label": "query", + "detail": { + "response": "Error while processing query", + "cause": "Failed to call backend API", + }, + }, + { + "label": "conversation cache", + "detail": { + "response": "Conversation cache not configured", + "cause": "Conversation cache is not configured or unavailable.", + }, + }, + { + "label": "database", + "detail": { + "response": "Database query failed", + "cause": "Failed to query the database", + }, }, ] } } + @classmethod + def generic(cls) -> "InternalServerErrorResponse": + """Create a generic InternalServerErrorResponse.""" + return cls( + response="Internal server error", + cause="An unexpected error occurred while processing the request.", + ) + + @classmethod + def configuration_not_loaded(cls) -> "InternalServerErrorResponse": + """Create an InternalServerErrorResponse for configuration not loaded.""" + return cls( + response="Configuration is not loaded", + cause="Lightspeed Stack configuration has not been initialized.", + ) + + @classmethod + def feedback_path_invalid(cls, path: str) -> "InternalServerErrorResponse": + """Create an InternalServerErrorResponse for invalid feedback path.""" + return cls( + response="Failed to store feedback", + cause=f"Failed to store feedback at directory: {path}", + ) + + @classmethod + def query_failed(cls, backend_url: str) -> "InternalServerErrorResponse": + """Create an InternalServerErrorResponse for query failure.""" + return cls( + response="Error while processing query", + cause=f"Failed to call backend: {backend_url}", + ) + + @classmethod + def cache_unavailable(cls) -> "InternalServerErrorResponse": + """Create an InternalServerErrorResponse for cache unavailable.""" + return cls( + response="Conversation cache not configured", + cause="Conversation cache is not configured or unavailable.", + ) -class InvalidFeedbackStoragePathResponse(AbstractErrorResponse): - """500 Internal Error - Invalid feedback storage path.""" + @classmethod + def database_error(cls) -> "InternalServerErrorResponse": + """Create an InternalServerErrorResponse for database error.""" + return cls( + response="Database query failed", + cause="Failed to query the 
database", + ) - def __init__(self, storage_path: str): - """Initialize an InvalidFeedbackStoragePathResponse for feedback storage failures.""" + def __init__(self, *, response: str, cause: str) -> None: + """Initialize an InternalServerErrorResponse.""" super().__init__( - detail=DetailModel( - response="Failed to store feedback", - cause=f"Invalid feedback storage path: {storage_path}", - ) + response=response, + cause=cause, + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, ) + +class ServiceUnavailableResponse(AbstractErrorResponse): + """503 Backend Unavailable.""" + + description: ClassVar[str] = SERVICE_UNAVAILABLE_DESCRIPTION model_config = { "json_schema_extra": { "examples": [ { + "label": "llama stack", "detail": { - "response": "Failed to store feedback", - "cause": ( - "Invalid feedback storage path: " - "/var/app/data/feedbacks/invalid_path" - ), - } + "response": "Unable to connect to Llama Stack", + "cause": "Connection error while trying to reach backend service.", + }, } ] } } + + def __init__(self, *, backend_name: str, cause: str): + """Initialize a ServiceUnavailableResponse. + + Args: + backend_name: The name of the backend service that is unavailable. + cause: Detailed explanation of why the service is unavailable. + """ + response = f"Unable to connect to {backend_name}" + super().__init__( + response=response, + cause=cause, + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + ) diff --git a/tests/unit/models/responses/test_authorized_response.py b/tests/unit/models/responses/test_authorized_response.py index a1ad0ea2..c60efc01 100644 --- a/tests/unit/models/responses/test_authorized_response.py +++ b/tests/unit/models/responses/test_authorized_response.py @@ -1,7 +1,6 @@ """Unit tests for AuthorizedResponse model.""" import pytest - from pydantic import ValidationError from models.responses import AuthorizedResponse diff --git a/tests/unit/models/responses/test_error_responses.py b/tests/unit/models/responses/test_error_responses.py new file mode 100644 index 00000000..3d8fe7ee --- /dev/null +++ b/tests/unit/models/responses/test_error_responses.py @@ -0,0 +1,684 @@ +# pylint: disable=unsupported-membership-test,unsubscriptable-object + +"""Unit tests for all error response models.""" + +from pydantic_core import SchemaError +import pytest +from fastapi import status + +from models.responses import ( + BAD_REQUEST_DESCRIPTION, + FORBIDDEN_DESCRIPTION, + INTERNAL_SERVER_ERROR_DESCRIPTION, + NOT_FOUND_DESCRIPTION, + QUOTA_EXCEEDED_DESCRIPTION, + SERVICE_UNAVAILABLE_DESCRIPTION, + UNAUTHORIZED_DESCRIPTION, + UNPROCESSABLE_CONTENT_DESCRIPTION, + AbstractErrorResponse, + BadRequestResponse, + DetailModel, + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, + QuotaExceededResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, + UnprocessableEntityResponse, +) +from quota.quota_exceed_error import QuotaExceedError + + +class TestBadRequestResponse: + """Test cases for BadRequestResponse.""" + + def test_constructor(self) -> None: + """Test BadRequestResponse with valid parameters.""" + response = BadRequestResponse( + resource="conversation", resource_id="test-id-123" + ) + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Invalid conversation ID format" + assert ( + response.detail.cause + == "The conversation ID test-id-123 has invalid format." 
+ ) + + def test_different_resource_types(self) -> None: + """Test BadRequestResponse with different resource types.""" + response = BadRequestResponse(resource="model", resource_id="model-123") + assert response.detail.response == "Invalid model ID format" + assert response.detail.cause == "The model ID model-123 has invalid format." + + response = BadRequestResponse(resource="provider", resource_id="provider-456") + assert response.detail.response == "Invalid provider ID format" + assert ( + response.detail.cause == "The provider ID provider-456 has invalid format." + ) + + def test_openapi_response(self) -> None: + """Test BadRequestResponse.openapi_response() method.""" + schema = BadRequestResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = BadRequestResponse.openapi_response() + assert result["description"] == BAD_REQUEST_DESCRIPTION + assert result["model"] == BadRequestResponse + assert "examples" in result["content"]["application/json"] + examples = result["content"]["application/json"]["examples"] + + # Verify example count matches schema examples count + assert len(examples) == expected_count + assert expected_count == 1 + + # Verify example structure + assert "conversation_id" in examples + conversation_example = examples["conversation_id"] + assert "value" in conversation_example + assert "detail" in conversation_example["value"] + assert conversation_example["value"]["detail"]["response"] == ( + "Invalid conversation ID format" + ) + + def test_openapi_response_with_explicit_examples(self) -> None: + """Test BadRequestResponse.openapi_response() with explicit examples.""" + result = BadRequestResponse.openapi_response(examples=["conversation_id"]) + examples = result["content"]["application/json"]["examples"] + + # Verify only 1 example is returned when explicitly specified + assert len(examples) == 1 + assert "conversation_id" in examples + + +class TestUnauthorizedResponse: + """Test cases for UnauthorizedResponse.""" + + def test_constructor(self) -> None: + """Test UnauthorizedResponse with cause.""" + response = UnauthorizedResponse(cause="Token has expired") + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_401_UNAUTHORIZED + assert isinstance(response.detail, DetailModel) + assert ( + response.detail.response + == "Missing or invalid credentials provided by client" + ) + assert response.detail.cause == "Token has expired" + + def test_different_causes(self) -> None: + """Test UnauthorizedResponse with different causes.""" + response = UnauthorizedResponse(cause="No Authorization header found") + assert response.detail.cause == "No Authorization header found" + + response = UnauthorizedResponse(cause="Invalid token signature") + assert response.detail.cause == "Invalid token signature" + + response = UnauthorizedResponse(cause="Token missing claim: user_id") + assert response.detail.cause == "Token missing claim: user_id" + + def test_openapi_response(self) -> None: + """Test UnauthorizedResponse.openapi_response() method.""" + schema = UnauthorizedResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = UnauthorizedResponse.openapi_response() + assert result["description"] == UNAUTHORIZED_DESCRIPTION + assert result["model"] == UnauthorizedResponse + assert "examples" in result["content"]["application/json"] + examples = result["content"]["application/json"]["examples"] + + # 
Verify example count matches schema examples count
+        assert len(examples) == expected_count
+        assert expected_count == 8
+
+        # Verify all labeled examples are present
+        assert "missing header" in examples
+        assert "missing token" in examples
+        assert "expired token" in examples
+        assert "invalid signature" in examples
+        assert "invalid key" in examples
+        assert "missing claim" in examples
+        assert "invalid k8s token" in examples
+        assert "invalid jwk token" in examples
+
+        # Verify example structure for one example
+        missing_creds_example = examples["missing header"]
+        assert "value" in missing_creds_example
+        assert "detail" in missing_creds_example["value"]
+        assert missing_creds_example["value"]["detail"]["response"] == (
+            "Missing or invalid credentials provided by client"
+        )
+        assert (
+            missing_creds_example["value"]["detail"]["cause"]
+            == "No Authorization header found"
+        )
+
+    def test_openapi_response_with_explicit_examples(self) -> None:
+        """Test UnauthorizedResponse.openapi_response() with explicit examples."""
+        result = UnauthorizedResponse.openapi_response(examples=["expired token"])
+        examples = result["content"]["application/json"]["examples"]
+
+        # Verify only 1 example is returned when explicitly specified
+        assert len(examples) == 1
+        assert "expired token" in examples
+        assert "missing header" not in examples
+
+
+class TestForbiddenResponse:
+    """Test cases for ForbiddenResponse."""
+
+    def test_factory_conversation(self) -> None:
+        """Test ForbiddenResponse.conversation() factory method."""
+        response = ForbiddenResponse.conversation("read", "conv-123", "user-456")
+        assert isinstance(response, AbstractErrorResponse)
+        assert response.status_code == status.HTTP_403_FORBIDDEN
+        assert isinstance(response.detail, DetailModel)
+        assert (
+            response.detail.response
+            == "User does not have permission to perform this action"
+        )
+        assert response.detail.cause == (
+            "User user-456 does not have permission to read conversation "
+            "with ID conv-123"
+        )
+
+    def test_factory_endpoint(self) -> None:
+        """Test ForbiddenResponse.endpoint() factory method."""
+        response = ForbiddenResponse.endpoint("user-789")
+        assert isinstance(response, AbstractErrorResponse)
+        assert response.status_code == status.HTTP_403_FORBIDDEN
+        assert isinstance(response.detail, DetailModel)
+        assert (
+            response.detail.response
+            == "User does not have permission to access this endpoint"
+        )
+        assert (
+            response.detail.cause
+            == "User user-789 is not authorized to access this endpoint."
+        )
+
+    def test_factory_feedback_disabled(self) -> None:
+        """Test ForbiddenResponse.feedback_disabled() factory method."""
+        response = ForbiddenResponse.feedback_disabled()
+        assert isinstance(response, AbstractErrorResponse)
+        assert response.status_code == status.HTTP_403_FORBIDDEN
+        assert isinstance(response.detail, DetailModel)
+        assert response.detail.response == "Feedback is disabled"
+        assert response.detail.cause == "Storing feedback is disabled." 
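+
+    def test_factory_model_override(self) -> None:
+        """Test ForbiddenResponse.model_override() factory method.
+
+        A minimal sketch covering the remaining factory; the expected strings
+        mirror model_override() in src/models/responses.py.
+        """
+        response = ForbiddenResponse.model_override()
+        assert isinstance(response, AbstractErrorResponse)
+        assert response.status_code == status.HTTP_403_FORBIDDEN
+        assert isinstance(response.detail, DetailModel)
+        # The response text asks the client to drop the overriding fields.
+        assert "missing permission: MODEL_OVERRIDE" in response.detail.response
+        # Assumes Action.MODEL_OVERRIDE.value == "model_override", as shown in
+        # the "model override" example in responses.py.
+        assert response.detail.cause == (
+            "User lacks model_override permission required "
+            "to override model/provider."
+        )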
+
+    def test_openapi_response(self) -> None:
+        """Test ForbiddenResponse.openapi_response() method."""
+        schema = ForbiddenResponse.model_json_schema()
+        model_examples = schema.get("examples", [])
+        expected_count = len(model_examples)
+
+        result = ForbiddenResponse.openapi_response()
+        assert result["description"] == FORBIDDEN_DESCRIPTION
+        assert result["model"] == ForbiddenResponse
+        assert "examples" in result["content"]["application/json"]
+        examples = result["content"]["application/json"]["examples"]
+
+        # Verify example count matches schema examples count
+        assert len(examples) == expected_count
+        assert expected_count == 5
+
+        # Verify expected labeled examples are present
+        assert "conversation read" in examples
+        assert "conversation delete" in examples
+        assert "endpoint" in examples
+        assert "feedback" in examples
+
+        # Verify example structure for one example
+        feedback_example = examples["feedback"]
+        assert "value" in feedback_example
+        assert "detail" in feedback_example["value"]
+        assert (
+            feedback_example["value"]["detail"]["response"]
+            == "Feedback is disabled"
+        )
+
+    def test_openapi_response_with_explicit_examples(self) -> None:
+        """Test ForbiddenResponse.openapi_response() with explicit examples."""
+        result = ForbiddenResponse.openapi_response(examples=["feedback"])
+        examples = result["content"]["application/json"]["examples"]
+
+        # Verify only 1 example is returned when explicitly specified
+        assert len(examples) == 1
+        assert "feedback" in examples
+        assert "conversation read" not in examples
+
+
+class TestUnprocessableEntityResponse:
+    """Test cases for UnprocessableEntityResponse."""
+
+    def test_constructor(self) -> None:
+        """Test UnprocessableEntityResponse with valid parameters."""
+        response = UnprocessableEntityResponse(
+            response="Invalid attribute value",
+            cause="Field 'temperature' must be a number between 0 and 2",
+        )
+        assert isinstance(response, AbstractErrorResponse)
+        assert response.status_code == status.HTTP_422_UNPROCESSABLE_CONTENT
+        assert isinstance(response.detail, DetailModel)
+        assert response.detail.response == "Invalid attribute value"
+        assert (
+            response.detail.cause
+            == "Field 'temperature' must be a number between 0 and 2"
+        )
+
+    def test_different_responses(self) -> None:
+        """Test UnprocessableEntityResponse with different response messages."""
+        response = UnprocessableEntityResponse(
+            response="Invalid request format",
+            cause="Invalid request format. The request body could not be parsed.",
+        )
+        assert response.detail.response == "Invalid request format"
+        assert response.detail.cause == (
+            "Invalid request format. The request body could not be parsed."
+ ) + + response = UnprocessableEntityResponse( + response="Missing required attributes", + cause="Missing required attributes: ['query', 'model', 'provider']", + ) + assert response.detail.response == "Missing required attributes" + assert response.detail.cause == ( + "Missing required attributes: ['query', 'model', 'provider']" + ) + + def test_openapi_response(self) -> None: + """Test UnprocessableEntityResponse.openapi_response() method.""" + schema = UnprocessableEntityResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = UnprocessableEntityResponse.openapi_response() + assert result["description"] == UNPROCESSABLE_CONTENT_DESCRIPTION + assert result["model"] == UnprocessableEntityResponse + assert "examples" in result["content"]["application/json"] + examples = result["content"]["application/json"]["examples"] + + # Verify example count matches schema examples count + assert len(examples) == expected_count + assert expected_count == 3 + + # Verify all labeled examples are present + assert "invalid format" in examples + assert "missing attributes" in examples + assert "invalid value" in examples + + # Verify example structure for one example + invalid_format_example = examples["invalid format"] + assert "value" in invalid_format_example + assert "detail" in invalid_format_example["value"] + assert ( + invalid_format_example["value"]["detail"]["response"] + == "Invalid request format" + ) + + def test_openapi_response_with_explicit_examples(self) -> None: + """Test UnprocessableEntityResponse.openapi_response() with explicit examples.""" + result = UnprocessableEntityResponse.openapi_response( + examples=["missing attributes"] + ) + examples = result["content"]["application/json"]["examples"] + + # Verify only 1 example is returned when explicitly specified + assert len(examples) == 1 + assert "missing attributes" in examples + assert "invalid format" not in examples + + +class TestQuotaExceededResponse: + """Test cases for QuotaExceededResponse.""" + + def test_factory_model(self) -> None: + """Test QuotaExceededResponse.model() factory method.""" + response = QuotaExceededResponse.model("gpt-4-turbo") + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_429_TOO_MANY_REQUESTS + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "The model quota has been exceeded" + assert ( + response.detail.cause + == "The token quota for model gpt-4-turbo has been exceeded." 
+ ) + + def test_factory_from_exception(self) -> None: + """Test QuotaExceededResponse.from_exception() factory method.""" + exc = QuotaExceedError("123", "u", 0, 0) + response = QuotaExceededResponse.from_exception(exc) + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_429_TOO_MANY_REQUESTS + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "The quota has been exceeded" + assert response.detail.cause == "User 123 has no available tokens" + + def test_openapi_response(self) -> None: + """Test QuotaExceededResponse.openapi_response() method.""" + schema = QuotaExceededResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = QuotaExceededResponse.openapi_response() + assert result["description"] == QUOTA_EXCEEDED_DESCRIPTION + assert result["model"] == QuotaExceededResponse + assert "examples" in result["content"]["application/json"] + examples = result["content"]["application/json"]["examples"] + + # Verify example count matches schema examples count + assert len(examples) == expected_count + assert expected_count == 7 + + # Verify all labeled examples are present + assert "model" in examples + assert "user none" in examples + assert "cluster none" in examples + assert "subject none" in examples + assert "user insufficient" in examples + assert "cluster insufficient" in examples + assert "subject insufficient" in examples + + # Verify example structure for one example + model_example = examples["model"] + assert "value" in model_example + assert "detail" in model_example["value"] + assert model_example["value"]["detail"]["response"] == ( + "The model quota has been exceeded" + ) + + def test_openapi_response_with_explicit_examples(self) -> None: + """Test QuotaExceededResponse.openapi_response() with explicit examples.""" + result = QuotaExceededResponse.openapi_response(examples=["model"]) + examples = result["content"]["application/json"]["examples"] + + # Verify only 1 example is returned when explicitly specified + assert len(examples) == 1 + assert "model" in examples + assert "user none" not in examples + + +class TestNotFoundResponse: + """Test cases for NotFoundResponse.""" + + def test_constructor(self) -> None: + """Test NotFoundResponse with valid parameters.""" + response = NotFoundResponse(resource="conversation", resource_id="conv-123") + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_404_NOT_FOUND + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Conversation not found" + assert response.detail.cause == "Conversation with ID conv-123 does not exist" + + def test_different_resources(self) -> None: + """Test NotFoundResponse with different resource types.""" + response = NotFoundResponse(resource="provider", resource_id="openai") + assert response.detail.response == "Provider not found" + assert response.detail.cause == "Provider with ID openai does not exist" + + response = NotFoundResponse(resource="model", resource_id="gpt-4") + assert response.detail.response == "Model not found" + assert response.detail.cause == "Model with ID gpt-4 does not exist" + + def test_resource_title_capitalization(self) -> None: + """Test NotFoundResponse properly capitalizes resource names.""" + response = NotFoundResponse(resource="conversation", resource_id="test") + assert response.detail.response == "Conversation not found" + + response = 
NotFoundResponse(resource="MODEL", resource_id="test") + assert response.detail.response == "Model not found" + + def test_openapi_response(self) -> None: + """Test NotFoundResponse.openapi_response() method.""" + schema = NotFoundResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = NotFoundResponse.openapi_response() + assert result["description"] == NOT_FOUND_DESCRIPTION + assert result["model"] == NotFoundResponse + assert "examples" in result["content"]["application/json"] + examples = result["content"]["application/json"]["examples"] + + # Verify example count matches schema examples count + assert len(examples) == expected_count + assert expected_count == 4 + + # Verify all labeled examples are present + assert "conversation" in examples + assert "provider" in examples + assert "model" in examples + assert "rag" in examples + + # Verify example structure for one example + conversation_example = examples["conversation"] + assert "value" in conversation_example + assert "detail" in conversation_example["value"] + assert ( + conversation_example["value"]["detail"]["response"] + == "Conversation not found" + ) + + def test_openapi_response_with_explicit_examples(self) -> None: + """Test NotFoundResponse.openapi_response() with explicit examples.""" + result = NotFoundResponse.openapi_response(examples=["provider"]) + examples = result["content"]["application/json"]["examples"] + + # Verify only 1 example is returned when explicitly specified + assert len(examples) == 1 + assert "provider" in examples + assert "conversation" not in examples + + +class TestInternalServerErrorResponse: + """Test cases for InternalServerErrorResponse.""" + + def test_factory_generic(self) -> None: + """Test InternalServerErrorResponse.generic() factory method.""" + response = InternalServerErrorResponse.generic() + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Internal server error" + assert ( + response.detail.cause + == "An unexpected error occurred while processing the request." + ) + + def test_factory_configuration_not_loaded(self) -> None: + """Test InternalServerErrorResponse.configuration_not_loaded() factory method.""" + response = InternalServerErrorResponse.configuration_not_loaded() + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Configuration is not loaded" + assert ( + response.detail.cause + == "Lightspeed Stack configuration has not been initialized." 
+ ) + + def test_factory_feedback_path_invalid(self) -> None: + """Test InternalServerErrorResponse.feedback_path_invalid() factory method.""" + response = InternalServerErrorResponse.feedback_path_invalid( + "/path/to/feedback" + ) + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Failed to store feedback" + assert ( + response.detail.cause + == "Failed to store feedback at directory: /path/to/feedback" + ) + + def test_factory_query_failed(self) -> None: + """Test InternalServerErrorResponse.query_failed() factory method.""" + response = InternalServerErrorResponse.query_failed("https://api.example.com") + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Error while processing query" + assert ( + response.detail.cause == "Failed to call backend: https://api.example.com" + ) + + def test_factory_cache_unavailable(self) -> None: + """Test InternalServerErrorResponse.cache_unavailable() factory method.""" + response = InternalServerErrorResponse.cache_unavailable() + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Conversation cache not configured" + assert ( + response.detail.cause + == "Conversation cache is not configured or unavailable." + ) + + def test_factory_database_error(self) -> None: + """Test InternalServerErrorResponse.database_error() factory method.""" + response = InternalServerErrorResponse.database_error() + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Database query failed" + assert response.detail.cause == "Failed to query the database" + + def test_openapi_response(self) -> None: + """Test InternalServerErrorResponse.openapi_response() method.""" + schema = InternalServerErrorResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = InternalServerErrorResponse.openapi_response() + assert result["description"] == INTERNAL_SERVER_ERROR_DESCRIPTION + assert result["model"] == InternalServerErrorResponse + assert "examples" in result["content"]["application/json"] + examples = result["content"]["application/json"]["examples"] + + # Verify example count matches schema examples count + assert len(examples) == expected_count + assert expected_count == 6 + + # Verify all labeled examples are present + assert "internal" in examples + assert "configuration" in examples + assert "feedback storage" in examples + assert "query" in examples + assert "conversation cache" in examples + assert "database" in examples + + # Verify example structure for one example + internal_example = examples["internal"] + assert "value" in internal_example + assert "detail" in internal_example["value"] + assert ( + internal_example["value"]["detail"]["response"] == "Internal server error" + ) + + def test_openapi_response_with_explicit_examples(self) -> None: + """Test InternalServerErrorResponse.openapi_response() with explicit examples.""" + result = InternalServerErrorResponse.openapi_response( 
+ examples=["configuration"] + ) + examples = result["content"]["application/json"]["examples"] + + # Verify only 1 example is returned when explicitly specified + assert len(examples) == 1 + assert "configuration" in examples + assert "internal" not in examples + + +class TestServiceUnavailableResponse: + """Test cases for ServiceUnavailableResponse.""" + + def test_constructor(self) -> None: + """Test ServiceUnavailableResponse with valid parameters.""" + response = ServiceUnavailableResponse( + backend_name="Llama Stack", cause="Connection timeout" + ) + assert isinstance(response, AbstractErrorResponse) + assert response.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert isinstance(response.detail, DetailModel) + assert response.detail.response == "Unable to connect to Llama Stack" + assert response.detail.cause == "Connection timeout" + + def test_different_backend_names(self) -> None: + """Test ServiceUnavailableResponse with different backend names.""" + response = ServiceUnavailableResponse( + backend_name="Kubernetes API", + cause="Unable to initialize Kubernetes client", + ) + assert response.detail.response == "Unable to connect to Kubernetes API" + assert response.detail.cause == "Unable to initialize Kubernetes client" + + def test_openapi_response(self) -> None: + """Test ServiceUnavailableResponse.openapi_response() method.""" + schema = ServiceUnavailableResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ServiceUnavailableResponse.openapi_response() + assert result["description"] == SERVICE_UNAVAILABLE_DESCRIPTION + assert result["model"] == ServiceUnavailableResponse + assert "examples" in result["content"]["application/json"] + examples = result["content"]["application/json"]["examples"] + + # Verify example count matches schema examples count + assert len(examples) == expected_count + assert expected_count == 1 + + # Verify example structure + assert "llama stack" in examples + llama_example = examples["llama stack"] + assert "value" in llama_example + assert "detail" in llama_example["value"] + assert ( + llama_example["value"]["detail"]["response"] + == "Unable to connect to Llama Stack" + ) + + def test_openapi_response_with_explicit_examples(self) -> None: + """Test ServiceUnavailableResponse.openapi_response() with explicit examples.""" + result = ServiceUnavailableResponse.openapi_response(examples=["llama stack"]) + examples = result["content"]["application/json"]["examples"] + + # Verify only 1 example is returned when explicitly specified + assert len(examples) == 1 + assert "llama stack" in examples + + +class TestAbstractErrorResponse: # pylint: disable=too-few-public-methods + """Test cases for AbstractErrorResponse edge cases.""" + + def test_openapi_response_missing_label(self) -> None: + """Test openapi_response() raises SchemaError when example has no label.""" + + # Create a class with examples missing labels + class InvalidErrorResponse(AbstractErrorResponse): + """Class with invalid examples (missing label).""" + + status_code: int = 400 + detail: DetailModel = DetailModel(response="Test error", cause="Test cause") + + model_config = { + "json_schema_extra": { + "examples": [ + { + # Missing "label" key + "value": { + "detail": { + "response": "Test error", + "cause": "Test cause", + } + }, + }, + ] + } + } + + with pytest.raises(SchemaError, match="has no label"): + InvalidErrorResponse.openapi_response() diff --git 
a/tests/unit/models/responses/test_status_response.py b/tests/unit/models/responses/test_status_response.py deleted file mode 100644 index 3974ba97..00000000 --- a/tests/unit/models/responses/test_status_response.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Unit tests for StatusResponse model.""" - -from models.responses import StatusResponse - - -class TestStatusResponse: - """Test cases for the StatusResponse model.""" - - def test_constructor_feedback_enabled(self) -> None: - """Test the StatusResponse constructor.""" - sr = StatusResponse(functionality="feedback", status={"enabled": True}) - assert sr.functionality == "feedback" - assert sr.status == {"enabled": True} - - def test_constructor_feedback_disabled(self) -> None: - """Test the StatusResponse constructor.""" - sr = StatusResponse(functionality="feedback", status={"enabled": False}) - assert sr.functionality == "feedback" - assert sr.status == {"enabled": False} diff --git a/tests/unit/models/responses/test_successful_responses.py b/tests/unit/models/responses/test_successful_responses.py new file mode 100644 index 00000000..470c5d57 --- /dev/null +++ b/tests/unit/models/responses/test_successful_responses.py @@ -0,0 +1,984 @@ +# pylint: disable=unsupported-membership-test,unsubscriptable-object + +"""Unit tests for all successful response models.""" + +from typing import Any + +import pytest +from pydantic import AnyUrl, ValidationError +from pydantic_core import SchemaError + +from models.config import ( + Configuration, + LlamaStackConfiguration, + ServiceConfiguration, + UserDataCollection, +) +from models.responses import ( + AbstractSuccessfulResponse, + AuthorizedResponse, + ConfigurationResponse, + ConversationData, + ConversationDeleteResponse, + ConversationDetails, + ConversationResponse, + ConversationsListResponse, + ConversationsListResponseV2, + ConversationUpdateResponse, + FeedbackResponse, + FeedbackStatusUpdateResponse, + InfoResponse, + LivenessResponse, + ModelsResponse, + ProviderHealthStatus, + ProviderResponse, + ProvidersListResponse, + QueryResponse, + RAGChunk, + ReadinessResponse, + ReferencedDocument, + ShieldsResponse, + StatusResponse, + ToolCall, + ToolsResponse, +) + + +class TestModelsResponse: + """Test cases for ModelsResponse.""" + + def test_constructor(self) -> None: + """Test ModelsResponse with valid models list.""" + models = [ + { + "identifier": "openai/gpt-4-turbo", + "metadata": {}, + "api_model_type": "llm", + "provider_id": "openai", + "type": "model", + "provider_resource_id": "gpt-4-turbo", + "model_type": "llm", + } + ] + response = ModelsResponse(models=models) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.models == models + assert len(response.models) == 1 + + def test_empty_models_list(self) -> None: + """Test ModelsResponse with empty models list.""" + response = ModelsResponse(models=[]) + assert response.models == [] + assert len(response.models) == 0 + + def test_multiple_models(self) -> None: + """Test ModelsResponse with multiple models.""" + models = [ + {"identifier": "model1", "provider_id": "provider1"}, + {"identifier": "model2", "provider_id": "provider2"}, + ] + response = ModelsResponse(models=models) + assert len(response.models) == 2 + + def test_missing_required_parameter(self) -> None: + """Test ModelsResponse raises ValidationError when models is missing.""" + with pytest.raises(ValidationError): + ModelsResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ModelsResponse.openapi_response() 
method.""" + schema = ModelsResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ModelsResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ModelsResponse + assert "content" in result + assert "application/json" in result["content"] + + # For single-example responses, check "example" key exists + assert "example" in result["content"]["application/json"] + example = result["content"]["application/json"]["example"] + assert "models" in example + assert isinstance(example["models"], list) + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestToolsResponse: + """Test cases for ToolsResponse.""" + + def test_constructor(self) -> None: + """Test ToolsResponse with valid tools list.""" + tools = [ + { + "identifier": "filesystem_read", + "description": "Read contents of a file", + "parameters": [], + "provider_id": "mcp", + "type": "tool", + } + ] + response = ToolsResponse(tools=tools) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.tools == tools + + def test_empty_tools_list(self) -> None: + """Test ToolsResponse with empty tools list.""" + response = ToolsResponse(tools=[]) + assert response.tools == [] + + def test_missing_required_parameter(self) -> None: + """Test ToolsResponse raises ValidationError when tools is missing.""" + with pytest.raises(ValidationError): + ToolsResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ToolsResponse.openapi_response() method.""" + schema = ToolsResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ToolsResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ToolsResponse + assert "example" in result["content"]["application/json"] + assert "tools" in result["content"]["application/json"]["example"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestShieldsResponse: + """Test cases for ShieldsResponse.""" + + def test_constructor(self) -> None: + """Test ShieldsResponse with valid shields list.""" + shields = [{"name": "shield1", "status": "active"}] + response = ShieldsResponse(shields=shields) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.shields == shields + + def test_missing_required_parameter(self) -> None: + """Test ShieldsResponse raises ValidationError when shields is missing.""" + with pytest.raises(ValidationError): + ShieldsResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ShieldsResponse.openapi_response() method.""" + schema = ShieldsResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ShieldsResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ShieldsResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestProvidersListResponse: + """Test cases for ProvidersListResponse.""" + + def test_constructor(self) -> None: + """Test ProvidersListResponse with valid providers dict.""" + providers = { + "inference": [{"provider_id": "openai", "provider_type": "remote::openai"}] + } 
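+        # Assumed mapping shape, mirroring the literal above: API name
+        # (e.g. "inference") -> list of provider records.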
+ response = ProvidersListResponse(providers=providers) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.providers == providers + + def test_empty_providers(self) -> None: + """Test ProvidersListResponse with empty providers dict.""" + response = ProvidersListResponse(providers={}) + assert response.providers == {} + + def test_missing_required_parameter(self) -> None: + """Test ProvidersListResponse raises ValidationError when providers is missing.""" + with pytest.raises(ValidationError): + ProvidersListResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ProvidersListResponse.openapi_response() method.""" + schema = ProvidersListResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ProvidersListResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ProvidersListResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestProviderResponse: + """Test cases for ProviderResponse.""" + + def test_constructor(self) -> None: + """Test ProviderResponse with all required fields.""" + response = ProviderResponse( + api="inference", + config={"api_key": "test"}, + health={"status": "OK"}, + provider_id="openai", + provider_type="remote::openai", + ) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.api == "inference" + assert response.config == {"api_key": "test"} + assert response.health == {"status": "OK"} + assert response.provider_id == "openai" + assert response.provider_type == "remote::openai" + + def test_missing_required_parameters(self) -> None: + """Test ProviderResponse raises ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + ProviderResponse() # type: ignore[call-arg] + with pytest.raises(ValidationError): + ProviderResponse(api="inference") # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ProviderResponse.openapi_response() method.""" + schema = ProviderResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ProviderResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ProviderResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestQueryResponse: + """Test cases for QueryResponse.""" + + def test_constructor_minimal(self) -> None: + """Test QueryResponse with only required fields.""" + response_obj = QueryResponse(response="Test response") # type: ignore[call-arg] + assert isinstance(response_obj, AbstractSuccessfulResponse) + assert response_obj.response == "Test response" + assert response_obj.conversation_id is None + assert response_obj.rag_chunks == [] + assert response_obj.tool_calls is None + assert response_obj.referenced_documents == [] + assert response_obj.truncated is False + assert response_obj.input_tokens == 0 + assert response_obj.output_tokens == 0 + assert response_obj.available_quotas == {} + + def test_constructor_full(self) -> None: + """Test QueryResponse with all fields.""" + rag_chunks = [RAGChunk(content="chunk1", source="doc1", score=0.9)] + tool_calls = [ + ToolCall(tool_name="tool1", 
arguments={"arg": "value"}, result=None) + ] + referenced_docs = [ + ReferencedDocument(doc_url=AnyUrl("https://example.com"), doc_title="Doc") + ] + + response = QueryResponse( # type: ignore[call-arg] + conversation_id="conv-123", + response="Test response", + rag_chunks=rag_chunks, + tool_calls=tool_calls, + referenced_documents=referenced_docs, + truncated=True, + input_tokens=100, + output_tokens=50, + available_quotas={"daily": 1000}, + ) + assert response.conversation_id == "conv-123" + assert response.rag_chunks == rag_chunks + assert response.tool_calls == tool_calls + assert response.referenced_documents == referenced_docs + assert response.truncated is True + assert response.input_tokens == 100 + assert response.output_tokens == 50 + assert response.available_quotas == {"daily": 1000} + + def test_missing_required_parameter(self) -> None: + """Test QueryResponse raises ValidationError when response is missing.""" + with pytest.raises(ValidationError): + QueryResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test QueryResponse.openapi_response() method.""" + schema = QueryResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = QueryResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == QueryResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestInfoResponse: + """Test cases for InfoResponse.""" + + def test_constructor(self) -> None: + """Test InfoResponse with all fields.""" + response = InfoResponse( + name="Lightspeed Stack", + service_version="1.0.0", + llama_stack_version="1.0.0", + ) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.name == "Lightspeed Stack" + assert response.service_version == "1.0.0" + assert response.llama_stack_version == "1.0.0" + + def test_missing_required_parameters(self) -> None: + """Test InfoResponse raises ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + InfoResponse() # type: ignore[call-arg] + with pytest.raises(ValidationError): + InfoResponse(name="Test") # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test InfoResponse.openapi_response() method.""" + schema = InfoResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = InfoResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == InfoResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestReadinessResponse: + """Test cases for ReadinessResponse.""" + + def test_constructor_ready(self) -> None: + """Test ReadinessResponse when service is ready.""" + response = ReadinessResponse( + ready=True, reason="Service is ready", providers=[] + ) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.ready is True + assert response.reason == "Service is ready" + assert response.providers == [] + + def test_constructor_not_ready(self) -> None: + """Test ReadinessResponse when service is not ready.""" + providers = [ + ProviderHealthStatus( + provider_id="provider1", status="unhealthy", message="Error" + ) + ] + response = ReadinessResponse( + 
ready=False, reason="Service is not ready", providers=providers + ) + assert response.ready is False + assert len(response.providers) == 1 + assert response.providers[0].provider_id == "provider1" + + def test_missing_required_parameters(self) -> None: + """Test ReadinessResponse raises ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + ReadinessResponse() # type: ignore[call-arg] + with pytest.raises(ValidationError): + ReadinessResponse(ready=True) # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ReadinessResponse.openapi_response() method.""" + schema = ReadinessResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ReadinessResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ReadinessResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestLivenessResponse: + """Test cases for LivenessResponse.""" + + def test_constructor_alive(self) -> None: + """Test LivenessResponse when service is alive.""" + response = LivenessResponse(alive=True) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.alive is True + + def test_constructor_not_alive(self) -> None: + """Test LivenessResponse when service is not alive.""" + response = LivenessResponse(alive=False) + assert response.alive is False + + def test_missing_required_parameter(self) -> None: + """Test LivenessResponse raises ValidationError when alive is missing.""" + with pytest.raises(ValidationError): + LivenessResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test LivenessResponse.openapi_response() method.""" + schema = LivenessResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = LivenessResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == LivenessResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestFeedbackResponse: + """Test cases for FeedbackResponse.""" + + def test_constructor(self) -> None: + """Test FeedbackResponse with response message.""" + response = FeedbackResponse(response="feedback received") + assert isinstance(response, AbstractSuccessfulResponse) + assert response.response == "feedback received" + + def test_missing_required_parameter(self) -> None: + """Test FeedbackResponse raises ValidationError when response is missing.""" + with pytest.raises(ValidationError): + FeedbackResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test FeedbackResponse.openapi_response() method.""" + schema = FeedbackResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = FeedbackResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == FeedbackResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestStatusResponse: + """Test cases for StatusResponse.""" + + def test_constructor_feedback_enabled(self) -> None: + 
"""Test the StatusResponse constructor.""" + sr = StatusResponse(functionality="feedback", status={"enabled": True}) + assert sr.functionality == "feedback" + assert sr.status == {"enabled": True} + assert isinstance(sr, AbstractSuccessfulResponse) + + def test_constructor_feedback_disabled(self) -> None: + """Test the StatusResponse constructor.""" + sr = StatusResponse(functionality="feedback", status={"enabled": False}) + assert sr.functionality == "feedback" + assert sr.status == {"enabled": False} + assert isinstance(sr, AbstractSuccessfulResponse) + + def test_missing_required_parameters(self) -> None: + """Test StatusResponse raises ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + StatusResponse() # type: ignore[call-arg] + with pytest.raises(ValidationError): + StatusResponse(functionality="feedback") # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test StatusResponse.openapi_response() method.""" + schema = StatusResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = StatusResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == StatusResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestAuthorizedResponse: + """Test cases for AuthorizedResponse.""" + + def test_constructor(self) -> None: + """Test AuthorizedResponse with all fields.""" + response = AuthorizedResponse( + user_id="123e4567-e89b-12d3-a456-426614174000", + username="user1", + skip_userid_check=False, + ) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.user_id == "123e4567-e89b-12d3-a456-426614174000" + assert response.username == "user1" + assert response.skip_userid_check is False + + def test_skip_userid_check_true(self) -> None: + """Test AuthorizedResponse with skip_userid_check=True.""" + response = AuthorizedResponse( + user_id="user-123", username="test", skip_userid_check=True + ) + assert response.skip_userid_check is True + + def test_missing_required_parameters(self) -> None: + """Test AuthorizedResponse raises ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + AuthorizedResponse() # type: ignore[call-arg] + with pytest.raises(ValidationError): + AuthorizedResponse(user_id="user-123") # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test AuthorizedResponse.openapi_response() method.""" + schema = AuthorizedResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = AuthorizedResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == AuthorizedResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + def test_constructor_fields_required(self) -> None: + """Test the AuthorizedResponse constructor.""" + with pytest.raises(ValidationError): + # missing all parameters + _ = AuthorizedResponse() # pyright: ignore + + with pytest.raises(ValidationError): + # missing user_id parameter + _ = AuthorizedResponse(username="testuser") # pyright: ignore + + with pytest.raises(ValidationError): + # missing username parameter + _ = AuthorizedResponse( + 
user_id="123e4567-e89b-12d3-a456-426614174000" + ) # pyright: ignore + + +class TestConversationResponse: + """Test cases for ConversationResponse.""" + + def test_constructor(self) -> None: + """Test ConversationResponse with conversation_id and chat_history.""" + chat_history = [ + { + "messages": [ + {"content": "Hello", "type": "user"}, + {"content": "Hi there!", "type": "assistant"}, + ], + "started_at": "2024-01-01T00:01:00Z", + "completed_at": "2024-01-01T00:01:05Z", + } + ] + response = ConversationResponse( + conversation_id="123e4567-e89b-12d3-a456-426614174000", + chat_history=chat_history, + ) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.conversation_id == "123e4567-e89b-12d3-a456-426614174000" + assert response.chat_history == chat_history + + def test_empty_chat_history(self) -> None: + """Test ConversationResponse with empty chat_history.""" + response = ConversationResponse(conversation_id="conv-123", chat_history=[]) + assert response.chat_history == [] + + def test_missing_required_parameters(self) -> None: + """Test ConversationResponse raises ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + ConversationResponse() # type: ignore[call-arg] + with pytest.raises(ValidationError): + ConversationResponse(conversation_id="conv-123") # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ConversationResponse.openapi_response() method.""" + schema = ConversationResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ConversationResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ConversationResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestConversationDeleteResponse: + """Test cases for ConversationDeleteResponse.""" + + def test_constructor_deleted(self) -> None: + """Test ConversationDeleteResponse when conversation is deleted.""" + response = ConversationDeleteResponse( + deleted=True, conversation_id="123e4567-e89b-12d3-a456-426614174000" + ) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.conversation_id == "123e4567-e89b-12d3-a456-426614174000" + assert response.success is True + assert response.response == "Conversation deleted successfully" + + def test_constructor_not_deleted(self) -> None: + """Test ConversationDeleteResponse when conversation cannot be deleted.""" + response = ConversationDeleteResponse(deleted=False, conversation_id="conv-123") + assert response.success is True + assert response.response == "Conversation cannot be deleted" + + def test_missing_required_parameters(self) -> None: + """Test ConversationDeleteResponse raises ValidationError when required fields missing.""" + with pytest.raises(TypeError): + ConversationDeleteResponse() # pylint: disable=missing-kwoa + with pytest.raises(TypeError): + ConversationDeleteResponse(deleted=True) # pylint: disable=missing-kwoa + + def test_openapi_response(self) -> None: + """Test ConversationDeleteResponse.openapi_response() method.""" + schema = ConversationDeleteResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ConversationDeleteResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == 
ConversationDeleteResponse
+        assert "examples" in result["content"]["application/json"]
+        examples = result["content"]["application/json"]["examples"]
+
+        # Verify example count matches schema examples count
+        assert len(examples) == expected_count
+        assert expected_count == 2
+
+        # Verify all labeled examples are present
+        assert "deleted" in examples
+        assert "not found" in examples
+
+        # Verify example structure for "deleted" example
+        deleted_example = examples["deleted"]
+        assert "value" in deleted_example
+        assert (
+            deleted_example["value"]["conversation_id"]
+            == "123e4567-e89b-12d3-a456-426614174000"
+        )
+        assert deleted_example["value"]["success"] is True
+        assert (
+            deleted_example["value"]["response"] == "Conversation deleted successfully"
+        )
+
+        # Verify example structure for "not found" example
+        not_found_example = examples["not found"]
+        assert "value" in not_found_example
+        assert not_found_example["value"]["conversation_id"] == (
+            "123e4567-e89b-12d3-a456-426614174000"
+        )
+        assert not_found_example["value"]["success"] is True
+        assert (
+            not_found_example["value"]["response"] == "Conversation cannot be deleted"
+        )
+
+    def test_openapi_response_missing_label(self) -> None:
+        """Test openapi_response() raises SchemaError when example has no label."""
+
+        class InvalidResponse(ConversationDeleteResponse):
+            """Class with invalid examples (missing label)."""
+
+            model_config = {
+                "json_schema_extra": {
+                    "examples": [
+                        {
+                            # Missing "label" key
+                            "value": {
+                                "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
+                                "success": True,
+                                "response": "Conversation deleted successfully",
+                            },
+                        },
+                    ]
+                }
+            }
+
+        with pytest.raises(SchemaError, match="has no label"):
+            InvalidResponse.openapi_response()
+
+    def test_openapi_response_missing_value(self) -> None:
+        """Test openapi_response() raises SchemaError when example has no value."""
+
+        class InvalidResponse(ConversationDeleteResponse):
+            """Class with invalid examples (missing value)."""
+
+            model_config = {
+                "json_schema_extra": {
+                    "examples": [
+                        {
+                            "label": "deleted",
+                            # Missing "value" key
+                        },
+                    ]
+                }
+            }
+
+        with pytest.raises(SchemaError, match="has no value"):
+            InvalidResponse.openapi_response()
+
+
+class TestConversationsListResponse:
+    """Test cases for ConversationsListResponse."""
+
+    def test_constructor(self) -> None:
+        """Test ConversationsListResponse with conversation details."""
+        conversations = [
+            ConversationDetails(
+                conversation_id="123e4567-e89b-12d3-a456-426614174000",
+                created_at="2024-01-01T00:00:00Z",
+                last_message_at="2024-01-01T00:05:00Z",
+                message_count=5,
+                last_used_model="gpt-4",
+                last_used_provider="openai",
+                topic_summary="Test topic",
+            )
+        ]
+        response = ConversationsListResponse(conversations=conversations)
+        assert isinstance(response, AbstractSuccessfulResponse)
+        assert len(response.conversations) == 1
+        assert (
+            response.conversations[0].conversation_id
+            == "123e4567-e89b-12d3-a456-426614174000"
+        )
+
+    def test_empty_conversations(self) -> None:
+        """Test ConversationsListResponse with empty conversations list."""
+        response = ConversationsListResponse(conversations=[])
+        assert response.conversations == []
+
+    def test_missing_required_parameter(self) -> None:
+        """Test ConversationsListResponse raises ValidationError when conversations is missing."""
+        with pytest.raises(ValidationError):
+            ConversationsListResponse()  # type: ignore[call-arg]
+
+    def test_openapi_response(self) -> None:
+        """Test ConversationsListResponse.openapi_response() method."""
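+        # Unlike ConversationDeleteResponse above, this model publishes a single
+        # unlabeled example, so the payload is expected under "example".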
+ schema = ConversationsListResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ConversationsListResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ConversationsListResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestConversationsListResponseV2: + """Test cases for ConversationsListResponseV2.""" + + def test_constructor(self) -> None: + """Test ConversationsListResponseV2 with conversation data.""" + conversations = [ + ConversationData( + conversation_id="123e4567-e89b-12d3-a456-426614174000", + topic_summary="Test topic", + last_message_timestamp=1704067200.0, + ) + ] + response = ConversationsListResponseV2(conversations=conversations) + assert isinstance(response, AbstractSuccessfulResponse) + assert len(response.conversations) == 1 + assert ( + response.conversations[0].conversation_id + == "123e4567-e89b-12d3-a456-426614174000" + ) + + def test_conversation_with_none_topic(self) -> None: + """Test ConversationsListResponseV2 with conversation having None topic_summary.""" + conversations = [ + ConversationData( + conversation_id="conv-123", + topic_summary=None, + last_message_timestamp=1704067200.0, + ) + ] + response = ConversationsListResponseV2(conversations=conversations) + assert response.conversations[0].topic_summary is None + + def test_missing_required_parameter(self) -> None: + """Test ConversationsListResponseV2 raises ValidationError when conversations is missing.""" + with pytest.raises(ValidationError): + ConversationsListResponseV2() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ConversationsListResponseV2.openapi_response() method.""" + schema = ConversationsListResponseV2.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ConversationsListResponseV2.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ConversationsListResponseV2 + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestFeedbackStatusUpdateResponse: + """Test cases for FeedbackStatusUpdateResponse.""" + + def test_constructor(self) -> None: + """Test FeedbackStatusUpdateResponse with status dict.""" + status_dict = { + "previous_status": True, + "updated_status": False, + "updated_by": "user/test", + "timestamp": "2023-03-15 12:34:56", + } + response = FeedbackStatusUpdateResponse(status=status_dict) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.status == status_dict + + def test_missing_required_parameter(self) -> None: + """Test FeedbackStatusUpdateResponse raises ValidationError when status is missing.""" + with pytest.raises(ValidationError): + FeedbackStatusUpdateResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test FeedbackStatusUpdateResponse.openapi_response() method.""" + schema = FeedbackStatusUpdateResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = FeedbackStatusUpdateResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == FeedbackStatusUpdateResponse + assert 
"example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestConversationUpdateResponse: + """Test cases for ConversationUpdateResponse.""" + + def test_constructor_success(self) -> None: + """Test ConversationUpdateResponse with successful update.""" + response = ConversationUpdateResponse( + conversation_id="123e4567-e89b-12d3-a456-426614174000", + success=True, + message="Topic summary updated successfully", + ) + assert isinstance(response, AbstractSuccessfulResponse) + assert response.conversation_id == "123e4567-e89b-12d3-a456-426614174000" + assert response.success is True + assert response.message == "Topic summary updated successfully" + + def test_constructor_failure(self) -> None: + """Test ConversationUpdateResponse with failed update.""" + response = ConversationUpdateResponse( + conversation_id="conv-123", success=False, message="Update failed" + ) + assert response.success is False + assert response.message == "Update failed" + + def test_missing_required_parameters(self) -> None: + """Test ConversationUpdateResponse raises ValidationError when required fields missing.""" + with pytest.raises(ValidationError): + ConversationUpdateResponse() # type: ignore[call-arg] + with pytest.raises(ValidationError): + ConversationUpdateResponse(conversation_id="conv-123") # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ConversationUpdateResponse.openapi_response() method.""" + schema = ConversationUpdateResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ConversationUpdateResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ConversationUpdateResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class TestConfigurationResponse: + """Test cases for ConfigurationResponse.""" + + def test_constructor(self) -> None: + """Test ConfigurationResponse with Configuration object.""" + # Create a minimal Configuration object for testing + config = Configuration( + name="test", + service=ServiceConfiguration(host="localhost", port=8080), + llama_stack=LlamaStackConfiguration(url="http://localhost:8321"), + user_data_collection=UserDataCollection(feedback_enabled=False), + ) + response = ConfigurationResponse(configuration=config) + assert isinstance(response, AbstractSuccessfulResponse) + assert isinstance(response.configuration, Configuration) + assert response.configuration.name == "test" + + def test_missing_required_parameter(self) -> None: + """Test ConfigurationResponse raises ValidationError when configuration is missing.""" + with pytest.raises(ValidationError): + ConfigurationResponse() # type: ignore[call-arg] + + def test_openapi_response(self) -> None: + """Test ConfigurationResponse.openapi_response() method.""" + schema = ConfigurationResponse.model_json_schema() + model_examples = schema.get("examples", []) + expected_count = len(model_examples) + + result = ConfigurationResponse.openapi_response() + assert result["description"] == "Successful response" + assert result["model"] == ConfigurationResponse + assert "example" in result["content"]["application/json"] + + # Verify example count matches schema examples count (should be 1) + assert expected_count == 1 + + +class 
TestAbstractSuccessfulResponseOpenAPI: + """Test cases for AbstractSuccessfulResponse.openapi_response() edge cases.""" + + def test_openapi_response_requires_examples(self) -> None: + """Test that openapi_response raises SchemaError if no examples found.""" + + # Create a class without examples + class NoExamplesResponse(AbstractSuccessfulResponse): + """Class without examples.""" + + field: str = "test" + model_config: dict[str, Any] = {"json_schema_extra": {}} + + with pytest.raises(SchemaError, match="Examples not found"): + NoExamplesResponse.openapi_response() + + def test_openapi_response_structure(self) -> None: + """Test that openapi_response returns correct structure.""" + result = ModelsResponse.openapi_response() + assert "description" in result + assert "model" in result + assert "content" in result + assert result["description"] == "Successful response" + assert result["model"] == ModelsResponse + assert "application/json" in result["content"] + assert "example" in result["content"]["application/json"] diff --git a/tests/unit/models/responses/test_unauthorized_response.py b/tests/unit/models/responses/test_unauthorized_response.py deleted file mode 100644 index 278f46ca..00000000 --- a/tests/unit/models/responses/test_unauthorized_response.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Unit tests for UnauthorizedResponse model.""" - -from models.responses import UnauthorizedResponse, DetailModel - - -class TestUnauthorizedResponse: - """Test cases for the UnauthorizedResponse model.""" - - def test_constructor_without_user_id(self) -> None: - """Test UnauthorizedResponse when user_id is not provided.""" - ur = UnauthorizedResponse() - assert isinstance(ur.detail, DetailModel) - assert ur.detail.response == "Unauthorized" - assert ur.detail.cause == "Missing or invalid credentials provided by client" - - def test_constructor_with_user_id(self) -> None: - """Test UnauthorizedResponse when user_id is provided.""" - ur = UnauthorizedResponse(user_id="user_123") - assert isinstance(ur.detail, DetailModel) - assert ur.detail.response == "Unauthorized" - assert ur.detail.cause == "User user_123 is unauthorized" From f658cd44b28cc6098d65a7553af9e376e9fd255c Mon Sep 17 00:00:00 2001 From: Andrej Simurka Date: Sun, 23 Nov 2025 20:09:13 +0100 Subject: [PATCH 2/4] Modified exception raising in endpoints --- docs/openapi.json | 4355 ++++++++++++++--- src/app/endpoints/authorized.py | 24 +- src/app/endpoints/config.py | 58 +- src/app/endpoints/conversations.py | 255 +- src/app/endpoints/conversations_v2.py | 209 +- src/app/endpoints/feedback.py | 89 +- src/app/endpoints/health.py | 46 +- src/app/endpoints/info.py | 33 +- src/app/endpoints/metrics.py | 24 +- src/app/endpoints/models.py | 61 +- src/app/endpoints/providers.py | 195 +- src/app/endpoints/query.py | 148 +- src/app/endpoints/query_v2.py | 96 +- src/app/endpoints/rags.py | 118 +- src/app/endpoints/root.py | 13 +- src/app/endpoints/shields.py | 48 +- src/app/endpoints/streaming_query.py | 130 +- src/app/endpoints/streaming_query_v2.py | 50 +- src/app/endpoints/tools.py | 215 +- src/app/main.py | 23 +- src/utils/endpoints.py | 84 +- .../features/authorized_noop_token.feature | 27 +- tests/e2e/features/conversations.feature | 28 +- tests/e2e/features/environment.py | 13 +- tests/e2e/features/feedback.feature | 23 +- tests/e2e/features/info.feature | 8 +- tests/e2e/features/query.feature | 15 +- tests/e2e/features/streaming_query.feature | 9 +- .../endpoints/test_config_integration.py | 26 +- .../endpoints/test_health_integration.py | 31 +- 
.../endpoints/test_info_integration.py | 4 +- .../test_middleware_integration.py | 34 + tests/integration/test_openapi_json.py | 122 +- tests/unit/app/endpoints/test_authorized.py | 20 +- tests/unit/app/endpoints/test_config.py | 54 +- .../unit/app/endpoints/test_conversations.py | 342 +- .../app/endpoints/test_conversations_v2.py | 179 +- tests/unit/app/endpoints/test_feedback.py | 97 +- tests/unit/app/endpoints/test_health.py | 19 +- tests/unit/app/endpoints/test_info.py | 13 +- tests/unit/app/endpoints/test_metrics.py | 6 +- tests/unit/app/endpoints/test_models.py | 88 +- tests/unit/app/endpoints/test_providers.py | 156 +- tests/unit/app/endpoints/test_query.py | 62 +- tests/unit/app/endpoints/test_query_v2.py | 25 +- tests/unit/app/endpoints/test_rags.py | 110 +- tests/unit/app/endpoints/test_shields.py | 114 +- .../app/endpoints/test_streaming_query.py | 76 +- .../app/endpoints/test_streaming_query_v2.py | 13 +- tests/unit/app/endpoints/test_tools.py | 185 +- tests/unit/app/test_main_middleware.py | 74 + tests/unit/utils/test_endpoints.py | 15 +- 52 files changed, 5664 insertions(+), 2598 deletions(-) create mode 100644 tests/integration/test_middleware_integration.py create mode 100644 tests/unit/app/test_main_middleware.py diff --git a/docs/openapi.json b/docs/openapi.json index ce044525..1bbeb357 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -40,6 +40,58 @@ } } } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + }, + "text/html": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + }, + "text/html": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } } } } @@ -54,23 +106,134 @@ "operationId": "info_endpoint_handler_v1_info_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/InfoResponse" + }, + "example": { + "llama_stack_version": "1.0.0", + "name": "Lightspeed Stack", + "service_version": "1.0.0" } } - }, - "name": "Service name", - "service_version": "Service version", - "llama_stack_version": "Llama Stack version" + } }, - "500": { - "description": "Internal Server Error", - "detail": { - "response": "Unable to connect to Llama Stack", - "cause": "Connection error." 
+ "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "expired token": { + "value": { + "detail": { + "cause": "Token has expired", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "invalid signature": { + "value": { + "detail": { + "cause": "Invalid token signature", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "invalid key": { + "value": { + "detail": { + "cause": "Token signed by unknown key", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing claim": { + "value": { + "detail": { + "cause": "Token missing claim: user_id", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "invalid k8s token": { + "value": { + "detail": { + "cause": "Invalid or expired Kubernetes token", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "invalid jwk token": { + "value": { + "detail": { + "cause": "Authentication key server returned invalid data", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + } } } } @@ -86,39 +249,115 @@ "operationId": "models_endpoint_handler_v1_models_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ModelsResponse" + }, + "example": { + "models": [ + { + "api_model_type": "llm", + "identifier": "openai/gpt-4-turbo", + "metadata": {}, + "model_type": "llm", + "provider_id": "openai", + "provider_resource_id": "gpt-4-turbo", + "type": "model" + } + ] } } - }, - "models": [ - { - "identifier": "all-MiniLM-L6-v2", - "metadata": { - "embedding_dimension": 384 + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" }, - "api_model_type": "embedding", - "provider_id": "ollama", - "provider_resource_id": "all-minilm:latest", - "type": "model", - "model_type": "embedding" - }, - { - "identifier": "llama3.2:3b-instruct-fp16", - "metadata": {}, - "api_model_type": "llm", - "provider_id": "ollama", - "provider_resource_id": "llama3.2:3b-instruct-fp16", - "type": "model", - "model_type": "llm" + "examples": { + "missing header": { + "value": { + 
"detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } } - ] + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } }, "500": { - "description": "Connection to Llama Stack is broken" + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + } + } } } } @@ -133,7 +372,7 @@ "operationId": "tools_endpoint_handler_v1_tools_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { @@ -142,20 +381,19 @@ "example": { "tools": [ { - "identifier": "", - "description": "", + "description": "Read contents of a file from the filesystem", + "identifier": "filesystem_read", "parameters": [ { - "name": "", - "description": "", - "parameter_type": "", - "required": "True/False", - "default": "null" + "description": "Path to the file to read", + "name": "path", + "parameter_type": "string", + "required": true } ], - "provider_id": "", - "toolgroup_id": "", - "server_source": "", + "provider_id": "model-context-protocol", + "server_source": "http://localhost:3000", + "toolgroup_id": "filesystem-tools", "type": "tool" } ] @@ -163,8 +401,93 @@ } } }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + }, "500": { - "description": "Connection to Llama Stack is broken or MCP server error" + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + } + } } } } @@ -179,26 +502,113 @@ "operationId": "shields_endpoint_handler_v1_shields_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ShieldsResponse" + }, + "example": { + "shields": [ + { + "identifier": "lightspeed_question_validity-shield", + "params": {}, + "provider_id": "lightspeed_question_validity", + "provider_resource_id": "lightspeed_question_validity-shield", + "type": "shield" + } + ] } } - }, - "shields": [ - { - "identifier": "lightspeed_question_validity-shield", - "provider_resource_id": "lightspeed_question_validity-shield", - "provider_id": "lightspeed_question_validity", - "type": "shield", - "params": {} + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } } - ] + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } }, "500": { - "description": "Connection to Llama Stack is broken" + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + } + } } } } @@ -209,49 +619,126 @@ "providers" ], "summary": "Providers Endpoint Handler", - "description": "Handle GET requests to list all available providers.\n\nRetrieves providers from the Llama Stack service, groups them by API type.\n\nRaises:\n HTTPException:\n - 500 if configuration is not loaded,\n - 500 if unable to connect to Llama Stack,\n - 500 for any unexpected retrieval errors.\n\nReturns:\n 
ProvidersListResponse: Object mapping API types to lists of providers.", + "description": "List all available providers grouped by API type.\n\nReturns:\n ProvidersListResponse: Mapping from API type to list of providers.\n\nRaises:\n HTTPException:\n - 401: Authentication failed\n - 403: Authorization failed\n - 500: Lightspeed Stack configuration not loaded\n - 503: Unable to connect to Llama Stack", "operationId": "providers_endpoint_handler_v1_providers_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ProvidersListResponse" + }, + "example": { + "providers": { + "agents": [ + { + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference" + } + ], + "inference": [ + { + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers" + }, + { + "provider_id": "openai", + "provider_type": "remote::openai" + } + ] + } } } - }, - "providers": { - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference" - } - ], - "datasetio": [ - { - "provider_id": "huggingface", - "provider_type": "remote::huggingface" + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" }, - { - "provider_id": "localfs", - "provider_type": "inline::localfs" + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } - ], - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers" + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" }, - { - "provider_id": "openai", - "provider_type": "remote::openai" + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } } - ] + } } }, "500": { - "description": "Connection to Llama Stack is broken" + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + } + } } } } @@ -262,7 +749,7 @@ "providers" ], "summary": "Get Provider Endpoint Handler", - "description": "Retrieve a single provider by its unique ID.\n\nRaises:\n HTTPException:\n - 404 if provider with the given ID is not found,\n - 500 if unable to connect to Llama Stack,\n - 500 for any unexpected retrieval 
errors.\n\nReturns:\n ProviderResponse: A single provider's details including API, config, health,\n provider_id, and provider_type.", + "description": "Retrieve a single provider by its unique ID.\n\nReturns:\n ProviderResponse: Provider details.\n\nRaises:\n HTTPException:\n - 401: Authentication failed\n - 403: Authorization failed\n - 404: Provider not found\n - 500: Lightspeed Stack configuration not loaded\n - 503: Unable to connect to Llama Stack", "operationId": "get_provider_endpoint_handler_v1_providers__provider_id__get", "parameters": [ { @@ -277,33 +764,134 @@ ], "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ProviderResponse" + }, + "example": { + "api": "inference", + "config": { + "api_key": "********" + }, + "health": { + "message": "Healthy", + "status": "OK" + }, + "provider_id": "openai", + "provider_type": "remote::openai" } } - }, - "api": "inference", - "config": { - "api_key": "********" - }, - "health": { - "status": "Not Implemented", - "message": "Provider does not implement health check" - }, - "provider_id": "openai", - "provider_type": "remote::openai" + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } }, "404": { - "response": "Provider with given id not found", - "description": "Not Found" + "description": "Resource not found", + "content": { + "application/json": { + "examples": { + "provider": { + "value": { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + } + } + } }, "500": { - "response": "Unable to retrieve list of providers", - "cause": "Connection to Llama Stack is broken", - "description": "Internal Server Error" + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + } + } + } }, "422": { "description": "Validation Error", @@ -324,26 +912,113 @@ "rags" ], 
"summary": "Rags Endpoint Handler", - "description": "Handle GET requests to list all available RAGs.\n\nRetrieves RAGs from the Llama Stack service.\n\nRaises:\n HTTPException:\n - 500 if configuration is not loaded,\n - 500 if unable to connect to Llama Stack,\n - 500 for any unexpected retrieval errors.\n\nReturns:\n RAGListResponse: List of RAGs.", + "description": "List all available RAGs.\n\nReturns:\n RAGListResponse: List of RAG identifiers.\n\nRaises:\n HTTPException:\n - 401: Authentication failed\n - 403: Authorization failed\n - 500: Lightspeed Stack configuration not loaded\n - 503: Unable to connect to Llama Stack", "operationId": "rags_endpoint_handler_v1_rags_get", "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/RAGListResponse" + }, + "example": { + "rags": [ + "vs_00000000-cafe-babe-0000-000000000000", + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] } } - }, - "rags": [ - "vs_00000000-cafe-babe-0000-000000000000", - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" - ] + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } }, "500": { - "description": "Connection to Llama Stack is broken" + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } + } + }, + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + } + } } } } @@ -354,7 +1029,7 @@ "rags" ], "summary": "Get Rag Endpoint Handler", - "description": "Retrieve a single RAG by its unique ID.\n\nRaises:\n HTTPException:\n - 404 if RAG with the given ID is not found,\n - 500 if unable to connect to Llama Stack,\n - 500 for any unexpected retrieval errors.\n\nReturns:\n RAGInfoResponse: A single RAG's details", + "description": "Retrieve a single RAG by its unique ID.\n\nReturns:\n RAGInfoResponse: A single RAG's details.\n\nRaises:\n HTTPException:\n - 401: Authentication failed\n - 403: Authorization failed\n - 404: RAG with the given ID not found\n 
- 500: Lightspeed Stack configuration not loaded\n - 503: Unable to connect to Llama Stack", "operationId": "get_rag_endpoint_handler_v1_rags__rag_id__get", "parameters": [ { @@ -369,23 +1044,131 @@ ], "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RAGInfoResponse" + }, + "example": { + "created_at": 1763391371, + "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "last_active_at": 1763391371, + "name": "Faiss Store with Knowledge base", + "object": "vector_store", + "status": "completed", + "usage_bytes": 1024000 + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, + "404": { + "description": "Resource not found", + "content": { + "application/json": { + "examples": { + "rag": { + "value": { + "detail": { + "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", + "response": "Rag not found" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + } + } + } + }, + "500": { + "description": "Internal server error", "content": { "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + }, "schema": { - "$ref": "#/components/schemas/RAGInfoResponse" + "$ref": "#/components/schemas/InternalServerErrorResponse" } } } }, - "404": { - "response": "RAG with given id not found", - "description": "Not Found" - }, - "500": { - "response": "Unable to retrieve list of RAGs", - "cause": "Connection to Llama Stack is broken", - "description": "Internal Server Error" + "503": { + "description": "Service unavailable", + "content": { + "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/ServiceUnavailableResponse" + } + } + } }, "422": { "description": "Validation Error", @@ -420,66 +1203,290 @@ }, "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/QueryResponse" + }, + "example": { + "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "input_tokens": 150, + "output_tokens": 75, + "rag_chunks": [ + { + "content": "OLM is a component of the Operator Framework toolkit...", + "score": 
0.95, + "source": "kubernetes-docs/operators.md" + } + ], + "referenced_documents": [ + { + "doc_title": "Operator Lifecycle Manager (OLM)", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" + } + ], + "response": "Operator Lifecycle Manager (OLM) helps users install...", + "tool_calls": [ + { + "arguments": { + "query": "operator lifecycle manager" + }, + "result": { + "chunks_found": 5 + }, + "tool_name": "knowledge_search" + } + ], + "truncated": false } } - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "LLM answer", - "referenced_documents": [ - { - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html", - "doc_title": "Operator Lifecycle Manager (OLM)" - } - ] + } }, - "400": { - "description": "Missing or invalid credentials provided by client", + "401": { + "description": "Unauthorized", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } }, "403": { - "description": "Client does not have permission to access conversation", + "description": "Permission denied", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + }, + "model override": { + "value": { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." + } + } + } + } + } + } + }, + "404": { + "description": "Resource not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + }, + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + }, + "provider": { + "value": { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + } + } + }, + "model": { + "value": { + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + } + } + } + } + } + } + }, + "422": { + "description": "Request validation failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnprocessableEntityResponse" + }, + "examples": { + "invalid format": { + "value": { + "detail": { + "cause": "Invalid request format. 
The request body could not be parsed.",
+                      "response": "Invalid request format"
+                    }
+                  }
+                },
+                "missing attributes": {
+                  "value": {
+                    "detail": {
+                      "cause": "Missing required attributes: ['query', 'model', 'provider']",
+                      "response": "Missing required attributes"
+                    }
+                  }
+                },
+                "invalid value": {
+                  "value": {
+                    "detail": {
+                      "cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
+                      "response": "Invalid attribute value"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
         "429": {
-          "description": "The quota has been exceeded",
+          "description": "Quota limit exceeded",
           "content": {
             "application/json": {
               "schema": {
                 "$ref": "#/components/schemas/QuotaExceededResponse"
+              },
+              "examples": {
+                "model": {
+                  "value": {
+                    "detail": {
+                      "cause": "The token quota for model gpt-4-turbo has been exceeded.",
+                      "response": "The model quota has been exceeded"
+                    }
+                  }
+                },
+                "user none": {
+                  "value": {
+                    "detail": {
+                      "cause": "User 123 has no available tokens.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "cluster none": {
+                  "value": {
+                    "detail": {
+                      "cause": "Cluster has no available tokens.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "subject none": {
+                  "value": {
+                    "detail": {
+                      "cause": "Unknown subject 999 has no available tokens.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "user insufficient": {
+                  "value": {
+                    "detail": {
+                      "cause": "User 123 has 5 tokens, but 10 tokens are needed.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "cluster insufficient": {
+                  "value": {
+                    "detail": {
+                      "cause": "Cluster has 500 tokens, but 900 tokens are needed.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "subject insufficient": {
+                  "value": {
+                    "detail": {
+                      "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                }
               }
             }
           }
         },
         "500": {
-          "description": "Internal Server Error",
-          "detail": {
-            "response": "Unable to connect to Llama Stack",
-            "cause": "Connection error."
+ "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } } }, - "422": { - "description": "Validation Error", + "503": { + "description": "Service unavailable", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/HTTPValidationError" + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } } } } @@ -507,145 +1514,420 @@ }, "responses": { "200": { - "description": "Streaming response with Server-Sent Events", + "description": "Streaming response (Server-Sent Events)", "content": { "application/json": { - "schema": { - "type": "string", - "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n" - } + "schema": {} }, - "text/plain": { + "text/event-stream": { "schema": { - "type": "string", - "example": "Hello world!\n\n---\n\nReference: https://example.com/doc" - } + "type": "string" + }, + "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n" } } }, - "400": { - "description": "Missing or invalid credentials provided by client", + "401": { + "description": "Unauthorized", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } }, - "401": { - "description": "Unauthorized: Invalid or missing Bearer token for k8s auth", + "403": { + "description": "Permission denied", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + }, + "model override": { + "value": { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding 
model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request."
+                    }
+                  }
+                }
               }
             }
           }
         },
+        "404": {
+          "description": "Resource not found",
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/NotFoundResponse"
+              },
+              "examples": {
+                "conversation": {
+                  "value": {
+                    "detail": {
+                      "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist",
+                      "response": "Conversation not found"
+                    }
+                  }
+                },
+                "provider": {
+                  "value": {
+                    "detail": {
+                      "cause": "Provider with ID openai does not exist",
+                      "response": "Provider not found"
+                    }
+                  }
+                },
+                "model": {
+                  "value": {
+                    "detail": {
+                      "cause": "Model with ID gpt-4-turbo is not configured",
+                      "response": "Model not found"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
+        "422": {
+          "description": "Request validation failed",
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/UnprocessableEntityResponse"
+              },
+              "examples": {
+                "invalid format": {
+                  "value": {
+                    "detail": {
+                      "cause": "Invalid request format. The request body could not be parsed.",
+                      "response": "Invalid request format"
+                    }
+                  }
+                },
+                "missing attributes": {
+                  "value": {
+                    "detail": {
+                      "cause": "Missing required attributes: ['query', 'model', 'provider']",
+                      "response": "Missing required attributes"
+                    }
+                  }
+                },
+                "invalid value": {
+                  "value": {
+                    "detail": {
+                      "cause": "Invalid attachment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']",
+                      "response": "Invalid attribute value"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
         "429": {
-          "description": "The quota has been exceeded",
+          "description": "Quota limit exceeded",
           "content": {
             "application/json": {
               "schema": {
                 "$ref": "#/components/schemas/QuotaExceededResponse"
+              },
+              "examples": {
+                "model": {
+                  "value": {
+                    "detail": {
+                      "cause": "The token quota for model gpt-4-turbo has been exceeded.",
+                      "response": "The model quota has been exceeded"
+                    }
+                  }
+                },
+                "user none": {
+                  "value": {
+                    "detail": {
+                      "cause": "User 123 has no available tokens.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "cluster none": {
+                  "value": {
+                    "detail": {
+                      "cause": "Cluster has no available tokens.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "subject none": {
+                  "value": {
+                    "detail": {
+                      "cause": "Unknown subject 999 has no available tokens.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "user insufficient": {
+                  "value": {
+                    "detail": {
+                      "cause": "User 123 has 5 tokens, but 10 tokens are needed.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "cluster insufficient": {
+                  "value": {
+                    "detail": {
+                      "cause": "Cluster has 500 tokens, but 900 tokens are needed.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                },
+                "subject insufficient": {
+                  "value": {
+                    "detail": {
+                      "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.",
+                      "response": "The quota has been exceeded"
+                    }
+                  }
+                }
               }
             }
           }
         },
         "500": {
-          "description": "Internal Server Error",
-          "detail": {
-            "response": "Unable to connect to Llama Stack",
-            "cause": "Connection error."
+ "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } } }, - "422": { - "description": "Validation Error", + "503": { + "description": "Service unavailable", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/HTTPValidationError" + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + } + } + } + } + } + }, + "/v1/config": { + "get": { + "tags": [ + "config" + ], + "summary": "Config Endpoint Handler", + "description": "Handle requests to the /config endpoint.\n\nProcess GET requests to the /config endpoint and returns the\ncurrent service configuration.\n\nReturns:\n ConfigurationResponse: The loaded service configuration response.", + "operationId": "config_endpoint_handler_v1_config_get", + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ConfigurationResponse" + }, + "example": { + "configuration": { + "authentication": { + "module": "noop", + "skip_tls_verification": false + }, + "authorization": { + "access_rules": [] + }, + "byok_rag": [], + "conversation_cache": {}, + "database": { + "sqlite": { + "db_path": "/tmp/lightspeed-stack.db" + } + }, + "inference": { + "default_model": "gpt-4-turbo", + "default_provider": "openai" + }, + "llama_stack": { + "api_key": "*****", + "url": "http://localhost:8321", + "use_as_library_client": false + }, + "mcp_servers": [ + { + "name": "server1", + "provider_id": "provider1", + "url": "http://url.com:1" + } + ], + "name": "lightspeed-stack", + "quota_handlers": { + "enable_token_history": false, + "limiters": [], + "scheduler": { + "period": 1 + } + }, + "service": { + "access_log": true, + "auth_enabled": false, + "color_log": true, + "cors": { + "allow_credentials": false, + "allow_headers": [ + "*" + ], + "allow_methods": [ + "*" + ], + "allow_origins": [ + "*" + ] + }, + "host": "localhost", + "port": 8080, + "tls_config": {}, + "workers": 1 + }, + "user_data_collection": { + "feedback_enabled": true, + "feedback_storage": "/tmp/data/feedback", + "transcripts_enabled": false, + "transcripts_storage": "/tmp/data/transcripts" + } + } + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access 
this endpoint" + } + } + } } } } - } - } - } - }, - "/v1/config": { - "get": { - "tags": [ - "config" - ], - "summary": "Config Endpoint Handler", - "description": "Handle requests to the /config endpoint.\n\nProcess GET requests to the /config endpoint and returns the\ncurrent service configuration.\n\nReturns:\n Configuration: The loaded service configuration object.", - "operationId": "config_endpoint_handler_v1_config_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "500": { + "description": "Internal server error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Configuration" + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } } } - }, - "name": "foo bar baz", - "service": { - "host": "localhost", - "port": 8080, - "auth_enabled": false, - "workers": 1, - "color_log": true, - "access_log": true, - "tls_config": { - "tls_certificate_path": "config/certificate.crt", - "tls_key_path": "config/private.key" - } - }, - "llama_stack": { - "url": "http://localhost:8321", - "api_key": "*****", - "use_as_library_client": false - }, - "user_data_collection": { - "feedback_enabled": true, - "feedback_storage": "/tmp/data/feedback", - "transcripts_enabled": false - }, - "mcp_servers": [ - { - "name": "server1", - "provider_id": "provider1", - "url": "http://url.com:1" - }, - { - "name": "server2", - "provider_id": "provider2", - "url": "http://url.com:2" - }, - { - "name": "server3", - "provider_id": "provider3", - "url": "http://url.com:3" - } - ] - }, - "503": { - "description": "Service Unavailable", - "detail": { - "response": "Configuration is not loaded" } } } @@ -671,41 +1953,118 @@ }, "responses": { "200": { - "description": "Feedback received and stored", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/FeedbackResponse" + }, + "example": { + "response": "feedback received" } } } }, "401": { - "description": "Missing or invalid credentials provided by client", + "description": "Unauthorized", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } }, "403": { - "description": "Client does not have permission to access resource", + "description": "Permission denied", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + }, + "feedback": { + "value": { + "detail": { + "cause": "Storing feedback is disabled.", + "response": "Storing feedback is disabled." 
+ } + } + } + } + } + } + }, + "404": { + "description": "Resource not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + }, + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + } } } } }, "500": { - "description": "User feedback can not be stored", + "description": "Internal server error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ErrorResponse" + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "feedback storage": { + "value": { + "detail": { + "cause": "Failed to store feedback at directory: /path/example", + "response": "Failed to store feedback" + } + } + } } } } @@ -733,11 +2092,17 @@ "operationId": "feedback_status_v1_feedback_status_get", "responses": { "200": { - "description": "Feedback status successfully retrieved", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StatusResponse" + }, + "example": { + "functionality": "feedback", + "status": { + "enabled": true + } } } } @@ -763,31 +2128,87 @@ }, "responses": { "200": { - "description": "Feedback status successfully updated", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/FeedbackStatusUpdateResponse" + }, + "example": { + "status": { + "previous_status": true, + "timestamp": "2023-03-15 12:34:56", + "updated_by": "user/test", + "updated_status": false + } } } } }, "401": { - "description": "Missing or invalid credentials provided by client", + "description": "Unauthorized", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } }, "403": { - "description": "Client does not have permission to access resource", + "description": "Permission denied", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } } } } @@ -815,21 +2236,108 @@ "operationId": "get_conversations_list_endpoint_handler_v1_conversations_get", "responses": { "200": { - "description": "List of conversations retrieved successfully", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": 
"#/components/schemas/ConversationsListResponse" + }, + "example": { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "created_at": "2024-01-01T00:00:00Z", + "last_message_at": "2024-01-01T00:05:00Z", + "last_used_model": "gemini/gemini-2.0-flash", + "last_used_provider": "gemini", + "message_count": 5, + "topic_summary": "Openshift Microservices Deployment Strategies" + }, + { + "conversation_id": "456e7890-e12b-34d5-a678-901234567890", + "created_at": "2024-01-01T01:00:00Z", + "last_used_model": "gemini/gemini-2.5-flash", + "last_used_provider": "gemini", + "message_count": 2, + "topic_summary": "RHDH Purpose Summary" + } + ] } } } }, "401": { - "description": "Unauthorized: Invalid or missing Bearer token", + "description": "Unauthorized", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "database": { + "value": { + "detail": { + "cause": "Failed to query the database", + "response": "Database query failed" + } + } + } } } } @@ -840,6 +2348,16 @@ "application/json": { "schema": { "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } } } } @@ -868,19 +2386,48 @@ ], "responses": { "200": { - "description": "Conversation retrieved successfully", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConversationResponse" + }, + "example": { + "chat_history": [ + { + "completed_at": "2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "started_at": "2024-01-01T00:01:00Z" + } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000" } } } }, "400": { - "description": "Invalid request", + "description": "Invalid request format", "content": { "application/json": { + "examples": { + "conversation_id": { + "value": { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + } + } + } + }, "schema": { "$ref": "#/components/schemas/BadRequestResponse" } @@ -888,9 +2435,27 @@ } }, "401": { - "description": "Unauthorized: Invalid or missing 
Bearer token", + "description": "Unauthorized", "content": { "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + }, "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" } @@ -898,29 +2463,95 @@ } }, "403": { - "description": "Client does not have permission to access conversation", + "description": "Permission denied", "content": { "application/json": { + "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + }, "schema": { - "$ref": "#/components/schemas/AccessDeniedResponse" + "$ref": "#/components/schemas/ForbiddenResponse" } } } }, "404": { - "description": "Conversation not found", + "description": "Resource not found", "content": { "application/json": { + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + } + }, "schema": { "$ref": "#/components/schemas/NotFoundResponse" } } } }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "database": { + "value": { + "detail": { + "cause": "Failed to query the database", + "response": "Database query failed" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + } + } + } + }, "503": { "description": "Service unavailable", "content": { "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + }, "schema": { "$ref": "#/components/schemas/ServiceUnavailableResponse" } @@ -959,19 +2590,45 @@ ], "responses": { "200": { - "description": "Conversation deleted successfully", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConversationDeleteResponse" + }, + "examples": { + "deleted": { + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation deleted successfully", + "success": true + } + }, + "not found": { + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation can not be deleted", + "success": true + } + } } } } }, "400": { - "description": "Invalid request", + "description": "Invalid request format", "content": { "application/json": { + "examples": { + "conversation_id": { + "value": { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + } + } + } + }, "schema": { "$ref": 
"#/components/schemas/BadRequestResponse" } @@ -979,9 +2636,27 @@ } }, "401": { - "description": "Unauthorized: Invalid or missing Bearer token", + "description": "Unauthorized", "content": { "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + }, "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" } @@ -989,29 +2664,95 @@ } }, "403": { - "description": "Client does not have permission to access conversation", + "description": "Permission denied", "content": { "application/json": { + "examples": { + "conversation delete": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + }, "schema": { - "$ref": "#/components/schemas/AccessDeniedResponse" + "$ref": "#/components/schemas/ForbiddenResponse" } } } }, "404": { - "description": "Conversation not found", + "description": "Resource not found", "content": { "application/json": { + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + } + }, "schema": { "$ref": "#/components/schemas/NotFoundResponse" } } } }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "database": { + "value": { + "detail": { + "cause": "Failed to query the database", + "response": "Database query failed" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + } + } + } + }, "503": { "description": "Service unavailable", "content": { "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + }, "schema": { "$ref": "#/components/schemas/ServiceUnavailableResponse" } @@ -1023,39 +2764,117 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/HTTPValidationError" + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/v2/conversations": { + "get": { + "tags": [ + "conversations_v2" + ], + "summary": "Get Conversations List Endpoint Handler", + "description": "Handle request to retrieve all conversations for the authenticated user.", + "operationId": "get_conversations_list_endpoint_handler_v2_conversations_get", + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ConversationsListResponseV2" + }, + "example": { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "last_message_timestamp": 1704067200.0, + 
"topic_summary": "Openshift Microservices Deployment Strategies" + } + ] + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } } } } - } - } - } - }, - "/v2/conversations": { - "get": { - "tags": [ - "conversations_v2" - ], - "summary": "Get Conversations List Endpoint Handler", - "description": "Handle request to retrieve all conversations for the authenticated user.", - "operationId": "get_conversations_list_endpoint_handler_v2_conversations_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "500": { + "description": "Internal server error", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ConversationsListResponseV2" + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "conversation cache": { + "value": { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + } + } + } } } - }, - "conversations": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "topic_summary": "This is a topic summary", - "last_message_timestamp": "2024-01-01T00:00:00Z" - } - ] + } } } } @@ -1081,60 +2900,149 @@ ], "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConversationResponse" + }, + "example": { + "chat_history": [ + { + "completed_at": "2024-01-01T00:01:05Z", + "messages": [ + { + "content": "Hello", + "type": "user" + }, + { + "content": "Hi there!", + "type": "assistant" + } + ], + "started_at": "2024-01-01T00:01:00Z" + } + ], + "conversation_id": "123e4567-e89b-12d3-a456-426614174000" } } - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "chat_history": [ - { - "messages": [ - { - "content": "Hi", - "type": "user" - }, - { - "content": "Hello!", - "type": "assistant" - } - ], - "started_at": "2024-01-01T00:00:00Z", - "completed_at": "2024-01-01T00:00:05Z", - "provider": "provider ID", - "model": "model ID" - } - ] + } }, "400": { - "description": "Missing or invalid credentials provided by client", + "description": "Invalid request format", "content": { "application/json": { + "examples": { + "conversation_id": { + "value": { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + } + } + } + }, "schema": { - "$ref": 
"#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/BadRequestResponse" } } } }, "401": { - "description": "Unauthorized: Invalid or missing Bearer token", + "description": "Unauthorized", "content": { "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + }, "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" } } } }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, "404": { - "detail": { - "response": "Conversation not found", - "cause": "The specified conversation ID does not exist." - }, - "description": "Not Found" + "description": "Resource not found", + "content": { + "application/json": { + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "conversation cache": { + "value": { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + } + } + } }, "422": { "description": "Validation Error", @@ -1168,44 +3076,146 @@ ], "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConversationDeleteResponse" + }, + "examples": { + "deleted": { + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation deleted successfully", + "success": true + } + }, + "not found": { + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation can not be deleted", + "success": true + } + } } } - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "success": true, - "message": "Conversation deleted successfully" + } }, "400": { - "description": "Missing or invalid credentials provided by client", + "description": "Invalid request format", "content": { "application/json": { + "examples": { + "conversation_id": { + "value": { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + } + } + } + }, "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/BadRequestResponse" } } } }, "401": { - "description": "Unauthorized: Invalid or missing Bearer token", + "description": "Unauthorized", 
"content": { "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + }, "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" } } } }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, "404": { - "detail": { - "response": "Conversation not found", - "cause": "The specified conversation ID does not exist." - }, - "description": "Not Found" + "description": "Resource not found", + "content": { + "application/json": { + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "conversation cache": { + "value": { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + } + } + } }, "422": { "description": "Validation Error", @@ -1249,44 +3259,135 @@ }, "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ConversationUpdateResponse" + }, + "example": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "message": "Topic summary updated successfully", + "success": true } } - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "success": true, - "message": "Topic summary updated successfully" + } }, "400": { - "description": "Missing or invalid credentials provided by client", + "description": "Invalid request format", "content": { "application/json": { + "examples": { + "conversation_id": { + "value": { + "detail": { + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", + "response": "Invalid conversation ID format" + } + } + } + }, "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/BadRequestResponse" } } } }, "401": { - "description": "Unauthorized: Invalid or missing Bearer token", + "description": "Unauthorized", "content": { "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + }, "schema": { 
"$ref": "#/components/schemas/UnauthorizedResponse" } } } }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, "404": { - "detail": { - "response": "Conversation not found", - "cause": "The specified conversation ID does not exist." - }, - "description": "Not Found" + "description": "Resource not found", + "content": { + "application/json": { + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/NotFoundResponse" + } + } + } + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + }, + "conversation cache": { + "value": { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + } + } + } + }, + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + } + } + } }, "422": { "description": "Validation Error", @@ -1321,66 +3422,290 @@ }, "responses": { "200": { - "description": "Successful Response", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/QueryResponse" + }, + "example": { + "available_quotas": { + "daily": 1000, + "monthly": 50000 + }, + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "input_tokens": 150, + "output_tokens": 75, + "rag_chunks": [ + { + "content": "OLM is a component of the Operator Framework toolkit...", + "score": 0.95, + "source": "kubernetes-docs/operators.md" + } + ], + "referenced_documents": [ + { + "doc_title": "Operator Lifecycle Manager (OLM)", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" + } + ], + "response": "Operator Lifecycle Manager (OLM) helps users install...", + "tool_calls": [ + { + "arguments": { + "query": "operator lifecycle manager" + }, + "result": { + "chunks_found": 5 + }, + "tool_name": "knowledge_search" + } + ], + "truncated": false + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this 
action" + } + } + }, + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + }, + "model override": { + "value": { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." + } + } + } } } - }, - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "LLM answer", - "referenced_documents": [ - { - "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html", - "doc_title": "Operator Lifecycle Manager (OLM)" - } - ] + } }, - "400": { - "description": "Missing or invalid credentials provided by client", + "404": { + "description": "Resource not found", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/NotFoundResponse" + }, + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + }, + "provider": { + "value": { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + } + } + }, + "model": { + "value": { + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + } + } + } } } } }, - "403": { - "description": "Client does not have permission to access conversation", + "422": { + "description": "Request validation failed", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" + "$ref": "#/components/schemas/UnprocessableEntityResponse" + }, + "examples": { + "invalid format": { + "value": { + "detail": { + "cause": "Invalid request format. 
The request body could not be parsed.", + "response": "Invalid request format" + } + } + }, + "missing attributes": { + "value": { + "detail": { + "cause": "Missing required attributes: ['query', 'model', 'provider']", + "response": "Missing required attributes" + } + } + }, + "invalid value": { + "value": { + "detail": { + "cause": "Invalid attatchment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']", + "response": "Invalid attribute value" + } + } + } } } } }, "429": { - "description": "The quota has been exceeded", + "description": "Quota limit exceeded", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/QuotaExceededResponse" + }, + "examples": { + "model": { + "value": { + "detail": { + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" + } + } + }, + "user none": { + "value": { + "detail": { + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "cluster none": { + "value": { + "detail": { + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "subject none": { + "value": { + "detail": { + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "user insufficient": { + "value": { + "detail": { + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "cluster insufficient": { + "value": { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "subject insufficient": { + "value": { + "detail": { + "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + } + } + } } } } }, "500": { - "description": "Internal Server Error", - "detail": { - "response": "Unable to connect to Llama Stack", - "cause": "Connection error." 
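The hunk below replaces the old, malformed `500` entry (a bare `detail` object is not valid directly under an OpenAPI response) with a proper `content`/`examples` block. For reference, a minimal sketch of how such named examples are attached to a FastAPI route via the `responses=` mapping; the route and dict name are illustrative, not taken from this patch:

```python
from fastapi import FastAPI

app = FastAPI()

# Named examples live under content -> application/json -> examples,
# matching the structure the corrected spec now emits.
internal_error = {
    "description": "Internal server error",
    "content": {
        "application/json": {
            "examples": {
                "configuration": {
                    "value": {
                        "detail": {
                            "cause": "Lightspeed Stack configuration has not been initialized.",
                            "response": "Configuration is not loaded",
                        }
                    }
                }
            }
        }
    },
}

@app.get("/example", responses={500: internal_error})  # hypothetical route
def example() -> dict[str, str]:
    return {"status": "ok"}
```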
+ "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } } }, - "422": { - "description": "Validation Error", + "503": { + "description": "Service unavailable", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/HTTPValidationError" + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } } } } @@ -1424,49 +3749,246 @@ } } }, - "400": { - "description": "Missing or invalid credentials provided by client", + "401": { + "description": "Unauthorized", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } } } } }, - "401": { - "description": "Unauthorized: Invalid or missing Bearer token for k8s auth", + "403": { + "description": "Permission denied", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "conversation read": { + "value": { + "detail": { + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + } + } + }, + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + }, + "model override": { + "value": { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." 
+ } + } + } } } } }, - "403": { - "description": "User is not authorized", + "404": { + "description": "Resource not found", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" + "$ref": "#/components/schemas/NotFoundResponse" + }, + "examples": { + "conversation": { + "value": { + "detail": { + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", + "response": "Conversation not found" + } + } + }, + "provider": { + "value": { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + } + } + }, + "model": { + "value": { + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + } + } + } + } + } + } + }, + "422": { + "description": "Request validation failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnprocessableEntityResponse" + }, + "examples": { + "invalid format": { + "value": { + "detail": { + "cause": "Invalid request format. The request body could not be parsed.", + "response": "Invalid request format" + } + } + }, + "missing attributes": { + "value": { + "detail": { + "cause": "Missing required attributes: ['query', 'model', 'provider']", + "response": "Missing required attributes" + } + } + }, + "invalid value": { + "value": { + "detail": { + "cause": "Invalid attatchment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']", + "response": "Invalid attribute value" + } + } + } + } + } + } + }, + "429": { + "description": "Quota limit exceeded", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QuotaExceededResponse" + }, + "examples": { + "model": { + "value": { + "detail": { + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" + } + } + }, + "user none": { + "value": { + "detail": { + "cause": "User 123 has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "cluster none": { + "value": { + "detail": { + "cause": "Cluster has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "subject none": { + "value": { + "detail": { + "cause": "Unknown subject 999 has no available tokens.", + "response": "The quota has been exceeded" + } + } + }, + "user insufficient": { + "value": { + "detail": { + "cause": "User 123 has 5 tokens, but 10 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "cluster insufficient": { + "value": { + "detail": { + "cause": "Cluster has 500 tokens, but 900 tokens are needed.", + "response": "The quota has been exceeded" + } + } + }, + "subject insufficient": { + "value": { + "detail": { + "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", + "response": "The quota has been exceeded" + } + } + } } } } }, "500": { - "description": "Internal Server Error", - "detail": { - "response": "Unable to connect to Llama Stack", - "cause": "Connection error." 
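The `429` examples above distinguish model, user, cluster, and unknown-subject quotas only in `detail.cause`; `detail.response` stays generic. A hypothetical client-side check, assuming the shared error envelope; the endpoint path, host, and payload are assumptions, not taken from this patch:

```python
import requests

resp = requests.post(
    "http://localhost:8080/v1/query",  # illustrative host and path
    json={"query": "Tell me about Kubernetes"},
    timeout=60,
)
if resp.status_code == 429:
    detail = resp.json()["detail"]
    # e.g. "The quota has been exceeded:
    #       User 123 has 5 tokens, but 10 tokens are needed."
    print(f"{detail['response']}: {detail['cause']}")
```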
+ "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InternalServerErrorResponse" + }, + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + } } }, - "422": { - "description": "Validation Error", + "503": { + "description": "Service unavailable", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/HTTPValidationError" + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } } } } @@ -1484,21 +4006,84 @@ "operationId": "readiness_probe_get_method_readiness_get", "responses": { "200": { - "description": "Service is ready", + "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ReadinessResponse" + }, + "example": { + "providers": [], + "ready": true, + "reason": "Service is ready" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } } } } }, "503": { - "description": "Service is not ready", + "description": "Service unavailable", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ReadinessResponse" + "$ref": "#/components/schemas/ServiceUnavailableResponse" + }, + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } } } } @@ -1516,11 +4101,137 @@ "operationId": "liveness_probe_get_method_liveness_get", "responses": { "200": { - "description": "Service is alive", + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LivenessResponse" + }, + "example": { + "alive": true + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", + "content": { + "application/json": { + 
"schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + } + } + } + } + } + }, + "/authorized": { + "post": { + "tags": [ + "authorized" + ], + "summary": "Authorized Endpoint Handler", + "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", + "operationId": "authorized_endpoint_handler_authorized_post", + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AuthorizedResponse" + }, + "example": { + "skip_userid_check": false, + "user_id": "123e4567-e89b-12d3-a456-426614174000", + "username": "user1" + } + } + } + }, + "401": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + }, + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + } + } + }, + "403": { + "description": "Permission denied", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LivenessResponse" + "$ref": "#/components/schemas/ForbiddenResponse" + }, + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } } } } @@ -1528,73 +4239,117 @@ } } }, - "/authorized": { - "post": { + "/metrics": { + "get": { "tags": [ - "authorized" + "metrics" ], - "summary": "Authorized Endpoint Handler", - "description": "Handle request to the /authorized endpoint.\n\nProcess POST requests to the /authorized endpoint, returning\nthe authenticated user's ID and username.\n\nReturns:\n AuthorizedResponse: Contains the user ID and username of the authenticated user.", - "operationId": "authorized_endpoint_handler_authorized_post", + "summary": "Metrics Endpoint Handler", + "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.", + "operationId": "metrics_endpoint_handler_metrics_get", "responses": { "200": { - "description": "The user is logged-in and authorized to access OLS", + "description": "Successful Response", "content": { - "application/json": { + "text/plain": { "schema": { - "$ref": "#/components/schemas/AuthorizedResponse" + "type": "string" } } } }, - "400": { - "description": "Missing or invalid credentials provided by client for the noop and noop-with-token authentication modules", + "401": { + "description": "Unauthorized", "content": { "application/json": { + "examples": { + "missing header": { + "value": { + "detail": { + "cause": "No Authorization header found", + 
"response": "Missing or invalid credentials provided by client" + } + } + }, + "missing token": { + "value": { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + } + } + } + } + }, + "text/plain": { "schema": { "$ref": "#/components/schemas/UnauthorizedResponse" } } } }, - "401": { - "description": "Missing or invalid credentials provided by client for the k8s authentication module", + "403": { + "description": "Permission denied", "content": { "application/json": { + "examples": { + "endpoint": { + "value": { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + } + } + } + } + }, + "text/plain": { "schema": { - "$ref": "#/components/schemas/UnauthorizedResponse" + "$ref": "#/components/schemas/ForbiddenResponse" } } } }, - "403": { - "description": "User is not authorized", + "500": { + "description": "Internal server error", "content": { "application/json": { + "examples": { + "configuration": { + "value": { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + } + } + } + } + }, + "text/plain": { "schema": { - "$ref": "#/components/schemas/ForbiddenResponse" + "$ref": "#/components/schemas/InternalServerErrorResponse" } } } - } - } - } - }, - "/metrics": { - "get": { - "tags": [ - "metrics" - ], - "summary": "Metrics Endpoint Handler", - "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.", - "operationId": "metrics_endpoint_handler_metrics_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "503": { + "description": "Service unavailable", "content": { + "application/json": { + "examples": { + "llama stack": { + "value": { + "detail": { + "cause": "Connection error while trying to reach backend service.", + "response": "Unable to connect to Llama Stack" + } + } + } + } + }, "text/plain": { "schema": { - "type": "string" + "$ref": "#/components/schemas/ServiceUnavailableResponse" } } } @@ -1605,27 +4360,6 @@ }, "components": { "schemas": { - "AccessDeniedResponse": { - "properties": { - "detail": { - "$ref": "#/components/schemas/DetailModel" - } - }, - "type": "object", - "required": [ - "detail" - ], - "title": "AccessDeniedResponse", - "description": "403 Access Denied - User does not have permission to perform the action.", - "examples": [ - { - "detail": { - "cause": "User 6789 does not have permission to access conversation with ID 123e4567-e89b-12d3-a456-426614174000.", - "response": "Access denied" - } - } - ] - }, "AccessRule": { "properties": { "role": { @@ -1858,22 +4592,28 @@ }, "BadRequestResponse": { "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, "detail": { "$ref": "#/components/schemas/DetailModel" } }, "type": "object", "required": [ + "status_code", "detail" ], "title": "BadRequestResponse", - "description": "400 Bad Request - Invalid resource identifier.", + "description": "400 Bad Request. 
Invalid resource identifier.", "examples": [ { "detail": { - "cause": "Conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format", + "cause": "The conversation ID 123e4567-e89b-12d3-a456-426614174000 has invalid format.", "response": "Invalid conversation ID format" - } + }, + "label": "conversation_id" } ] }, @@ -2042,6 +4782,90 @@ "title": "Configuration", "description": "Global service configuration." }, + "ConfigurationResponse": { + "properties": { + "configuration": { + "$ref": "#/components/schemas/Configuration" + } + }, + "type": "object", + "required": [ + "configuration" + ], + "title": "ConfigurationResponse", + "description": "Success response model for the config endpoint.", + "examples": [ + { + "configuration": { + "authentication": { + "module": "noop", + "skip_tls_verification": false + }, + "authorization": { + "access_rules": [] + }, + "byok_rag": [], + "conversation_cache": {}, + "database": { + "sqlite": { + "db_path": "/tmp/lightspeed-stack.db" + } + }, + "inference": { + "default_model": "gpt-4-turbo", + "default_provider": "openai" + }, + "llama_stack": { + "api_key": "*****", + "url": "http://localhost:8321", + "use_as_library_client": false + }, + "mcp_servers": [ + { + "name": "server1", + "provider_id": "provider1", + "url": "http://url.com:1" + } + ], + "name": "lightspeed-stack", + "quota_handlers": { + "enable_token_history": false, + "limiters": [], + "scheduler": { + "period": 1 + } + }, + "service": { + "access_log": true, + "auth_enabled": false, + "color_log": true, + "cors": { + "allow_credentials": false, + "allow_headers": [ + "*" + ], + "allow_methods": [ + "*" + ], + "allow_origins": [ + "*" + ] + }, + "host": "localhost", + "port": 8080, + "tls_config": {}, + "workers": 1 + }, + "user_data_collection": { + "feedback_enabled": true, + "feedback_storage": "/tmp/data/feedback", + "transcripts_enabled": false, + "transcripts_storage": "/tmp/data/transcripts" + } + } + } + ] + }, "ConversationData": { "properties": { "conversation_id": { @@ -2077,15 +4901,29 @@ "properties": { "conversation_id": { "type": "string", - "title": "Conversation Id" + "title": "Conversation Id", + "description": "The conversation ID (UUID) that was deleted.", + "examples": [ + "123e4567-e89b-12d3-a456-426614174000" + ] }, "success": { "type": "boolean", - "title": "Success" + "title": "Success", + "description": "Whether the deletion was successful.", + "examples": [ + true, + false + ] }, "response": { "type": "string", - "title": "Response" + "title": "Response", + "description": "A message about the deletion result.", + "examples": [ + "Conversation deleted successfully", + "Conversation cannot be deleted" + ] } }, "type": "object", @@ -2095,12 +4933,23 @@ "response" ], "title": "ConversationDeleteResponse", - "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.\n\nExample:\n ```python\n delete_response = ConversationDeleteResponse(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n response=\"Conversation deleted successfully\"\n )\n ```", + "description": "Model representing a response for deleting a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was deleted.\n success: Whether the deletion was successful.\n response: A message about the deletion result.", "examples": [ { - "conversation_id": 
"123e4567-e89b-12d3-a456-426614174000", - "response": "Conversation deleted successfully", - "success": true + "label": "deleted", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation deleted successfully", + "success": true + } + }, + { + "label": "not found", + "value": { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "response": "Conversation can not be deleted", + "success": true + } } ] }, @@ -2212,7 +5061,7 @@ "conversation_id" ], "title": "ConversationDetails", - "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation.\n\nExample:\n ```python\n conversation = ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\"\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n )\n ```" + "description": "Model representing the details of a user conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n created_at: When the conversation was created.\n last_message_at: When the last message was sent.\n message_count: Number of user messages in the conversation.\n last_used_model: The last model used for the conversation.\n last_used_provider: The provider of the last used model.\n topic_summary: The topic summary for the conversation." 
}, "ConversationHistoryConfiguration": { "properties": { @@ -2311,7 +5160,7 @@ "chat_history" ], "title": "ConversationResponse", - "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The simplified chat history as a list of conversation turns.\n\nExample:\n ```python\n conversation_response = ConversationResponse(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n chat_history=[\n {\n \"messages\": [\n {\"content\": \"Hello\", \"type\": \"user\"},\n {\"content\": \"Hi there!\", \"type\": \"assistant\"}\n ],\n \"started_at\": \"2024-01-01T00:01:00Z\",\n \"completed_at\": \"2024-01-01T00:01:05Z\"\n }\n ]\n )\n ```", + "description": "Model representing a response for retrieving a conversation.\n\nAttributes:\n conversation_id: The conversation ID (UUID).\n chat_history: The simplified chat history as a list of conversation turns.", "examples": [ { "chat_history": [ @@ -2360,26 +5209,17 @@ "conversation_id": { "type": "string", "title": "Conversation Id", - "description": "The conversation ID (UUID) that was updated", - "examples": [ - "123e4567-e89b-12d3-a456-426614174000" - ] + "description": "The conversation ID (UUID) that was updated" }, "success": { "type": "boolean", "title": "Success", - "description": "Whether the update was successful", - "examples": [ - true - ] + "description": "Whether the update was successful" }, "message": { "type": "string", "title": "Message", - "description": "A message about the update result", - "examples": [ - "Topic summary updated successfully" - ] + "description": "A message about the update result" } }, "type": "object", @@ -2389,7 +5229,14 @@ "message" ], "title": "ConversationUpdateResponse", - "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.\n\nExample:\n ```python\n update_response = ConversationUpdateResponse(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n success=True,\n message=\"Topic summary updated successfully\",\n )\n ```" + "description": "Model representing a response for updating a conversation topic summary.\n\nAttributes:\n conversation_id: The conversation ID (UUID) that was updated.\n success: Whether the update was successful.\n message: A message about the update result.", + "examples": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "message": "Topic summary updated successfully", + "success": true + } + ] }, "ConversationsListResponse": { "properties": { @@ -2406,7 +5253,7 @@ "conversations" ], "title": "ConversationsListResponse", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.\n\nExample:\n ```python\n conversations_list = ConversationsListResponse(\n conversations=[\n ConversationDetails(\n conversation_id=\"123e4567-e89b-12d3-a456-426614174000\",\n created_at=\"2024-01-01T00:00:00Z\",\n last_message_at=\"2024-01-01T00:05:00Z\",\n message_count=5,\n last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"Openshift Microservices Deployment Strategies\",\n ),\n ConversationDetails(\n conversation_id=\"456e7890-e12b-34d5-a678-901234567890\"\n created_at=\"2024-01-01T01:00:00Z\",\n message_count=2,\n 
last_used_model=\"gemini/gemini-2.0-flash\",\n last_used_provider=\"gemini\",\n topic_summary=\"RHDH Purpose Summary\",\n )\n ]\n )\n ```", + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation details associated with the user.", "examples": [ { "conversations": [ @@ -2446,7 +5293,18 @@ "conversations" ], "title": "ConversationsListResponseV2", - "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user." + "description": "Model representing a response for listing conversations of a user.\n\nAttributes:\n conversations: List of conversation data associated with the user.", + "examples": [ + { + "conversations": [ + { + "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "last_message_timestamp": 1704067200.0, + "topic_summary": "Openshift Microservices Deployment Strategies" + } + ] + } + ] }, "CustomProfile": { "properties": { @@ -2576,48 +5434,6 @@ "title": "DetailModel", "description": "Nested detail model for error responses." }, - "ErrorResponse": { - "properties": { - "detail": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Detail", - "description": "Error details", - "examples": [ - { - "cause": "Failed to handle request to https://bam-api.res.ibm.com/v2/text", - "response": "Error while validation question" - }, - { - "cause": "Invalid conversation ID 1237-e89b-12d3-a456-426614174000", - "response": "Error retrieving conversation history" - } - ] - } - }, - "type": "object", - "required": [ - "detail" - ], - "title": "ErrorResponse", - "description": "Model representing error response for query endpoint.", - "examples": [ - { - "detail": { - "cause": "Failed to handle request to https://bam-api.res.ibm.com/v2/text", - "response": "Error while validation question" - } - }, - { - "detail": { - "cause": "Invalid conversation ID 1237-e89b-12d3-a456-426614174000", - "response": "Error retrieving conversation history" - } - } - ] - }, "FeedbackCategory": { "type": "string", "enum": [ @@ -2766,7 +5582,7 @@ "response" ], "title": "FeedbackResponse", - "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.\n\nExample:\n ```python\n feedback_response = FeedbackResponse(response=\"feedback received\")\n ```", + "description": "Model representing a response to a feedback request.\n\nAttributes:\n response: The response of the feedback request.", "examples": [ { "response": "feedback received" @@ -2804,7 +5620,7 @@ "status" ], "title": "FeedbackStatusUpdateResponse", - "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.\n\nExample:\n ```python\n status_response = StatusResponse(\n status={\n \"previous_status\": true,\n \"updated_status\": false,\n \"updated_by\": \"user/test\",\n \"timestamp\": \"2023-03-15 12:34:56\"\n },\n )\n ```", + "description": "Model representing a response to a feedback status update request.\n\nAttributes:\n status: The previous and current status of the service and who updated it.", "examples": [ { "status": { @@ -2818,22 +5634,56 @@ }, "ForbiddenResponse": { "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, "detail": { "$ref": "#/components/schemas/DetailModel" } }, "type": "object", "required": 
[ + "status_code", "detail" ], "title": "ForbiddenResponse", - "description": "403 Forbidden - User does not have access to this resource.", + "description": "403 Forbidden. Access denied.", "examples": [ { "detail": { - "cause": "User 42 is not allowed to access conversation with ID 123e4567-e89b-12d3-a456-426614174000.", - "response": "Access denied" - } + "cause": "User 6789 does not have permission to read conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation read" + }, + { + "detail": { + "cause": "User 6789 does not have permission to delete conversation with ID 123e4567-e89b-12d3-a456-426614174000", + "response": "User does not have permission to perform this action" + }, + "label": "conversation delete" + }, + { + "detail": { + "cause": "User 6789 is not authorized to access this endpoint.", + "response": "User does not have permission to access this endpoint" + }, + "label": "endpoint" + }, + { + "detail": { + "cause": "Storing feedback is disabled.", + "response": "Storing feedback is disabled." + }, + "label": "feedback" + }, + { + "detail": { + "cause": "User lacks model_override permission required to override model/provider.", + "response": "This instance does not permit overriding model/provider in the query request (missing permission: MODEL_OVERRIDE). Please remove the model and provider fields from your request." + }, + "label": "model override" } ] }, @@ -2936,12 +5786,74 @@ "llama_stack_version" ], "title": "InfoResponse", - "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.\n\nExample:\n ```python\n info_response = InfoResponse(\n name=\"Lightspeed Stack\",\n service_version=\"1.0.0\",\n llama_stack_version=\"0.2.22\",\n )\n ```", + "description": "Model representing a response to an info request.\n\nAttributes:\n name: Service name.\n service_version: Service version.\n llama_stack_version: Llama Stack version.", + "examples": [ + { + "llama_stack_version": "1.0.0", + "name": "Lightspeed Stack", + "service_version": "1.0.0" + } + ] + }, + "InternalServerErrorResponse": { + "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "InternalServerErrorResponse", + "description": "500 Internal Server Error.", "examples": [ { - "llama_stack_version": "1.0.0", - "name": "Lightspeed Stack", - "service_version": "1.0.0" + "detail": { + "cause": "An unexpected error occurred while processing the request.", + "response": "Internal server error" + }, + "label": "internal" + }, + { + "detail": { + "cause": "Lightspeed Stack configuration has not been initialized.", + "response": "Configuration is not loaded" + }, + "label": "configuration" + }, + { + "detail": { + "cause": "Failed to store feedback at directory: /path/example", + "response": "Failed to store feedback" + }, + "label": "feedback storage" + }, + { + "detail": { + "cause": "Failed to call backend API", + "response": "Error while processing query" + }, + "label": "query" + }, + { + "detail": { + "cause": "Conversation cache is not configured or unavailable.", + "response": "Conversation cache not configured" + }, + "label": "conversation cache" + }, + { + "detail": { + "cause": "Failed to query the database", + 
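All of the error schemas in this hunk (BadRequestResponse, ForbiddenResponse, NotFoundResponse, InternalServerErrorResponse, and the rest) now share one envelope: a required `status_code` plus a `DetailModel` carrying `cause` and `response`. A minimal sketch of that shape, assuming the Pydantic models mirror the generated JSON Schema; the class name `ErrorEnvelope` is illustrative:

```python
from pydantic import BaseModel

class DetailModel(BaseModel):
    """Nested detail model for error responses."""
    cause: str
    response: str

class ErrorEnvelope(BaseModel):  # illustrative name for the shared shape
    status_code: int
    detail: DetailModel

err = ErrorEnvelope(
    status_code=500,
    detail=DetailModel(
        cause="Failed to query the database",
        response="Database query failed",
    ),
)
print(err.model_dump())
```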
"response": "Database query failed" + }, + "label": "database" } ] }, @@ -3054,7 +5966,7 @@ "alive" ], "title": "LivenessResponse", - "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.\n\nExample:\n ```python\n liveness_response = LivenessResponse(alive=True)\n ```", + "description": "Model representing a response to a liveness request.\n\nAttributes:\n alive: If app is alive.", "examples": [ { "alive": true @@ -3149,8 +6061,18 @@ }, "type": "array", "title": "Models", - "description": "List of models available", - "examples": [ + "description": "List of models available" + } + }, + "type": "object", + "required": [ + "models" + ], + "title": "ModelsResponse", + "description": "Model representing a response to models request.", + "examples": [ + { + "models": [ { "api_model_type": "llm", "identifier": "openai/gpt-4-turbo", @@ -3159,34 +6081,24 @@ "provider_id": "openai", "provider_resource_id": "gpt-4-turbo", "type": "model" - }, - { - "api_model_type": "llm", - "identifier": "openai/gpt-3.5-turbo-0125", - "metadata": {}, - "model_type": "llm", - "provider_id": "openai", - "provider_resource_id": "gpt-3.5-turbo-0125", - "type": "model" } ] } - }, - "type": "object", - "required": [ - "models" - ], - "title": "ModelsResponse", - "description": "Model representing a response to models request." + ] }, "NotFoundResponse": { "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, "detail": { "$ref": "#/components/schemas/DetailModel" } }, "type": "object", "required": [ + "status_code", "detail" ], "title": "NotFoundResponse", @@ -3194,9 +6106,31 @@ "examples": [ { "detail": { - "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist.", + "cause": "Conversation with ID 123e4567-e89b-12d3-a456-426614174000 does not exist", "response": "Conversation not found" - } + }, + "label": "conversation" + }, + { + "detail": { + "cause": "Provider with ID openai does not exist", + "response": "Provider not found" + }, + "label": "provider" + }, + { + "detail": { + "cause": "Model with ID gpt-4-turbo is not configured", + "response": "Model not found" + }, + "label": "model" + }, + { + "detail": { + "cause": "Rag with ID vs_7b52a8cf-0fa3-489c-beab-27e061d102f3 does not exist", + "response": "Rag not found" + }, + "label": "rag" } ] }, @@ -3319,8 +6253,7 @@ "api": { "type": "string", "title": "Api", - "description": "The API this provider implements", - "example": "inference" + "description": "The API this provider implements" }, "config": { "additionalProperties": { @@ -3346,10 +6279,7 @@ }, "type": "object", "title": "Config", - "description": "Provider configuration parameters", - "example": { - "api_key": "********" - } + "description": "Provider configuration parameters" }, "health": { "additionalProperties": { @@ -3375,26 +6305,19 @@ }, "type": "object", "title": "Health", - "description": "Current health status of the provider", - "example": { - "message": "Healthy", - "status": "OK" - } + "description": "Current health status of the provider" }, "provider_id": { "type": "string", "title": "Provider Id", - "description": "Unique provider identifier", - "example": "openai" + "description": "Unique provider identifier" }, "provider_type": { "type": "string", "title": "Provider Type", - "description": "Provider implementation type", - "example": "remote::openai" + "description": "Provider implementation type" } }, - "additionalProperties": true, "type": "object", "required": [ "api", @@ 
-3404,7 +6327,21 @@ "provider_type" ], "title": "ProviderResponse", - "description": "Model representing a response to get specific provider request." + "description": "Model representing a response to get specific provider request.", + "examples": [ + { + "api": "inference", + "config": { + "api_key": "********" + }, + "health": { + "message": "Healthy", + "status": "OK" + }, + "provider_id": "openai", + "provider_type": "remote::openai" + } + ] }, "ProvidersListResponse": { "properties": { @@ -3418,37 +6355,7 @@ }, "type": "object", "title": "Providers", - "description": "List of available API types and their corresponding providers", - "examples": [ - { - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference" - } - ], - "datasetio": [ - { - "provider_id": "huggingface", - "provider_type": "remote::huggingface" - }, - { - "provider_id": "localfs", - "provider_type": "inline::localfs" - } - ], - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers" - }, - { - "provider_id": "openai", - "provider_type": "remote::openai" - } - ] - } - ] + "description": "List of available API types and their corresponding providers" } }, "type": "object", @@ -3456,7 +6363,29 @@ "providers" ], "title": "ProvidersListResponse", - "description": "Model representing a response to providers request." + "description": "Model representing a response to providers request.", + "examples": [ + { + "providers": { + "agents": [ + { + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference" + } + ], + "inference": [ + { + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers" + }, + { + "provider_id": "openai", + "provider_type": "remote::openai" + } + ] + } + } + ] }, "QueryRequest": { "properties": { @@ -3579,6 +6508,23 @@ false ] }, + "generate_topic_summary": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Generate Topic Summary", + "description": "Whether to generate topic summary for new conversations", + "default": true, + "examples": [ + true, + false + ] + }, "media_type": { "anyOf": [ { @@ -3602,7 +6548,7 @@ "query" ], "title": "QueryRequest", - "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n media_type: The optional media type for response format (application/json or text/plain).\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", + "description": "Model representing a request for the LLM (Language Model).\n\nAttributes:\n query: The query string.\n conversation_id: The optional conversation ID (UUID).\n provider: The optional provider.\n model: The optional model.\n system_prompt: The optional system prompt.\n attachments: The optional attachments.\n no_tools: Whether to bypass all tools and MCP servers (default: False).\n generate_topic_summary: Whether to generate topic summary for new conversations.\n media_type: The optional media type for response format (application/json or text/plain).\n\nExample:\n ```python\n query_request = QueryRequest(query=\"Tell me about Kubernetes\")\n ```", "examples": [ { "attachments": [ @@ 
-3623,6 +6569,7 @@ } ], "conversation_id": "123e4567-e89b-12d3-a456-426614174000", + "generate_topic_summary": true, "model": "model-name", "no_tools": false, "provider": "openai", @@ -3789,58 +6736,70 @@ }, "QuotaExceededResponse": { "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, "detail": { "$ref": "#/components/schemas/DetailModel" } }, "type": "object", "required": [ + "status_code", "detail" ], "title": "QuotaExceededResponse", - "description": "429 Too Many Requests - LLM quota exceeded.", + "description": "429 Too Many Requests - Quota limit exceeded.", "examples": [ + { + "detail": { + "cause": "The token quota for model gpt-4-turbo has been exceeded.", + "response": "The model quota has been exceeded" + }, + "label": "model" + }, { "detail": { "cause": "User 123 has no available tokens.", "response": "The quota has been exceeded" - } + }, + "label": "user none" }, { "detail": { "cause": "Cluster has no available tokens.", "response": "The quota has been exceeded" - } + }, + "label": "cluster none" }, { "detail": { "cause": "Unknown subject 999 has no available tokens.", "response": "The quota has been exceeded" - } + }, + "label": "subject none" }, { "detail": { "cause": "User 123 has 5 tokens, but 10 tokens are needed.", "response": "The quota has been exceeded" - } + }, + "label": "user insufficient" }, { "detail": { "cause": "Cluster has 500 tokens, but 900 tokens are needed.", "response": "The quota has been exceeded" - } + }, + "label": "cluster insufficient" }, { "detail": { "cause": "Unknown subject 999 has 3 tokens, but 6 tokens are needed.", "response": "The quota has been exceeded" - } - }, - { - "detail": { - "cause": "The token quota for model gpt-4-turbo has been exceeded.", - "response": "The model quota has been exceeded" - } + }, + "label": "subject insufficient" } ] }, @@ -4077,7 +7036,18 @@ "status" ], "title": "RAGInfoResponse", - "description": "Model representing a response with information about RAG DB." + "description": "Model representing a response with information about RAG DB.", + "examples": [ + { + "created_at": 1763391371, + "id": "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "last_active_at": 1763391371, + "name": "Faiss Store with Knowledge base", + "object": "vector_store", + "status": "completed", + "usage_bytes": 1024000 + } + ] }, "RAGListResponse": { "properties": { @@ -4099,7 +7069,16 @@ "rags" ], "title": "RAGListResponse", - "description": "Model representing a response to list RAGs request." 
+ "description": "Model representing a response to list RAGs request.", + "examples": [ + { + "rags": [ + "vs_00000000-cafe-babe-0000-000000000000", + "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", + "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3" + ] + } + ] }, "RHIdentityConfiguration": { "properties": { @@ -4159,7 +7138,7 @@ "providers" ], "title": "ReadinessResponse", - "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.\n\nExample:\n ```python\n readiness_response = ReadinessResponse(\n ready=False,\n reason=\"Service is not ready\",\n providers=[\n ProviderHealthStatus(\n provider_id=\"ollama\",\n status=\"unhealthy\",\n message=\"Server is unavailable\"\n )\n ]\n )\n ```", + "description": "Model representing response to a readiness request.\n\nAttributes:\n ready: If service is ready.\n reason: The reason for the readiness.\n providers: List of unhealthy providers in case of readiness failure.", "examples": [ { "providers": [], @@ -4264,22 +7243,28 @@ }, "ServiceUnavailableResponse": { "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, "detail": { "$ref": "#/components/schemas/DetailModel" } }, "type": "object", "required": [ + "status_code", "detail" ], "title": "ServiceUnavailableResponse", - "description": "503 Backend Unavailable - Unable to reach backend service.", + "description": "503 Backend Unavailable.", "examples": [ { "detail": { - "cause": "Connection error while trying to reach Llama Stack API.", + "cause": "Connection error while trying to reach backend service.", "response": "Unable to connect to Llama Stack" - } + }, + "label": "llama stack" } ] }, @@ -4292,8 +7277,18 @@ }, "type": "array", "title": "Shields", - "description": "List of shields available", - "examples": [ + "description": "List of shields available" + } + }, + "type": "object", + "required": [ + "shields" + ], + "title": "ShieldsResponse", + "description": "Model representing a response to shields request.", + "examples": [ + { + "shields": [ { "identifier": "lightspeed_question_validity-shield", "params": {}, @@ -4303,13 +7298,7 @@ } ] } - }, - "type": "object", - "required": [ - "shields" - ], - "title": "ShieldsResponse", - "description": "Model representing a response to shields request." 
+ ] }, "StatusResponse": { "properties": { @@ -4339,7 +7328,7 @@ "status" ], "title": "StatusResponse", - "description": "Model representing a response to a status request.\n\nAttributes:\n functionality: The functionality of the service.\n status: The status of the service.\n\nExample:\n ```python\n status_response = StatusResponse(\n functionality=\"feedback\",\n status={\"enabled\": True},\n )\n ```", + "description": "Model representing a response to a status request.\n\nAttributes:\n functionality: The functionality of the service.\n status: The status of the service.", "examples": [ { "functionality": "feedback", @@ -4437,27 +7426,7 @@ }, "type": "array", "title": "Tools", - "description": "List of tools available from all configured MCP servers and built-in toolgroups", - "examples": [ - [ - { - "description": "Read contents of a file from the filesystem", - "identifier": "filesystem_read", - "parameters": [ - { - "description": "Path to the file to read", - "name": "path", - "parameter_type": "string", - "required": true - } - ], - "provider_id": "model-context-protocol", - "server_source": "http://localhost:3000", - "toolgroup_id": "filesystem-tools", - "type": "tool" - } - ] - ] + "description": "List of tools available from all configured MCP servers and built-in toolgroups" } }, "type": "object", @@ -4465,16 +7434,43 @@ "tools" ], "title": "ToolsResponse", - "description": "Model representing a response to tools request." + "description": "Model representing a response to tools request.", + "examples": [ + { + "tools": [ + { + "description": "Read contents of a file from the filesystem", + "identifier": "filesystem_read", + "parameters": [ + { + "description": "Path to the file to read", + "name": "path", + "parameter_type": "string", + "required": true + } + ], + "provider_id": "model-context-protocol", + "server_source": "http://localhost:3000", + "toolgroup_id": "filesystem-tools", + "type": "tool" + } + ] + } + ] }, "UnauthorizedResponse": { "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, "detail": { "$ref": "#/components/schemas/DetailModel" } }, "type": "object", "required": [ + "status_code", "detail" ], "title": "UnauthorizedResponse", @@ -4482,9 +7478,100 @@ "examples": [ { "detail": { - "cause": "Missing or invalid credentials provided by client", - "response": "Unauthorized" - } + "cause": "No Authorization header found", + "response": "Missing or invalid credentials provided by client" + }, + "label": "missing header" + }, + { + "detail": { + "cause": "No token found in Authorization header", + "response": "Missing or invalid credentials provided by client" + }, + "label": "missing token" + }, + { + "detail": { + "cause": "Token has expired", + "response": "Missing or invalid credentials provided by client" + }, + "label": "expired token" + }, + { + "detail": { + "cause": "Invalid token signature", + "response": "Missing or invalid credentials provided by client" + }, + "label": "invalid signature" + }, + { + "detail": { + "cause": "Token signed by unknown key", + "response": "Missing or invalid credentials provided by client" + }, + "label": "invalid key" + }, + { + "detail": { + "cause": "Token missing claim: user_id", + "response": "Missing or invalid credentials provided by client" + }, + "label": "missing claim" + }, + { + "detail": { + "cause": "Invalid or expired Kubernetes token", + "response": "Missing or invalid credentials provided by client" + }, + "label": "invalid k8s token" + }, + { + "detail": { + "cause": 
"Authentication key server returned invalid data", + "response": "Missing or invalid credentials provided by client" + }, + "label": "invalid jwk token" + } + ] + }, + "UnprocessableEntityResponse": { + "properties": { + "status_code": { + "type": "integer", + "title": "Status Code" + }, + "detail": { + "$ref": "#/components/schemas/DetailModel" + } + }, + "type": "object", + "required": [ + "status_code", + "detail" + ], + "title": "UnprocessableEntityResponse", + "description": "422 Unprocessable Entity - Request validation failed.", + "examples": [ + { + "detail": { + "cause": "Invalid request format. The request body could not be parsed.", + "response": "Invalid request format" + }, + "label": "invalid format" + }, + { + "detail": { + "cause": "Missing required attributes: ['query', 'model', 'provider']", + "response": "Missing required attributes" + }, + "label": "missing attributes" + }, + { + "detail": { + "cause": "Invalid attatchment type: must be one of ['text/plain', 'application/json', 'application/yaml', 'application/xml']", + "response": "Invalid attribute value" + }, + "label": "invalid value" } ] }, diff --git a/src/app/endpoints/authorized.py b/src/app/endpoints/authorized.py index b108b205..3a24382b 100644 --- a/src/app/endpoints/authorized.py +++ b/src/app/endpoints/authorized.py @@ -12,26 +12,12 @@ logger = logging.getLogger(__name__) router = APIRouter(tags=["authorized"]) - authorized_responses: dict[int | str, dict[str, Any]] = { - 200: { - "description": "The user is logged-in and authorized to access OLS", - "model": AuthorizedResponse, - }, - 400: { - "description": "Missing or invalid credentials provided by client for the noop and " - "noop-with-token authentication modules", - "model": UnauthorizedResponse, - }, - 401: { - "description": "Missing or invalid credentials provided by client for the " - "k8s authentication module", - "model": UnauthorizedResponse, - }, - 403: { - "description": "User is not authorized", - "model": ForbiddenResponse, - }, + 200: AuthorizedResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), } diff --git a/src/app/endpoints/config.py b/src/app/endpoints/config.py index a68f8707..9c464ff2 100644 --- a/src/app/endpoints/config.py +++ b/src/app/endpoints/config.py @@ -9,7 +9,13 @@ from authentication.interface import AuthTuple from authorization.middleware import authorize from configuration import configuration -from models.config import Action, Configuration +from models.config import Action +from models.responses import ( + ConfigurationResponse, + ForbiddenResponse, + InternalServerErrorResponse, + UnauthorizedResponse, +) from utils.endpoints import check_configuration_loaded logger = logging.getLogger(__name__) @@ -17,44 +23,12 @@ get_config_responses: dict[int | str, dict[str, Any]] = { - 200: { - "name": "foo bar baz", - "service": { - "host": "localhost", - "port": 8080, - "auth_enabled": False, - "workers": 1, - "color_log": True, - "access_log": True, - "tls_config": { - "tls_certificate_path": "config/certificate.crt", - "tls_key_path": "config/private.key", - "tls_key_password": None, - }, - }, - "llama_stack": { - "url": "http://localhost:8321", - "api_key": "*****", - "use_as_library_client": False, - "library_client_config_path": None, - }, - "user_data_collection": { - "feedback_enabled": True, - "feedback_storage": "/tmp/data/feedback", - "transcripts_enabled": False, - 
"transcripts_storage": None, - }, - "mcp_servers": [ - {"name": "server1", "provider_id": "provider1", "url": "http://url.com:1"}, - {"name": "server2", "provider_id": "provider2", "url": "http://url.com:2"}, - {"name": "server3", "provider_id": "provider3", "url": "http://url.com:3"}, - ], - }, - 503: { - "detail": { - "response": "Configuration is not loaded", - } - }, + 200: ConfigurationResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), } @@ -63,7 +37,7 @@ async def config_endpoint_handler( auth: Annotated[AuthTuple, Depends(get_auth_dependency())], request: Request, -) -> Configuration: +) -> ConfigurationResponse: """ Handle requests to the /config endpoint. @@ -71,7 +45,7 @@ async def config_endpoint_handler( current service configuration. Returns: - Configuration: The loaded service configuration object. + ConfigurationResponse: The loaded service configuration response. """ # Used only for authorization _ = auth @@ -82,4 +56,4 @@ async def config_endpoint_handler( # ensure that configuration is loaded check_configuration_loaded(configuration) - return configuration.configuration + return ConfigurationResponse(configuration=configuration.configuration) diff --git a/src/app/endpoints/conversations.py b/src/app/endpoints/conversations.py index 0b894bef..1e989137 100644 --- a/src/app/endpoints/conversations.py +++ b/src/app/endpoints/conversations.py @@ -3,8 +3,9 @@ import logging from typing import Any -from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import APIRouter, Depends, HTTPException, Request from llama_stack_client import APIConnectionError, NotFoundError +from sqlalchemy.exc import SQLAlchemyError from app.database import get_session from authentication import get_auth_dependency @@ -14,20 +15,21 @@ from models.config import Action from models.database.conversations import UserConversation from models.responses import ( + BadRequestResponse, ConversationDeleteResponse, ConversationDetails, ConversationResponse, ConversationsListResponse, - UnauthorizedResponse, + ForbiddenResponse, + InternalServerErrorResponse, NotFoundResponse, - AccessDeniedResponse, - BadRequestResponse, ServiceUnavailableResponse, + UnauthorizedResponse, ) from utils.endpoints import ( + can_access_conversation, check_configuration_loaded, delete_conversation, - can_access_conversation, retrieve_conversation, ) from utils.suid import check_suid @@ -35,73 +37,47 @@ logger = logging.getLogger("app.endpoints.handlers") router = APIRouter(tags=["conversations"]) -conversation_responses: dict[int | str, dict[str, Any]] = { - 200: { - "model": ConversationResponse, - "description": "Conversation retrieved successfully", - }, - 400: { - "model": BadRequestResponse, - "description": "Invalid request", - }, - 401: { - "model": UnauthorizedResponse, - "description": "Unauthorized: Invalid or missing Bearer token", - }, - 403: { - "model": AccessDeniedResponse, - "description": "Client does not have permission to access conversation", - }, - 404: { - "model": NotFoundResponse, - "description": "Conversation not found", - }, - 503: { - "model": ServiceUnavailableResponse, - "description": "Service unavailable", - }, + +conversation_get_responses: dict[int | str, dict[str, Any]] = { + 200: ConversationResponse.openapi_response(), + 400: 
BadRequestResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["conversation read", "endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["conversation"]), + 500: InternalServerErrorResponse.openapi_response( + examples=["database", "configuration"] + ), + 503: ServiceUnavailableResponse.openapi_response(), } conversation_delete_responses: dict[int | str, dict[str, Any]] = { - 200: { - "model": ConversationDeleteResponse, - "description": "Conversation deleted successfully", - }, - 400: { - "model": BadRequestResponse, - "description": "Invalid request", - }, - 401: { - "model": UnauthorizedResponse, - "description": "Unauthorized: Invalid or missing Bearer token", - }, - 403: { - "model": AccessDeniedResponse, - "description": "Client does not have permission to access conversation", - }, - 404: { - "model": NotFoundResponse, - "description": "Conversation not found", - }, - 503: { - "model": ServiceUnavailableResponse, - "description": "Service unavailable", - }, + 200: ConversationDeleteResponse.openapi_response(), + 400: BadRequestResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response( + examples=["conversation delete", "endpoint"] + ), + 404: NotFoundResponse.openapi_response(examples=["conversation"]), + 500: InternalServerErrorResponse.openapi_response( + examples=["database", "configuration"] + ), + 503: ServiceUnavailableResponse.openapi_response(), } conversations_list_responses: dict[int | str, dict[str, Any]] = { - 200: { - "model": ConversationsListResponse, - "description": "List of conversations retrieved successfully", - }, - 401: { - "model": UnauthorizedResponse, - "description": "Unauthorized: Invalid or missing Bearer token", - }, - 503: { - "model": ServiceUnavailableResponse, - "description": "Service unavailable", - }, + 200: ConversationsListResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response( + examples=["database", "configuration"] + ), + 503: ServiceUnavailableResponse.openapi_response(), } @@ -196,20 +172,15 @@ async def get_conversations_list_endpoint_handler( return ConversationsListResponse(conversations=conversations) - except Exception as e: + except SQLAlchemyError as e: logger.exception( "Error retrieving conversations for user %s: %s", user_id, e ) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": "Unknown error", - "cause": f"Unknown error while getting conversations for user {user_id}", - }, - ) from e + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e -@router.get("/conversations/{conversation_id}", responses=conversation_responses) +@router.get("/conversations/{conversation_id}", responses=conversation_get_responses) @authorize(Action.GET_CONVERSATION) async def get_conversation_endpoint_handler( request: Request, @@ -238,12 +209,10 @@ async def get_conversation_endpoint_handler( # Validate conversation ID format if not check_suid(conversation_id): logger.error("Invalid conversation ID format: %s", conversation_id) - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - 
detail=BadRequestResponse( - resource="conversation", resource_id=conversation_id - ).dump_detail(), + response = BadRequestResponse( + resource="conversation", resource_id=conversation_id ) + raise HTTPException(**response.model_dump()) user_id = auth[0] if not can_access_conversation( @@ -258,25 +227,18 @@ async def get_conversation_endpoint_handler( user_id, conversation_id, ) - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=AccessDeniedResponse( - user_id=user_id, - resource="conversation", - resource_id=conversation_id, - action="read", - ).dump_detail(), + response = ForbiddenResponse.conversation( + action="read", resource_id=conversation_id, user_id=user_id ) + raise HTTPException(**response.model_dump()) # If reached this, user is authorized to retreive this conversation conversation = retrieve_conversation(conversation_id) if conversation is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=NotFoundResponse( - resource="conversation", resource_id=conversation_id - ).dump_detail(), + response = NotFoundResponse( + resource="conversation", resource_id=conversation_id ) + raise HTTPException(**response.model_dump()) agent_id = conversation_id logger.info("Retrieving conversation %s", conversation_id) @@ -287,12 +249,10 @@ async def get_conversation_endpoint_handler( agent_sessions = (await client.agents.session.list(agent_id=agent_id)).data if not agent_sessions: logger.error("No sessions found for conversation %s", conversation_id) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=NotFoundResponse( - resource="conversation", resource_id=conversation_id - ).dump_detail(), + response = NotFoundResponse( + resource="conversation", resource_id=conversation_id ) + raise HTTPException(**response.model_dump()) session_id = str(agent_sessions[0].get("session_id")) session_response = await client.agents.session.retrieve( @@ -312,35 +272,20 @@ async def get_conversation_endpoint_handler( except APIConnectionError as e: logger.error("Unable to connect to Llama Stack: %s", e) - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=ServiceUnavailableResponse( - backend_name="Llama Stack", cause=str(e) - ).dump_detail(), - ) from e + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e except NotFoundError as e: logger.error("Conversation not found: %s", e) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=NotFoundResponse( - resource="conversation", resource_id=conversation_id - ).dump_detail(), - ) from e - - except HTTPException: - raise + response = NotFoundResponse( + resource="conversation", resource_id=conversation_id + ) + raise HTTPException(**response.model_dump()) from e - except Exception as e: - # Handle case where session doesn't exist or other errors + except SQLAlchemyError as e: logger.exception("Error retrieving conversation %s: %s", conversation_id, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": "Unknown error", - "cause": f"Unknown error while getting conversation {conversation_id} : {str(e)}", - }, - ) from e + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e @router.delete( @@ -368,12 +313,10 @@ async def delete_conversation_endpoint_handler( # Validate conversation ID format if not check_suid(conversation_id): logger.error("Invalid conversation ID 
format: %s", conversation_id) - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=BadRequestResponse( - resource="conversation", resource_id=conversation_id - ).dump_detail(), + response = BadRequestResponse( + resource="conversation", resource_id=conversation_id ) + raise HTTPException(**response.model_dump()) user_id = auth[0] if not can_access_conversation( @@ -388,25 +331,18 @@ async def delete_conversation_endpoint_handler( user_id, conversation_id, ) - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=AccessDeniedResponse( - user_id=user_id, - resource="conversation", - resource_id=conversation_id, - action="delete", - ).dump_detail(), + response = ForbiddenResponse.conversation( + action="delete", resource_id=conversation_id, user_id=user_id ) + raise HTTPException(**response.model_dump()) # If reached this, user is authorized to retreive this conversation conversation = retrieve_conversation(conversation_id) if conversation is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=NotFoundResponse( - resource="conversation", resource_id=conversation_id - ).dump_detail(), + response = NotFoundResponse( + resource="conversation", resource_id=conversation_id ) + raise HTTPException(**response.model_dump()) agent_id = conversation_id logger.info("Deleting conversation %s", conversation_id) @@ -421,9 +357,8 @@ async def delete_conversation_endpoint_handler( # If no sessions are found, do not raise an error, just return a success response logger.info("No sessions found for conversation %s", conversation_id) return ConversationDeleteResponse( + deleted=False, conversation_id=conversation_id, - success=True, - response="Conversation deleted successfully", ) session_id = str(agent_sessions[0].get("session_id")) @@ -435,37 +370,21 @@ async def delete_conversation_endpoint_handler( delete_conversation(conversation_id=conversation_id) return ConversationDeleteResponse( + deleted=True, conversation_id=conversation_id, - success=True, - response="Conversation deleted successfully", ) except APIConnectionError as e: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=ServiceUnavailableResponse( - backend_name="Llama Stack", cause=str(e) - ).dump_detail(), - ) from e + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e except NotFoundError as e: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=NotFoundResponse( - resource="conversation", resource_id=conversation_id - ).dump_detail(), - ) from e - - except HTTPException: - raise + response = NotFoundResponse( + resource="conversation", resource_id=conversation_id + ) + raise HTTPException(**response.model_dump()) from e - except Exception as e: - # Handle case where session doesn't exist or other errors + except SQLAlchemyError as e: logger.exception("Error deleting conversation %s: %s", conversation_id, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": "Unknown error", - "cause": f"Unknown error while deleting conversation {conversation_id} : {str(e)}", - }, - ) from e + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from e diff --git a/src/app/endpoints/conversations_v2.py b/src/app/endpoints/conversations_v2.py index f03fe5dd..adb60822 100644 --- a/src/app/endpoints/conversations_v2.py +++ b/src/app/endpoints/conversations_v2.py @@ 
-3,7 +3,7 @@ import logging from typing import Any -from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import APIRouter, Depends, HTTPException, Request from authentication import get_auth_dependency from authorization.middleware import authorize @@ -12,10 +12,14 @@ from models.config import Action from models.requests import ConversationUpdateRequest from models.responses import ( + BadRequestResponse, ConversationDeleteResponse, ConversationResponse, - ConversationUpdateResponse, ConversationsListResponseV2, + ConversationUpdateResponse, + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, UnauthorizedResponse, ) from utils.endpoints import check_configuration_loaded @@ -25,92 +29,54 @@ router = APIRouter(tags=["conversations_v2"]) -conversation_responses: dict[int | str, dict[str, Any]] = { - 200: { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "chat_history": [ - { - "messages": [ - {"content": "Hi", "type": "user"}, - {"content": "Hello!", "type": "assistant"}, - ], - "started_at": "2024-01-01T00:00:00Z", - "completed_at": "2024-01-01T00:00:05Z", - "provider": "provider ID", - "model": "model ID", - } - ], - }, - 400: { - "description": "Missing or invalid credentials provided by client", - "model": UnauthorizedResponse, - }, - 401: { - "description": "Unauthorized: Invalid or missing Bearer token", - "model": UnauthorizedResponse, - }, - 404: { - "detail": { - "response": "Conversation not found", - "cause": "The specified conversation ID does not exist.", - } - }, +conversation_get_responses: dict[int | str, dict[str, Any]] = { + 200: ConversationResponse.openapi_response(), + 400: BadRequestResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["conversation"]), + 500: InternalServerErrorResponse.openapi_response( + examples=["conversation cache", "configuration"] + ), } conversation_delete_responses: dict[int | str, dict[str, Any]] = { - 200: { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "success": True, - "message": "Conversation deleted successfully", - }, - 400: { - "description": "Missing or invalid credentials provided by client", - "model": UnauthorizedResponse, - }, - 401: { - "description": "Unauthorized: Invalid or missing Bearer token", - "model": UnauthorizedResponse, - }, - 404: { - "detail": { - "response": "Conversation not found", - "cause": "The specified conversation ID does not exist.", - } - }, + 200: ConversationDeleteResponse.openapi_response(), + 400: BadRequestResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["conversation"]), + 500: InternalServerErrorResponse.openapi_response( + examples=["conversation cache", "configuration"] + ), } conversations_list_responses: dict[int | str, dict[str, Any]] = { - 200: { - "conversations": [ - { - "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "topic_summary": "This is a topic summary", - "last_message_timestamp": "2024-01-01T00:00:00Z", - } - ] - } + 200: ConversationsListResponseV2.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: 
ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    500: InternalServerErrorResponse.openapi_response(
+        examples=["conversation cache", "configuration"]
+    ),
 }
 
 conversation_update_responses: dict[int | str, dict[str, Any]] = {
-    200: {
-        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-        "success": True,
-        "message": "Topic summary updated successfully",
-    },
-    400: {
-        "description": "Missing or invalid credentials provided by client",
-        "model": UnauthorizedResponse,
-    },
-    401: {
-        "description": "Unauthorized: Invalid or missing Bearer token",
-        "model": UnauthorizedResponse,
-    },
-    404: {
-        "detail": {
-            "response": "Conversation not found",
-            "cause": "The specified conversation ID does not exist.",
-        }
-    },
+    200: ConversationUpdateResponse.openapi_response(),
+    400: BadRequestResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    404: NotFoundResponse.openapi_response(examples=["conversation"]),
+    500: InternalServerErrorResponse.openapi_response(
+        examples=["conversation cache", "configuration"]
+    ),
 }
 
 
@@ -129,15 +95,10 @@ async def get_conversations_list_endpoint_handler(
     skip_userid_check = auth[2]
 
-    if configuration.conversation_cache is None:
-        logger.warning("Conversation cache is not configured")
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail={
-                "response": "Conversation cache is not configured",
-                "cause": "Conversation cache is not configured",
-            },
-        )
+    if configuration.conversation_cache_configuration.type is None:
+        logger.warning("Conversation cache is not configured")
+        response = InternalServerErrorResponse.cache_unavailable()
+        raise HTTPException(**response.model_dump())
 
     conversations = configuration.conversation_cache.list(user_id, skip_userid_check)
     logger.info("Conversations for user %s: %s", user_id, len(conversations))
@@ -145,7 +106,7 @@
     return ConversationsListResponseV2(conversations=conversations)
 
 
-@router.get("/conversations/{conversation_id}", responses=conversation_responses)
+@router.get("/conversations/{conversation_id}", responses=conversation_get_responses)
 @authorize(Action.GET_CONVERSATION)
 async def get_conversation_endpoint_handler(
     request: Request,  # pylint: disable=unused-argument
@@ -161,15 +122,10 @@
     skip_userid_check = auth[2]
 
-    if configuration.conversation_cache is None:
-        logger.warning("Conversation cache is not configured")
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail={
-                "response": "Conversation cache is not configured",
-                "cause": "Conversation cache is not configured",
-            },
-        )
+    if configuration.conversation_cache_configuration.type is None:
+        logger.warning("Conversation cache is not configured")
+        response = InternalServerErrorResponse.cache_unavailable()
+        raise HTTPException(**response.model_dump())
 
     check_conversation_existence(user_id, conversation_id)
 
@@ -201,15 +157,10 @@ async def delete_conversation_endpoint_handler(
     skip_userid_check = auth[2]
 
-    if configuration.conversation_cache is None:
-        logger.warning("Conversation cache is not configured")
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail={
-                "response": "Conversation cache is not configured",
-                "cause": "Conversation cache is not configured",
-            },
-        )
+    if configuration.conversation_cache_configuration.type is None:
+
logger.warning("Converastion cache is not configured") + response = InternalServerErrorResponse.cache_unavailable() + raise HTTPException(**response.model_dump()) check_conversation_existence(user_id, conversation_id) @@ -217,18 +168,7 @@ async def delete_conversation_endpoint_handler( deleted = configuration.conversation_cache.delete( user_id, conversation_id, skip_userid_check ) - - if deleted: - return ConversationDeleteResponse( - conversation_id=conversation_id, - success=True, - response="Conversation deleted successfully", - ) - return ConversationDeleteResponse( - conversation_id=conversation_id, - success=True, - response="Conversation can not be deleted", - ) + return ConversationDeleteResponse(deleted=deleted, conversation_id=conversation_id) @router.put("/conversations/{conversation_id}", responses=conversation_update_responses) @@ -251,15 +191,10 @@ async def update_conversation_endpoint_handler( skip_userid_check = auth[2] - if configuration.conversation_cache is None: + if configuration.conversation_cache_configuration.type is None: logger.warning("Conversation cache is not configured") - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "response": "Conversation cache is not configured", - "cause": "Conversation cache is not configured", - }, - ) + response = InternalServerErrorResponse.cache_unavailable() + raise HTTPException(**response.model_dump()) check_conversation_existence(user_id, conversation_id) @@ -285,31 +220,25 @@ def check_valid_conversation_id(conversation_id: str) -> None: """Check validity of conversation ID format.""" if not check_suid(conversation_id): logger.error("Invalid conversation ID format: %s", conversation_id) - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail={ - "response": "Invalid conversation ID format", - "cause": f"Conversation ID {conversation_id} is not a valid UUID", - }, + response = BadRequestResponse( + resource="conversation", resource_id=conversation_id ) + raise HTTPException(**response.model_dump()) def check_conversation_existence(user_id: str, conversation_id: str) -> None: """Check if conversation exists.""" # checked already, but we need to make pyright happy - if configuration.conversation_cache is None: + if configuration.conversation_cache_configuration.type is None: return conversations = configuration.conversation_cache.list(user_id, False) conversation_ids = [conv.conversation_id for conv in conversations] if conversation_id not in conversation_ids: logger.error("No conversation found for conversation ID %s", conversation_id) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "response": "Conversation not found", - "cause": f"Conversation {conversation_id} could not be retrieved.", - }, + response = NotFoundResponse( + resource="conversation", resource_id=conversation_id ) + raise HTTPException(**response.model_dump()) def transform_chat_message(entry: CacheEntry) -> dict[str, Any]: diff --git a/src/app/endpoints/feedback.py b/src/app/endpoints/feedback.py index 91579823..e411698a 100644 --- a/src/app/endpoints/feedback.py +++ b/src/app/endpoints/feedback.py @@ -7,7 +7,7 @@ from pathlib import Path from typing import Annotated, Any -from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import APIRouter, Depends, HTTPException, Request from authentication import get_auth_dependency from authentication.interface import AuthTuple @@ -16,59 +16,45 @@ from models.config import Action from models.requests import 
FeedbackRequest, FeedbackStatusUpdateRequest from models.responses import ( - ErrorResponse, FeedbackResponse, FeedbackStatusUpdateResponse, ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, StatusResponse, UnauthorizedResponse, ) +from utils.endpoints import check_configuration_loaded from utils.suid import get_suid logger = logging.getLogger(__name__) router = APIRouter(prefix="/feedback", tags=["feedback"]) feedback_status_lock = threading.Lock() -# Response for the feedback endpoint + feedback_post_response: dict[int | str, dict[str, Any]] = { - 200: { - "description": "Feedback received and stored", - "model": FeedbackResponse, - }, - 401: { - "description": "Missing or invalid credentials provided by client", - "model": UnauthorizedResponse, - }, - 403: { - "description": "Client does not have permission to access resource", - "model": ForbiddenResponse, - }, - 500: { - "description": "User feedback can not be stored", - "model": ErrorResponse, - }, + 200: FeedbackResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint", "feedback"]), + 404: NotFoundResponse.openapi_response(examples=["conversation"]), + 500: InternalServerErrorResponse.openapi_response( + examples=["feedback storage", "configuration"] + ), } feedback_put_response: dict[int | str, dict[str, Any]] = { - 200: { - "description": "Feedback status successfully updated", - "model": FeedbackStatusUpdateResponse, - }, - 401: { - "description": "Missing or invalid credentials provided by client", - "model": UnauthorizedResponse, - }, - 403: { - "description": "Client does not have permission to access resource", - "model": ForbiddenResponse, - }, + 200: FeedbackStatusUpdateResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), } feedback_get_response: dict[int | str, dict[str, Any]] = { - 200: { - "description": "Feedback status successfully retrieved", - "model": StatusResponse, - } + 200: StatusResponse.openapi_response(), } @@ -99,10 +85,8 @@ async def assert_feedback_enabled(_request: Request) -> None: """ feedback_enabled = is_feedback_enabled() if not feedback_enabled: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Forbidden: Feedback is disabled", - ) + response = ForbiddenResponse.feedback_disabled() + raise HTTPException(**response.model_dump()) @router.post("", responses=feedback_post_response) @@ -133,17 +117,8 @@ async def feedback_endpoint_handler( logger.debug("Feedback received %s", str(feedback_request)) user_id, _, _, _ = auth - try: - store_feedback(user_id, feedback_request.model_dump(exclude={"model_config"})) - except Exception as e: - logger.error("Error storing user feedback: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": "Error storing user feedback", - "cause": str(e), - }, - ) from e + check_configuration_loaded(configuration) + store_feedback(user_id, feedback_request.model_dump(exclude={"model_config"})) return FeedbackResponse(response="feedback received") @@ -166,21 +141,18 @@ def store_feedback(user_id: str, feedback: dict) -> None: storage_path = Path( configuration.user_data_collection_configuration.feedback_storage or "" ) - 
storage_path.mkdir(parents=True, exist_ok=True) - current_time = str(datetime.now(UTC)) data_to_store = {"user_id": user_id, "timestamp": current_time, **feedback} - - # stores feedback in a file under unique uuid + # Stores feedback in a file under unique uuid feedback_file_path = storage_path / f"{get_suid()}.json" try: + storage_path.mkdir(parents=True, exist_ok=True) with open(feedback_file_path, "w", encoding="utf-8") as feedback_file: json.dump(data_to_store, feedback_file) - except (OSError, IOError) as e: + except OSError as e: logger.error("Failed to store feedback at %s: %s", feedback_file_path, e) - raise - - logger.info("Feedback stored successfully at %s", feedback_file_path) + response = InternalServerErrorResponse.feedback_path_invalid(str(storage_path)) + raise HTTPException(**response.model_dump()) from e @router.get("/status", responses=feedback_get_response) @@ -218,6 +190,7 @@ async def update_feedback_status( FeedbackStatusUpdateResponse: Indicates whether feedback is enabled. """ user_id, _, _, _ = auth + check_configuration_loaded(configuration) requested_status = feedback_update_request.get_value() with feedback_status_lock: diff --git a/src/app/endpoints/health.py b/src/app/endpoints/health.py index 23e89921..aa919d2e 100644 --- a/src/app/endpoints/health.py +++ b/src/app/endpoints/health.py @@ -10,6 +10,7 @@ from fastapi import APIRouter, Depends, Response, status from llama_stack.providers.datatypes import HealthStatus +from llama_stack_client import APIConnectionError from authentication import get_auth_dependency from authentication.interface import AuthTuple @@ -17,15 +18,36 @@ from client import AsyncLlamaStackClientHolder from models.config import Action from models.responses import ( + ForbiddenResponse, LivenessResponse, ProviderHealthStatus, ReadinessResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, ) logger = logging.getLogger("app.endpoints.handlers") router = APIRouter(tags=["health"]) +get_readiness_responses: dict[int | str, dict[str, Any]] = { + 200: ReadinessResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + +get_liveness_responses: dict[int | str, dict[str, Any]] = { + 200: LivenessResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), +} + + async def get_providers_health_statuses() -> list[ProviderHealthStatus]: """ Retrieve the health status of all configured providers. @@ -51,8 +73,7 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: ] return health_results - except Exception as e: # pylint: disable=broad-exception-caught - # eg. 
no providers defined + except APIConnectionError as e: logger.error("Failed to check providers health: %s", e) return [ ProviderHealthStatus( @@ -63,18 +84,6 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]: ] -get_readiness_responses: dict[int | str, dict[str, Any]] = { - 200: { - "description": "Service is ready", - "model": ReadinessResponse, - }, - 503: { - "description": "Service is not ready", - "model": ReadinessResponse, - }, -} - - @router.get("/readiness", responses=get_readiness_responses) @authorize(Action.INFO) async def readiness_probe_get_method( @@ -112,15 +121,6 @@ async def readiness_probe_get_method( return ReadinessResponse(ready=ready, reason=reason, providers=unhealthy_providers) -get_liveness_responses: dict[int | str, dict[str, Any]] = { - 200: { - "description": "Service is alive", - "model": LivenessResponse, - }, - # HTTP_503_SERVICE_UNAVAILABLE will never be returned when unreachable -} - - @router.get("/liveness", responses=get_liveness_responses) @authorize(Action.INFO) async def liveness_probe_get_method( diff --git a/src/app/endpoints/info.py b/src/app/endpoints/info.py index 1cf701e6..e3cfb84e 100644 --- a/src/app/endpoints/info.py +++ b/src/app/endpoints/info.py @@ -3,7 +3,7 @@ import logging from typing import Annotated, Any -from fastapi import APIRouter, Depends, HTTPException, Request, status +from fastapi import APIRouter, Depends, HTTPException, Request from llama_stack_client import APIConnectionError from authentication import get_auth_dependency @@ -12,7 +12,12 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from models.config import Action -from models.responses import InfoResponse +from models.responses import ( + ForbiddenResponse, + InfoResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, +) from version import __version__ logger = logging.getLogger("app.endpoints.handlers") @@ -20,17 +25,10 @@ get_info_responses: dict[int | str, dict[str, Any]] = { - 200: { - "name": "Service name", - "service_version": "Service version", - "llama_stack_version": "Llama Stack version", - }, - 500: { - "detail": { - "response": "Unable to connect to Llama Stack", - "cause": "Connection error.", - } - }, + 200: InfoResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response(), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 503: ServiceUnavailableResponse.openapi_response(), } @@ -74,10 +72,5 @@ async def info_endpoint_handler( # connection to Llama Stack server except APIConnectionError as e: logger.error("Unable to connect to Llama Stack: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": "Unable to connect to Llama Stack", - "cause": str(e), - }, - ) from e + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e diff --git a/src/app/endpoints/metrics.py b/src/app/endpoints/metrics.py index 5a8c90d0..53ab1892 100644 --- a/src/app/endpoints/metrics.py +++ b/src/app/endpoints/metrics.py @@ -1,6 +1,6 @@ """Handler for REST API call to provide metrics.""" -from typing import Annotated +from typing import Annotated, Any from fastapi import APIRouter, Depends, Request from fastapi.responses import PlainTextResponse @@ -14,11 +14,29 @@ from authorization.middleware import authorize from metrics.utils import setup_model_metrics from models.config import Action +from models.responses import ( + ForbiddenResponse, + 
InternalServerErrorResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, +) router = APIRouter(tags=["metrics"]) -@router.get("/metrics", response_class=PlainTextResponse) +metrics_get_responses: dict[int | str, dict[str, Any]] = { + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), +} + + +@router.get( + "/metrics", response_class=PlainTextResponse, responses=metrics_get_responses +) @authorize(Action.GET_METRICS) async def metrics_endpoint_handler( auth: Annotated[AuthTuple, Depends(get_auth_dependency())], @@ -43,4 +61,4 @@ async def metrics_endpoint_handler( # Setup the model metrics if not already done. This is a one-time setup # and will not be run again on subsequent calls to this endpoint await setup_model_metrics() - return PlainTextResponse(generate_latest(), media_type=CONTENT_TYPE_LATEST) + return PlainTextResponse(generate_latest(), media_type=str(CONTENT_TYPE_LATEST)) diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py index a54749cb..f09a0954 100644 --- a/src/app/endpoints/models.py +++ b/src/app/endpoints/models.py @@ -3,7 +3,7 @@ import logging from typing import Annotated, Any -from fastapi import APIRouter, HTTPException, Request, status +from fastapi import APIRouter, HTTPException, Request from fastapi.params import Depends from llama_stack_client import APIConnectionError @@ -13,7 +13,13 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from models.config import Action -from models.responses import ModelsResponse +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + ModelsResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, +) from utils.endpoints import check_configuration_loaded logger = logging.getLogger(__name__) @@ -21,29 +27,13 @@ models_responses: dict[int | str, dict[str, Any]] = { - 200: { - "models": [ - { - "identifier": "all-MiniLM-L6-v2", - "metadata": {"embedding_dimension": 384}, - "api_model_type": "embedding", - "provider_id": "ollama", - "provider_resource_id": "all-minilm:latest", - "type": "model", - "model_type": "embedding", - }, - { - "identifier": "llama3.2:3b-instruct-fp16", - "metadata": {}, - "api_model_type": "llm", - "provider_id": "ollama", - "provider_resource_id": "llama3.2:3b-instruct-fp16", - "type": "model", - "model_type": "llm", - }, - ] - }, - 500: {"description": "Connection to Llama Stack is broken"}, + 200: ModelsResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), } @@ -85,23 +75,8 @@ async def models_endpoint_handler( m = [dict(m) for m in models] return ModelsResponse(models=m) - # connection to Llama Stack server + # Connection to Llama Stack server failed except APIConnectionError as e: logger.error("Unable to connect to Llama Stack: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": "Unable to connect to Llama Stack", - "cause": str(e), - }, - ) from e - # any other exception that can occur during model listing - except Exception as e: - 
logger.error("Unable to retrieve list of models: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": "Unable to retrieve list of models", - "cause": str(e), - }, - ) from e + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e diff --git a/src/app/endpoints/providers.py b/src/app/endpoints/providers.py index 426804bd..ae2ec9b4 100644 --- a/src/app/endpoints/providers.py +++ b/src/app/endpoints/providers.py @@ -3,9 +3,10 @@ import logging from typing import Annotated, Any -from fastapi import APIRouter, HTTPException, Request, status +from fastapi import APIRouter, HTTPException, Request from fastapi.params import Depends -from llama_stack_client import APIConnectionError +from llama_stack_client import APIConnectionError, BadRequestError +from llama_stack_client.types import ProviderListResponse from authentication import get_auth_dependency from authentication.interface import AuthTuple @@ -13,76 +14,61 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from models.config import Action -from models.responses import ProvidersListResponse, ProviderResponse +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, + ProviderResponse, + ProvidersListResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, +) from utils.endpoints import check_configuration_loaded logger = logging.getLogger(__name__) router = APIRouter(tags=["providers"]) -providers_responses: dict[int | str, dict[str, Any]] = { - 200: { - "providers": { - "agents": [ - { - "provider_id": "meta-reference", - "provider_type": "inline::meta-reference", - } - ], - "datasetio": [ - {"provider_id": "huggingface", "provider_type": "remote::huggingface"}, - {"provider_id": "localfs", "provider_type": "inline::localfs"}, - ], - "inference": [ - { - "provider_id": "sentence-transformers", - "provider_type": "inline::sentence-transformers", - }, - {"provider_id": "openai", "provider_type": "remote::openai"}, - ], - } - }, - 500: {"description": "Connection to Llama Stack is broken"}, +providers_list_responses: dict[int | str, dict[str, Any]] = { + 200: ProvidersListResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), } -provider_responses: dict[int | str, dict[str, Any]] = { - 200: { - "api": "inference", - "config": {"api_key": "********"}, - "health": { - "status": "Not Implemented", - "message": "Provider does not implement health check", - }, - "provider_id": "openai", - "provider_type": "remote::openai", - }, - 404: {"response": "Provider with given id not found"}, - 500: { - "response": "Unable to retrieve list of providers", - "cause": "Connection to Llama Stack is broken", - }, +provider_get_responses: dict[int | str, dict[str, Any]] = { + 200: ProviderResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["provider"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: 
diff --git a/src/app/endpoints/providers.py b/src/app/endpoints/providers.py
index 426804bd..ae2ec9b4 100644
--- a/src/app/endpoints/providers.py
+++ b/src/app/endpoints/providers.py
@@ -3,9 +3,10 @@
 import logging
 from typing import Annotated, Any

-from fastapi import APIRouter, HTTPException, Request, status
+from fastapi import APIRouter, HTTPException, Request
 from fastapi.params import Depends
-from llama_stack_client import APIConnectionError
+from llama_stack_client import APIConnectionError, BadRequestError
+from llama_stack_client.types import ProviderListResponse

 from authentication import get_auth_dependency
 from authentication.interface import AuthTuple
@@ -13,76 +14,61 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from models.config import Action
-from models.responses import ProvidersListResponse, ProviderResponse
+from models.responses import (
+    ForbiddenResponse,
+    InternalServerErrorResponse,
+    NotFoundResponse,
+    ProviderResponse,
+    ProvidersListResponse,
+    ServiceUnavailableResponse,
+    UnauthorizedResponse,
+)
 from utils.endpoints import check_configuration_loaded

 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["providers"])

-providers_responses: dict[int | str, dict[str, Any]] = {
-    200: {
-        "providers": {
-            "agents": [
-                {
-                    "provider_id": "meta-reference",
-                    "provider_type": "inline::meta-reference",
-                }
-            ],
-            "datasetio": [
-                {"provider_id": "huggingface", "provider_type": "remote::huggingface"},
-                {"provider_id": "localfs", "provider_type": "inline::localfs"},
-            ],
-            "inference": [
-                {
-                    "provider_id": "sentence-transformers",
-                    "provider_type": "inline::sentence-transformers",
-                },
-                {"provider_id": "openai", "provider_type": "remote::openai"},
-            ],
-        }
-    },
-    500: {"description": "Connection to Llama Stack is broken"},
+providers_list_responses: dict[int | str, dict[str, Any]] = {
+    200: ProvidersListResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

-provider_responses: dict[int | str, dict[str, Any]] = {
-    200: {
-        "api": "inference",
-        "config": {"api_key": "********"},
-        "health": {
-            "status": "Not Implemented",
-            "message": "Provider does not implement health check",
-        },
-        "provider_id": "openai",
-        "provider_type": "remote::openai",
-    },
-    404: {"response": "Provider with given id not found"},
-    500: {
-        "response": "Unable to retrieve list of providers",
-        "cause": "Connection to Llama Stack is broken",
-    },
+provider_get_responses: dict[int | str, dict[str, Any]] = {
+    200: ProviderResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    404: NotFoundResponse.openapi_response(examples=["provider"]),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

-@router.get("/providers", responses=providers_responses)
+@router.get("/providers", responses=providers_list_responses)
 @authorize(Action.LIST_PROVIDERS)
 async def providers_endpoint_handler(
     request: Request,
     auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
 ) -> ProvidersListResponse:
     """
-    Handle GET requests to list all available providers.
+    List all available providers grouped by API type.

-    Retrieves providers from the Llama Stack service, groups them by API type.
+    Returns:
+        ProvidersListResponse: Mapping from API type to list of providers.

     Raises:
         HTTPException:
-        - 500 if configuration is not loaded,
-        - 500 if unable to connect to Llama Stack,
-        - 500 for any unexpected retrieval errors.
-
-    Returns:
-        ProvidersListResponse: Object mapping API types to lists of providers.
+        - 401: Authentication failed
+        - 403: Authorization failed
+        - 500: Lightspeed Stack configuration not loaded
+        - 503: Unable to connect to Llama Stack
     """
     # Used only by the middleware
     _ = auth
@@ -96,41 +82,21 @@ async def providers_endpoint_handler(
     logger.info("Llama stack config: %s", llama_stack_configuration)

     try:
-        # try to get Llama Stack client
         client = AsyncLlamaStackClientHolder().get_client()
-        # retrieve providers
-        providers = await client.providers.list()
-        providers = [dict(p) for p in providers]
-        return ProvidersListResponse(providers=group_providers(providers))
-
-    # connection to Llama Stack server
+        providers: ProviderListResponse = await client.providers.list()
     except APIConnectionError as e:
         logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
-    # any other exception that can occur during model listing
-    except Exception as e:
-        logger.error("Unable to retrieve list of providers: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to retrieve list of providers",
-                "cause": str(e),
-            },
-        ) from e
-
-
-def group_providers(providers: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
-    """Group a list of providers by their API type.
+        response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
+        raise HTTPException(**response.model_dump()) from e
+
+    return ProvidersListResponse(providers=group_providers(providers))
+
+
+def group_providers(providers: ProviderListResponse) -> dict[str, list[dict[str, Any]]]:
+    """Group a list of ProviderInfo objects by their API type.

     Args:
-        providers: List of provider dictionaries. Each must contain
-        'api', 'provider_id', and 'provider_type' keys.
+        providers: List of ProviderInfo objects.

     Returns:
         Mapping from API type to list of providers containing
@@ -138,33 +104,35 @@ def group_providers(
     """
     result: dict[str, list[dict[str, Any]]] = {}
     for provider in providers:
-        result.setdefault(provider["api"], []).append(
+        result.setdefault(provider.api, []).append(
             {
-                "provider_id": provider["provider_id"],
-                "provider_type": provider["provider_type"],
+                "provider_id": provider.provider_id,
+                "provider_type": provider.provider_type,
             }
         )
     return result

-@router.get("/providers/{provider_id}", responses=provider_responses)
+@router.get("/providers/{provider_id}", responses=provider_get_responses)
 @authorize(Action.GET_PROVIDER)
 async def get_provider_endpoint_handler(
     request: Request,
     provider_id: str,
     auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
 ) -> ProviderResponse:
-    """Retrieve a single provider by its unique ID.
+    """
+    Retrieve a single provider by its unique ID.
+
+    Returns:
+        ProviderResponse: Provider details.

     Raises:
         HTTPException:
-        - 404 if provider with the given ID is not found,
-        - 500 if unable to connect to Llama Stack,
-        - 500 for any unexpected retrieval errors.
-
-    Returns:
-        ProviderResponse: A single provider's details including API, config, health,
-        provider_id, and provider_type.
+        - 401: Authentication failed
+        - 403: Authorization failed
+        - 404: Provider not found
+        - 500: Lightspeed Stack configuration not loaded
+        - 503: Unable to connect to Llama Stack
     """
     # Used only by the middleware
     _ = auth
@@ -178,38 +146,15 @@ async def get_provider_endpoint_handler(
     logger.info("Llama stack config: %s", llama_stack_configuration)

     try:
-        # try to get Llama Stack client
         client = AsyncLlamaStackClientHolder().get_client()
-        # retrieve providers
-        providers = await client.providers.list()
-        p = [dict(p) for p in providers]
-        match = next((item for item in p if item["provider_id"] == provider_id), None)
-        if not match:
-            raise HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail={"response": f"Provider with id '{provider_id}' not found"},
-            )
-        return ProviderResponse(**match)
-
-    # connection to Llama Stack server
-    except HTTPException:
-        raise
+        provider = await client.providers.retrieve(provider_id)
+        return ProviderResponse(**provider.model_dump())
+
     except APIConnectionError as e:
         logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
-    # any other exception that can occur during model listing
-    except Exception as e:
-        logger.error("Unable to retrieve list of providers: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to retrieve list of providers",
-                "cause": str(e),
-            },
-        ) from e
+        response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
+        raise HTTPException(**response.model_dump()) from e
+
+    except BadRequestError as e:
+        response = NotFoundResponse(resource="provider", resource_id=provider_id)
+        raise HTTPException(**response.model_dump()) from e
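Since group_providers() only touches three attributes of each provider object, its behavior is easy to pin down with stand-ins. A hedged usage sketch — FakeProviderInfo is a hypothetical substitute for llama-stack's ProviderInfo, and the sample values mirror the example payload this patch removes:

from dataclasses import dataclass

@dataclass
class FakeProviderInfo:  # hypothetical stand-in for ProviderInfo
    api: str
    provider_id: str
    provider_type: str

grouped = group_providers(
    [
        FakeProviderInfo("inference", "openai", "remote::openai"),
        FakeProviderInfo("agents", "meta-reference", "inline::meta-reference"),
    ]
)
# {'inference': [{'provider_id': 'openai', 'provider_type': 'remote::openai'}],
#  'agents': [{'provider_id': 'meta-reference',
#              'provider_type': 'inline::meta-reference'}]}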
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
index 7ce869e9..62cdb878 100644
--- a/src/app/endpoints/query.py
+++ b/src/app/endpoints/query.py
@@ -7,7 +7,7 @@
 from datetime import UTC, datetime
 from typing import Annotated, Any, Optional, cast

-from fastapi import APIRouter, Depends, HTTPException, Request, status
+from fastapi import APIRouter, Depends, HTTPException, Request
 from litellm.exceptions import RateLimitError
 from llama_stack_client import (
     APIConnectionError,
@@ -17,13 +17,14 @@
 from llama_stack_client.types import Shield, UserMessage  # type: ignore
 from llama_stack_client.types.agents.turn import Turn
 from llama_stack_client.types.agents.turn_create_params import (
+    Document,
     Toolgroup,
     ToolgroupAgentToolGroupWithArgs,
-    Document,
 )
 from llama_stack_client.types.model_list_response import ModelListResponse
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
 from llama_stack_client.types.tool_execution_step import ToolExecutionStep
+from sqlalchemy.exc import SQLAlchemyError

 import constants
 import metrics
@@ -39,65 +40,55 @@
 from models.requests import Attachment, QueryRequest
 from models.responses import (
     ForbiddenResponse,
+    InternalServerErrorResponse,
+    NotFoundResponse,
     QueryResponse,
+    QuotaExceededResponse,
     ReferencedDocument,
+    ServiceUnavailableResponse,
     ToolCall,
     UnauthorizedResponse,
-    QuotaExceededResponse,
+    UnprocessableEntityResponse,
 )
 from utils.endpoints import (
     check_configuration_loaded,
     get_agent,
-    get_topic_summary_system_prompt,
-    get_temp_agent,
     get_system_prompt,
+    get_temp_agent,
+    get_topic_summary_system_prompt,
     store_conversation_into_cache,
     validate_conversation_ownership,
     validate_model_provider_override,
 )
+from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
 from utils.quota import (
-    get_available_quotas,
     check_tokens_available,
     consume_tokens,
+    get_available_quotas,
 )
-from utils.mcp_headers import handle_mcp_headers_with_toolgroups, mcp_headers_dependency
+from utils.token_counter import TokenCounter, extract_and_update_token_metrics
 from utils.transcripts import store_transcript
 from utils.types import TurnSummary
-from utils.token_counter import extract_and_update_token_metrics, TokenCounter

 logger = logging.getLogger("app.endpoints.handlers")

 router = APIRouter(tags=["query"])

+
 query_response: dict[int | str, dict[str, Any]] = {
-    200: {
-        "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
-        "response": "LLM answer",
-        "referenced_documents": [
-            {
-                "doc_url": "https://docs.openshift.com/"
-                "container-platform/4.15/operators/olm/index.html",
-                "doc_title": "Operator Lifecycle Manager (OLM)",
-            }
-        ],
-    },
-    400: {
-        "description": "Missing or invalid credentials provided by client",
-        "model": UnauthorizedResponse,
-    },
-    403: {
-        "description": "Client does not have permission to access conversation",
-        "model": ForbiddenResponse,
-    },
-    429: {
-        "description": "The quota has been exceeded",
-        "model": QuotaExceededResponse,
-    },
-    500: {
-        "detail": {
-            "response": "Unable to connect to Llama Stack",
-            "cause": "Connection error.",
-        }
-    },
+    200: QueryResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(
+        examples=["endpoint", "conversation read", "model override"]
+    ),
+    404: NotFoundResponse.openapi_response(
+        examples=["model", "conversation", "provider"]
+    ),
+    422: UnprocessableEntityResponse.openapi_response(),
+    429: QuotaExceededResponse.openapi_response(),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

@@ -280,13 +271,11 @@ async def query_endpoint_handler_base(  # pylint: disable=R0914
                 query_request.conversation_id,
                 user_id,
             )
-            raise HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail={
-                    "response": "Conversation not found",
-                    "cause": "The requested conversation does not exist.",
-                },
+            response = NotFoundResponse(
+                resource="conversation", resource_id=query_request.conversation_id
             )
+            raise HTTPException(**response.model_dump())
+
     else:
         logger.debug("Query does not contain conversation ID")

@@ -430,22 +419,19 @@ async def query_endpoint_handler_base(  # pylint: disable=R0914
         # Update metrics for the LLM call failure
         metrics.llm_calls_failures_total.inc()
         logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
+        response = ServiceUnavailableResponse(
+            backend_name="Llama Stack",
+            cause=str(e),
+        )
+        raise HTTPException(**response.model_dump()) from e
+    except SQLAlchemyError as e:
+        logger.exception("Error persisting conversation details: %s", e)
+        response = InternalServerErrorResponse.database_error()
+        raise HTTPException(**response.model_dump()) from e
     except RateLimitError as e:
-        used_model = getattr(e, "model", "unknown")
-        raise HTTPException(
-            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
-            detail={
-                "response": "Model quota exceeded",
-                "cause": f"The token quota for model {used_model} has been exceeded.",
-            },
-        ) from e
+        used_model = getattr(e, "model", "")
+        response = QuotaExceededResponse.model(used_model)
+        raise HTTPException(**response.model_dump()) from e

@router.post("/query", responses=query_response)
@@ -528,31 +514,21 @@ def select_model_and_provider_id(
     except (StopIteration, AttributeError) as e:
         message = "No LLM model found in available models"
         logger.error(message)
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail={
-                "response": constants.UNABLE_TO_PROCESS_RESPONSE,
-                "cause": message,
-            },
-        ) from e
+        response = NotFoundResponse(resource="model", resource_id=model_id or "")
+        raise HTTPException(**response.model_dump()) from e

     llama_stack_model_id = f"{provider_id}/{model_id}"
     # Validate that the model_id and provider_id are in the available models
     logger.debug("Searching for model: %s, provider: %s", model_id, provider_id)
+    # TODO: Create separate validation of provider
     if not any(
         m.identifier == llama_stack_model_id and m.provider_id == provider_id
         for m in models
     ):
         message = f"Model {model_id} from provider {provider_id} not found in available models"
         logger.error(message)
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail={
-                "response": constants.UNABLE_TO_PROCESS_RESPONSE,
-                "cause": message,
-            },
-        )
-
+        response = NotFoundResponse(resource="model", resource_id=model_id)
+        raise HTTPException(**response.model_dump())
     return llama_stack_model_id, model_id, provider_id

@@ -833,26 +809,24 @@ def validate_attachments_metadata(attachments: list[Attachment]) -> None:
     for attachment in attachments:
         if attachment.attachment_type not in constants.ATTACHMENT_TYPES:
             message = (
-                f"Attachment with improper type {attachment.attachment_type} detected"
+                f"Invalid attachment type {attachment.attachment_type}: "
+                f"must be one of {constants.ATTACHMENT_TYPES}"
             )
             logger.error(message)
-            raise HTTPException(
-                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail={
-                    "response": constants.UNABLE_TO_PROCESS_RESPONSE,
-                    "cause": message,
-                },
+            response = UnprocessableEntityResponse(
+                response="Invalid attribute value", cause=message
             )
+            raise HTTPException(**response.model_dump())

         if attachment.content_type not in constants.ATTACHMENT_CONTENT_TYPES:
-            message = f"Attachment with improper content type {attachment.content_type} detected"
+            message = (
+                f"Invalid attachment content type {attachment.content_type}: "
+                f"must be one of {constants.ATTACHMENT_CONTENT_TYPES}"
+            )
             logger.error(message)
-            raise HTTPException(
-                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail={
-                    "response": constants.UNABLE_TO_PROCESS_RESPONSE,
-                    "cause": message,
-                },
+            response = UnprocessableEntityResponse(
+                response="Invalid attribute value", cause=message
             )
+            raise HTTPException(**response.model_dump())

 def get_rag_toolgroups(
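With the switch from ad-hoc HTTPException dicts to NotFoundResponse, a missing conversation now surfaces to clients as a structured body. A sketch of the round trip — host, port, and path prefix are illustrative, and the body shape follows the e2e expectations later in this patch:

import requests

r = requests.post(
    "http://localhost:8080/v1/query",  # illustrative URL, not confirmed here
    json={
        "query": "Say hello",
        "conversation_id": "12345678-abcd-0000-0123-456789abcdef",
    },
    headers={"Authorization": "Bearer <token>"},
    timeout=60,
)
assert r.status_code == 404
# r.json() -> {"detail": {"response": "Conversation not found", "cause": ...}}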
override"] + ), + 404: NotFoundResponse.openapi_response( + examples=["conversation", "model", "provider"] + ), + 422: UnprocessableEntityResponse.openapi_response(), + 429: QuotaExceededResponse.openapi_response(), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), } @@ -225,27 +214,23 @@ async def get_topic_summary( # pylint: disable=too-many-nested-blocks """ topic_summary_system_prompt = get_topic_summary_system_prompt(configuration) - try: - # Use Responses API to generate topic summary - response = await client.responses.create( - input=question, - model=model_id, - instructions=topic_summary_system_prompt, - stream=False, - store=False, # Don't store topic summary requests - ) - response = cast(OpenAIResponseObject, response) + # Use Responses API to generate topic summary + response = await client.responses.create( + input=question, + model=model_id, + instructions=topic_summary_system_prompt, + stream=False, + store=False, # Don't store topic summary requests + ) + response = cast(OpenAIResponseObject, response) - # Extract text from response output - summary_text = "".join( - extract_text_from_response_output_item(output_item) - for output_item in response.output - ) + # Extract text from response output + summary_text = "".join( + extract_text_from_response_output_item(output_item) + for output_item in response.output + ) - return summary_text.strip() if summary_text else "" - except Exception as e: # pylint: disable=broad-exception-caught - logger.warning("Failed to generate topic summary: %s", e) - return "" # Return empty string on failure + return summary_text.strip() if summary_text else "" @router.post("/query", responses=query_v2_response) @@ -265,6 +250,7 @@ async def query_endpoint_handler_v2( Returns: QueryResponse: Contains the conversation ID and the LLM-generated response. 
""" + check_configuration_loaded(configuration) return await query_endpoint_handler_base( request=request, query_request=query_request, diff --git a/src/app/endpoints/rags.py b/src/app/endpoints/rags.py index d1c060db..4209a5db 100644 --- a/src/app/endpoints/rags.py +++ b/src/app/endpoints/rags.py @@ -3,9 +3,9 @@ import logging from typing import Annotated, Any -from fastapi import APIRouter, HTTPException, Request, status +from fastapi import APIRouter, HTTPException, Request from fastapi.params import Depends -from llama_stack_client import APIConnectionError +from llama_stack_client import APIConnectionError, BadRequestError from authentication import get_auth_dependency from authentication.interface import AuthTuple @@ -13,7 +13,15 @@ from client import AsyncLlamaStackClientHolder from configuration import configuration from models.config import Action -from models.responses import RAGListResponse, RAGInfoResponse +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + NotFoundResponse, + RAGInfoResponse, + RAGListResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, +) from utils.endpoints import check_configuration_loaded logger = logging.getLogger(__name__) @@ -21,23 +29,24 @@ rags_responses: dict[int | str, dict[str, Any]] = { - 200: { - "rags": [ - "vs_00000000-cafe-babe-0000-000000000000", - "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3", - "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3", - ] - }, - 500: {"description": "Connection to Llama Stack is broken"}, + 200: RAGListResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), } rag_responses: dict[int | str, dict[str, Any]] = { - 200: {}, - 404: {"response": "RAG with given id not found"}, - 500: { - "response": "Unable to retrieve list of RAGs", - "cause": "Connection to Llama Stack is broken", - }, + 200: RAGInfoResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 404: NotFoundResponse.openapi_response(examples=["rag"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), + 503: ServiceUnavailableResponse.openapi_response(), } @@ -48,18 +57,17 @@ async def rags_endpoint_handler( auth: Annotated[AuthTuple, Depends(get_auth_dependency())], ) -> RAGListResponse: """ - Handle GET requests to list all available RAGs. + List all available RAGs. - Retrieves RAGs from the Llama Stack service. + Returns: + RAGListResponse: List of RAG identifiers. Raises: HTTPException: - - 500 if configuration is not loaded, - - 500 if unable to connect to Llama Stack, - - 500 for any unexpected retrieval errors. - - Returns: - RAGListResponse: List of RAGs. 
diff --git a/src/app/endpoints/rags.py b/src/app/endpoints/rags.py
index d1c060db..4209a5db 100644
--- a/src/app/endpoints/rags.py
+++ b/src/app/endpoints/rags.py
@@ -3,9 +3,9 @@
 import logging
 from typing import Annotated, Any

-from fastapi import APIRouter, HTTPException, Request, status
+from fastapi import APIRouter, HTTPException, Request
 from fastapi.params import Depends
-from llama_stack_client import APIConnectionError
+from llama_stack_client import APIConnectionError, BadRequestError

 from authentication import get_auth_dependency
 from authentication.interface import AuthTuple
@@ -13,7 +13,15 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from models.config import Action
-from models.responses import RAGListResponse, RAGInfoResponse
+from models.responses import (
+    ForbiddenResponse,
+    InternalServerErrorResponse,
+    NotFoundResponse,
+    RAGInfoResponse,
+    RAGListResponse,
+    ServiceUnavailableResponse,
+    UnauthorizedResponse,
+)
 from utils.endpoints import check_configuration_loaded

 logger = logging.getLogger(__name__)
@@ -21,23 +29,24 @@

 rags_responses: dict[int | str, dict[str, Any]] = {
-    200: {
-        "rags": [
-            "vs_00000000-cafe-babe-0000-000000000000",
-            "vs_7b52a8cf-0fa3-489c-beab-27e061d102f3",
-            "vs_7b52a8cf-0fa3-489c-cafe-27e061d102f3",
-        ]
-    },
-    500: {"description": "Connection to Llama Stack is broken"},
+    200: RAGListResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

 rag_responses: dict[int | str, dict[str, Any]] = {
-    200: {},
-    404: {"response": "RAG with given id not found"},
-    500: {
-        "response": "Unable to retrieve list of RAGs",
-        "cause": "Connection to Llama Stack is broken",
-    },
+    200: RAGInfoResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    404: NotFoundResponse.openapi_response(examples=["rag"]),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

@@ -48,18 +57,17 @@ async def rags_endpoint_handler(
     auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
 ) -> RAGListResponse:
     """
-    Handle GET requests to list all available RAGs.
+    List all available RAGs.

-    Retrieves RAGs from the Llama Stack service.
+    Returns:
+        RAGListResponse: List of RAG identifiers.

     Raises:
         HTTPException:
-        - 500 if configuration is not loaded,
-        - 500 if unable to connect to Llama Stack,
-        - 500 for any unexpected retrieval errors.
-
-    Returns:
-        RAGListResponse: List of RAGs.
+        - 401: Authentication failed
+        - 403: Authorization failed
+        - 500: Lightspeed Stack configuration not loaded
+        - 503: Unable to connect to Llama Stack
     """
     # Used only by the middleware
     _ = auth
@@ -86,23 +94,8 @@ async def rags_endpoint_handler(
     # connection to Llama Stack server
     except APIConnectionError as e:
         logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
-    # any other exception that can occur during model listing
-    except Exception as e:
-        logger.error("Unable to retrieve list of RAGs: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to retrieve list of RAGs",
-                "cause": str(e),
-            },
-        ) from e
+        response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
+        raise HTTPException(**response.model_dump()) from e

@router.get("/rags/{rag_id}", responses=rag_responses)
@@ -114,14 +107,16 @@ async def get_rag_endpoint_handler(
 ) -> RAGInfoResponse:
     """Retrieve a single RAG by its unique ID.

+    Returns:
+        RAGInfoResponse: A single RAG's details.
+
     Raises:
         HTTPException:
-        - 404 if RAG with the given ID is not found,
-        - 500 if unable to connect to Llama Stack,
-        - 500 for any unexpected retrieval errors.
-
-    Returns:
-        RAGInfoResponse: A single RAG's details
+        - 401: Authentication failed
+        - 403: Authorization failed
+        - 404: RAG with the given ID not found
+        - 500: Lightspeed Stack configuration not loaded
+        - 503: Unable to connect to Llama Stack
     """
     # Used only by the middleware
     _ = auth
@@ -149,26 +144,11 @@ async def get_rag_endpoint_handler(
         status=rag_info.status,
         usage_bytes=rag_info.usage_bytes,
     )
-
-    # connection to Llama Stack server
-    except HTTPException:
-        raise
     except APIConnectionError as e:
         logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
-    # any other exception that can occur during model listing
-    except Exception as e:
-        logger.error("Unable to retrieve info about RAG: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to retrieve info about RAG",
-                "cause": str(e),
-            },
-        ) from e
+        response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
+        raise HTTPException(**response.model_dump()) from e
+    except BadRequestError as e:
+        logger.error("RAG not found: %s", e)
+        response = NotFoundResponse(resource="rag", resource_id=rag_id)
+        raise HTTPException(**response.model_dump()) from e
diff --git a/src/app/endpoints/root.py b/src/app/endpoints/root.py
index 4485d2d6..31996d7e 100644
--- a/src/app/endpoints/root.py
+++ b/src/app/endpoints/root.py
@@ -1,7 +1,7 @@
 """Handler for the / endpoint."""

 import logging
-from typing import Annotated
+from typing import Annotated, Any

 from fastapi import APIRouter, Depends, Request
 from fastapi.responses import HTMLResponse
@@ -10,6 +10,7 @@
 from authentication.interface import AuthTuple
 from authorization.middleware import authorize
 from models.config import Action
+from models.responses import ForbiddenResponse, UnauthorizedResponse

 logger = logging.getLogger("app.endpoints.handlers")
 router = APIRouter(tags=["root"])
@@ -775,7 +776,15 @@
 """

-@router.get("/", response_class=HTMLResponse)
+root_responses: dict[int | str, dict[str, Any]] = {
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+}
+
+
+@router.get("/", response_class=HTMLResponse, responses=root_responses)
 @authorize(Action.INFO)
 async def root_endpoint_handler(
     auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
diff --git a/src/app/endpoints/shields.py b/src/app/endpoints/shields.py
index ce632e40..5dd8b8b6 100644
--- a/src/app/endpoints/shields.py
+++ b/src/app/endpoints/shields.py
@@ -3,7 +3,7 @@
 import logging
 from typing import Annotated, Any

-from fastapi import APIRouter, HTTPException, Request, status
+from fastapi import APIRouter, HTTPException, Request
 from fastapi.params import Depends
 from llama_stack_client import APIConnectionError

@@ -13,7 +13,13 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from models.config import Action
-from models.responses import ShieldsResponse
+from models.responses import (
+    ForbiddenResponse,
+    InternalServerErrorResponse,
+    ServiceUnavailableResponse,
+    ShieldsResponse,
+    UnauthorizedResponse,
+)
 from utils.endpoints import check_configuration_loaded

 logger = logging.getLogger(__name__)
@@ -21,18 +27,13 @@

 shields_responses: dict[int | str, dict[str, Any]] = {
-    200: {
-        "shields": [
-            {
-                "identifier": "lightspeed_question_validity-shield",
-                "provider_resource_id": "lightspeed_question_validity-shield",
-                "provider_id": "lightspeed_question_validity",
-                "type": "shield",
-                "params": {},
-            }
-        ]
-    },
-    500: {"description": "Connection to Llama Stack is broken"},
+    200: ShieldsResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

@@ -77,20 +78,5 @@ async def shields_endpoint_handler(
     # connection to Llama Stack server
     except APIConnectionError as e:
         logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
-    # any other exception that can occur during shield listing
-    except Exception as e:
-        logger.error("Unable to retrieve list of shields: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to retrieve list of shields",
-                "cause": str(e),
-            },
-        ) from e
+        response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
+        raise HTTPException(**response.model_dump()) from e
diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
index 46f6276d..1b440a33 100644
--- a/src/app/endpoints/streaming_query.py
+++ b/src/app/endpoints/streaming_query.py
@@ -9,9 +9,9 @@
 from datetime import UTC, datetime
 from typing import Annotated, Any, AsyncGenerator, AsyncIterator, Iterator, cast

-from litellm.exceptions import RateLimitError
-from fastapi import APIRouter, Depends, HTTPException, Request, status
+from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.responses import StreamingResponse
+from litellm.exceptions import RateLimitError
 from llama_stack_client import (
     APIConnectionError,
     AsyncLlamaStackClient,  # type: ignore
@@ -21,21 +21,22 @@
 from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
     AgentTurnResponseStreamChunk,
 )
+from llama_stack_client.types.agents.turn_create_params import Document
 from llama_stack_client.types.shared import ToolCall
 from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
-from llama_stack_client.types.agents.turn_create_params import Document

+import metrics
 from app.endpoints.query import (
+    evaluate_model_hints,
     get_rag_toolgroups,
+    get_topic_summary,
     is_input_shield,
     is_output_shield,
     is_transcripts_enabled,
+    persist_user_conversation_details,
     select_model_and_provider_id,
     validate_attachments_metadata,
     validate_conversation_ownership,
-    persist_user_conversation_details,
-    evaluate_model_hints,
-    get_topic_summary,
 )
 from authentication import get_auth_dependency
 from authentication.interface import AuthTuple
@@ -43,7 +44,6 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from constants import DEFAULT_RAG_TOOL, MEDIA_TYPE_JSON, MEDIA_TYPE_TEXT
-import metrics
 from metrics.utils import update_llm_token_count_from_turn
 from models.config import Action
 from models.context import ResponseGeneratorContext
@@ -51,8 +51,12 @@
 from models.requests import QueryRequest
 from models.responses import (
     ForbiddenResponse,
-    UnauthorizedResponse,
+    InternalServerErrorResponse,
+    NotFoundResponse,
     QuotaExceededResponse,
+    ServiceUnavailableResponse,
+    UnauthorizedResponse,
+    UnprocessableEntityResponse,
 )
 from utils.endpoints import (
     check_configuration_loaded,
@@ -67,57 +71,40 @@
 from utils.transcripts import store_transcript
 from utils.types import TurnSummary

-
 logger = logging.getLogger("app.endpoints.handlers")

 router = APIRouter(tags=["streaming_query"])

+
 streaming_query_responses: dict[int | str, dict[str, Any]] = {
     200: {
-        "description": "Streaming response with Server-Sent Events",
+        "description": "Streaming response (Server-Sent Events)",
         "content": {
-            "application/json": {
-                "schema": {
-                    "type": "string",
-                    "example": (
-                        'data: {"event": "start", '
-                        '"data": {"conversation_id": "123e4567-e89b-12d3-a456-426614174000"}}\n\n'
-                        'data: {"event": "token", "data": {"id": 0, "token": "Hello"}}\n\n'
-                        'data: {"event": "end", "data": {"referenced_documents": [], '
-                        '"truncated": null, "input_tokens": 0, "output_tokens": 0}, '
-                        '"available_quotas": {}}\n\n'
-                    ),
-                }
-            },
-            "text/plain": {
-                "schema": {
-                    "type": "string",
-                    "example": "Hello world!\n\n---\n\nReference: https://example.com/doc",
-                }
-            },
+            "text/event-stream": {
+                "schema": {"type": "string"},
+                "example": (
+                    'data: {"event": "start", '
+                    '"data": {"conversation_id": "123e4567-e89b-12d3-a456-426614174000"}}\n\n'
+                    'data: {"event": "token", "data": {"id": 0, "token": "Hello"}}\n\n'
+                    'data: {"event": "end", "data": {"referenced_documents": [], '
+                    '"truncated": null, "input_tokens": 0, "output_tokens": 0}, '
+                    '"available_quotas": {}}\n\n'
+                ),
+            }
         },
     },
-    400: {
-        "description": "Missing or invalid credentials provided by client",
-        "model": UnauthorizedResponse,
-    },
-    401: {
-        "description": "Unauthorized: Invalid or missing Bearer token for k8s auth",
-        "model": UnauthorizedResponse,
-    },
-    403: {
-        "description": "Client does not have permission to access conversation",
-        "model": ForbiddenResponse,
-    },
-    429: {
-        "description": "The quota has been exceeded",
-        "model": QuotaExceededResponse,
-    },
-    500: {
-        "detail": {
-            "response": "Unable to connect to Llama Stack",
-            "cause": "Connection error.",
-        }
-    },
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(
+        examples=["conversation read", "endpoint", "model override"]
+    ),
+    404: NotFoundResponse.openapi_response(
+        examples=["conversation", "model", "provider"]
+    ),
+    422: UnprocessableEntityResponse.openapi_response(),
+    429: QuotaExceededResponse.openapi_response(),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

@@ -861,13 +848,12 @@ async def streaming_query_endpoint_handler_base(  # pylint: disable=too-many-loc
                 user_id,
                 query_request.conversation_id,
             )
-            raise HTTPException(
-                status_code=status.HTTP_403_FORBIDDEN,
-                detail={
-                    "response": "Access denied",
-                    "cause": "You do not have permission to access this conversation",
-                },
+            response = ForbiddenResponse.conversation(
+                action="read",
+                resource_id=query_request.conversation_id,
+                user_id=user_id,
             )
+            raise HTTPException(**response.model_dump())

     try:
         # try to get Llama Stack client
@@ -918,22 +904,22 @@ async def streaming_query_endpoint_handler_base(  # pylint: disable=too-many-loc
         # Update metrics for the LLM call failure
         metrics.llm_calls_failures_total.inc()
         logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
+        response = ServiceUnavailableResponse(
+            backend_name="Llama Stack",
+            cause=str(e),
+        )
+        raise HTTPException(**response.model_dump()) from e
+
     except RateLimitError as e:
-        used_model = getattr(e, "model", "unknown")
-        raise HTTPException(
-            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
-            detail={
-                "response": "Model quota exceeded",
-                "cause": f"The token quota for model {used_model} has been exceeded.",
-            },
-        ) from e
+        used_model = getattr(e, "model", "")
+        if used_model:
+            response = QuotaExceededResponse.model(used_model)
+        else:
+            response = QuotaExceededResponse(
+                response="The quota has been exceeded", cause=str(e)
+            )
+        raise HTTPException(**response.model_dump()) from e
+
     except Exception as e:  # pylint: disable=broad-except
         # Handle other errors with OLS-compatible error response
         # This broad exception catch is intentional to ensure all errors
diff --git a/src/app/endpoints/streaming_query_v2.py b/src/app/endpoints/streaming_query_v2.py
index bf4080c4..f5e8f026 100644
--- a/src/app/endpoints/streaming_query_v2.py
+++ b/src/app/endpoints/streaming_query_v2.py
@@ -3,13 +3,12 @@
 import logging
 from typing import Annotated, Any, AsyncIterator, cast

-from llama_stack_client import AsyncLlamaStackClient  # type: ignore
+from fastapi import APIRouter, Depends, Request
+from fastapi.responses import StreamingResponse
 from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseObjectStream,
 )
-
-from fastapi import APIRouter, Depends, Request
-from fastapi.responses import StreamingResponse
+from llama_stack_client import AsyncLlamaStackClient  # type: ignore

 from app.endpoints.query import (
     is_transcripts_enabled,
@@ -35,7 +34,15 @@
 from models.config import Action
 from models.context import ResponseGeneratorContext
 from models.requests import QueryRequest
-from models.responses import ForbiddenResponse, UnauthorizedResponse
+from models.responses import (
+    ForbiddenResponse,
+    InternalServerErrorResponse,
+    NotFoundResponse,
+    QuotaExceededResponse,
+    ServiceUnavailableResponse,
+    UnauthorizedResponse,
+    UnprocessableEntityResponse,
+)
 from utils.endpoints import (
     cleanup_after_streaming,
     get_system_prompt,
@@ -44,7 +51,7 @@
 from utils.shields import detect_shield_violations, get_available_shields
 from utils.token_counter import TokenCounter
 from utils.transcripts import store_transcript
-from utils.types import TurnSummary, ToolCallSummary
+from utils.types import ToolCallSummary, TurnSummary

 logger = logging.getLogger("app.endpoints.handlers")
 router = APIRouter(tags=["streaming_query_v2"])
@@ -75,24 +82,19 @@
         },
     },
-    400: {
-        "description": "Missing or invalid credentials provided by client",
-        "model": UnauthorizedResponse,
-    },
-    401: {
-        "description": "Unauthorized: Invalid or missing Bearer token for k8s auth",
-        "model": UnauthorizedResponse,
-    },
-    403: {
-        "description": "User is not authorized",
-        "model": ForbiddenResponse,
-    },
-    500: {
-        "detail": {
-            "response": "Unable to connect to Llama Stack",
-            "cause": "Connection error.",
-        }
-    },
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(
+        examples=["conversation read", "endpoint", "model override"]
+    ),
+    404: NotFoundResponse.openapi_response(
+        examples=["conversation", "model", "provider"]
+    ),
+    422: UnprocessableEntityResponse.openapi_response(),
+    429: QuotaExceededResponse.openapi_response(),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }
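The text/event-stream example documented in the streaming responses above (start / token / end events) is enough to write a small consumer. A sketch, assuming a locally running instance — the URL and path are illustrative:

import json

import requests

with requests.post(
    "http://localhost:8080/v1/streaming_query",  # illustrative URL
    json={"query": "Say hello"},
    headers={"Authorization": "Bearer <token>"},
    stream=True,
    timeout=60,
) as resp:
    for raw in resp.iter_lines():
        if not raw.startswith(b"data: "):
            continue  # skip blank separators between events
        event = json.loads(raw[len(b"data: "):])
        if event["event"] == "token":
            print(event["data"]["token"], end="")
        elif event["event"] == "end":
            break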
diff --git a/src/app/endpoints/tools.py b/src/app/endpoints/tools.py
index 31eba4fd..074a30d8 100644
--- a/src/app/endpoints/tools.py
+++ b/src/app/endpoints/tools.py
@@ -3,8 +3,8 @@
 import logging
 from typing import Annotated, Any

-from fastapi import APIRouter, Depends, HTTPException, Request, status
-from llama_stack_client import APIConnectionError
+from fastapi import APIRouter, Depends, HTTPException, Request
+from llama_stack_client import APIConnectionError, BadRequestError

 from authentication import get_auth_dependency
 from authentication.interface import AuthTuple
@@ -12,7 +12,13 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from models.config import Action
-from models.responses import ToolsResponse
+from models.responses import (
+    ForbiddenResponse,
+    InternalServerErrorResponse,
+    ServiceUnavailableResponse,
+    ToolsResponse,
+    UnauthorizedResponse,
+)
 from utils.endpoints import check_configuration_loaded
 from utils.tool_formatter import format_tools_list

@@ -21,41 +27,19 @@

 tools_responses: dict[int | str, dict[str, Any]] = {
-    200: {
-        "description": "Successful Response",
-        "content": {
-            "application/json": {
-                "example": {
-                    "tools": [
-                        {
-                            "identifier": "",
-                            "description": "",
-                            "parameters": [
-                                {
-                                    "name": "",
-                                    "description": "",
-                                    "parameter_type": "",
-                                    "required": "True/False",
-                                    "default": "null",
-                                }
-                            ],
-                            "provider_id": "",
-                            "toolgroup_id": "",
-                            "server_source": "",
-                            "type": "tool",
-                        }
-                    ]
-                }
-            }
-        },
-    },
-    500: {"description": "Connection to Llama Stack is broken or MCP server error"},
+    200: ToolsResponse.openapi_response(),
+    401: UnauthorizedResponse.openapi_response(
+        examples=["missing header", "missing token"]
+    ),
+    403: ForbiddenResponse.openapi_response(examples=["endpoint"]),
+    500: InternalServerErrorResponse.openapi_response(examples=["configuration"]),
+    503: ServiceUnavailableResponse.openapi_response(),
 }

 @router.get("/tools", responses=tools_responses)
 @authorize(Action.GET_TOOLS)
-async def tools_endpoint_handler(
+async def tools_endpoint_handler(  # pylint: disable=too-many-locals
     request: Request,
     auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
 ) -> ToolsResponse:
@@ -81,110 +65,81 @@ async def tools_endpoint_handler(

     check_configuration_loaded(configuration)

+    toolgroups_response = []
     try:
-        # Get Llama Stack client
         client = AsyncLlamaStackClientHolder().get_client()
+        logger.debug("Retrieving tools from all toolgroups")
+        toolgroups_response = await client.toolgroups.list()
+    except APIConnectionError as e:
+        logger.error("Unable to connect to Llama Stack: %s", e)
+        response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
+        raise HTTPException(**response.model_dump()) from e

-        consolidated_tools = []
-        mcp_server_names = (
-            {mcp_server.name for mcp_server in configuration.mcp_servers}
-            if configuration.mcp_servers
-            else set()
-        )
+    consolidated_tools = []
+    mcp_server_names = (
+        {mcp_server.name for mcp_server in configuration.mcp_servers}
+        if configuration.mcp_servers
+        else set()
+    )

-        # Get all available toolgroups
+    for toolgroup in toolgroups_response:
         try:
-            logger.debug("Retrieving tools from all toolgroups")
-            toolgroups_response = await client.toolgroups.list()
-
-            for toolgroup in toolgroups_response:
-                try:
-                    # Get tools for each toolgroup
-                    tools_response = await client.tools.list(
-                        toolgroup_id=toolgroup.identifier
-                    )
-
-                    # Convert tools to dict format
-                    tools_count = 0
-                    server_source = "unknown"
-
-                    for tool in tools_response:
-                        tool_dict = dict(tool)
-
-                        # Determine server source based on toolgroup type
-                        if toolgroup.identifier in mcp_server_names:
-                            # This is an MCP server toolgroup
-                            mcp_server = next(
-                                (
-                                    s
-                                    for s in configuration.mcp_servers
-                                    if s.name == toolgroup.identifier
-                                ),
-                                None,
-                            )
-                            tool_dict["server_source"] = (
-                                mcp_server.url if mcp_server else toolgroup.identifier
-                            )
-                        else:
-                            # This is a built-in toolgroup
-                            tool_dict["server_source"] = "builtin"
-
-                        consolidated_tools.append(tool_dict)
-                        tools_count += 1
-                        server_source = tool_dict["server_source"]
-
-                    logger.debug(
-                        "Retrieved %d tools from toolgroup %s (source: %s)",
-                        tools_count,
-                        toolgroup.identifier,
-                        server_source,
-                    )
-
-                except Exception as e:  # pylint: disable=broad-exception-caught
-                    # Catch any exception from individual toolgroup failures to allow
-                    # processing of other toolgroups to continue (partial failure scenario)
-                    logger.warning(
-                        "Failed to retrieve tools from toolgroup %s: %s",
-                        toolgroup.identifier,
-                        e,
-                    )
-                    continue
-
+            # Get tools for each toolgroup
+            tools_response = await client.tools.list(toolgroup_id=toolgroup.identifier)
+        except BadRequestError:
+            logger.error("Toolgroup %s not found", toolgroup.identifier)
+            continue
         except APIConnectionError as e:
-            logger.warning("Failed to retrieve tools from toolgroups: %s", e)
-            raise
-        except (ValueError, AttributeError) as e:
-            logger.warning("Failed to retrieve tools from toolgroups: %s", e)
-
-        logger.info(
-            "Retrieved total of %d tools (%d from built-in toolgroups, %d from MCP servers)",
-            len(consolidated_tools),
-            len([t for t in consolidated_tools if t.get("server_source") == "builtin"]),
-            len([t for t in consolidated_tools if t.get("server_source") != "builtin"]),
+            logger.error("Unable to connect to Llama Stack: %s", e)
+            response = ServiceUnavailableResponse(
+                backend_name="Llama Stack", cause=str(e)
+            )
+            raise HTTPException(**response.model_dump()) from e
+
+        # Convert tools to dict format
+        tools_count = 0
+        server_source = "unknown"
+
+        for tool in tools_response:
+            tool_dict = dict(tool)
+
+            # Determine server source based on toolgroup type
+            if toolgroup.identifier in mcp_server_names:
+                # This is an MCP server toolgroup
+                mcp_server = next(
+                    (
+                        s
+                        for s in configuration.mcp_servers
+                        if s.name == toolgroup.identifier
+                    ),
+                    None,
+                )
+                tool_dict["server_source"] = (
+                    mcp_server.url if mcp_server else toolgroup.identifier
+                )
+            else:
+                # This is a built-in toolgroup
+                tool_dict["server_source"] = "builtin"
+
+            consolidated_tools.append(tool_dict)
+            tools_count += 1
+            server_source = tool_dict["server_source"]
+
+        logger.debug(
+            "Retrieved %d tools from toolgroup %s (source: %s)",
+            tools_count,
+            toolgroup.identifier,
+            server_source,
         )

-        # Format tools with structured description parsing
-        formatted_tools = format_tools_list(consolidated_tools)
+    logger.info(
+        "Retrieved total of %d tools (%d from built-in toolgroups, %d from MCP servers)",
+        len(consolidated_tools),
+        len([t for t in consolidated_tools if t.get("server_source") == "builtin"]),
+        len([t for t in consolidated_tools if t.get("server_source") != "builtin"]),
+    )

-        return ToolsResponse(tools=formatted_tools)
+    # Format tools with structured description parsing
+    formatted_tools = format_tools_list(consolidated_tools)

-    # Connection to Llama Stack server
-    except APIConnectionError as e:
-        logger.error("Unable to connect to Llama Stack: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to connect to Llama Stack",
-                "cause": str(e),
-            },
-        ) from e
-    # Any other exception that can occur during tool listing
-    except Exception as e:
-        logger.error("Unable to retrieve list of tools: %s", e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={
-                "response": "Unable to retrieve list of tools",
-                "cause": str(e),
-            },
-        ) from e
+    return ToolsResponse(tools=formatted_tools)
diff --git a/src/app/main.py b/src/app/main.py
index cbf45e81..a82be2b7 100644
--- a/src/app/main.py
+++ b/src/app/main.py
@@ -4,8 +4,9 @@
 from contextlib import asynccontextmanager
 from typing import AsyncIterator, Awaitable, Callable

-from fastapi import FastAPI, Request, Response
+from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
 from starlette.routing import Mount, Route, WebSocketRoute

 import metrics
@@ -15,6 +16,7 @@
 from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from log import get_logger
+from models.responses import InternalServerErrorResponse
 from utils.common import register_mcp_servers_async
 from utils.llama_stack_version import check_llama_stack_version

@@ -108,6 +110,25 @@ async def rest_api_metrics(
     return response

+@app.middleware("http")
+async def global_exception_middleware(
+    request: Request, call_next: Callable[[Request], Awaitable[Response]]
+) -> Response:
+    """Middleware to handle uncaught exceptions from all endpoints."""
+    try:
+        response = await call_next(request)
+        return response
+    except HTTPException:
+        raise
+    except Exception as exc:  # pylint: disable=broad-exception-caught
+        logger.exception("Uncaught exception in endpoint: %s", exc)
+        error_response = InternalServerErrorResponse.generic()
+        return JSONResponse(
+            status_code=error_response.status_code,
+            content={"detail": error_response.detail.model_dump()},
+        )
+
+
 logger.info("Including routers")
 routers.include_routers(app)
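The new middleware means any exception that escapes a handler is logged and converted into the generic 500 body instead of leaking a traceback. A rough test sketch — the /boom route is hypothetical, the import path for the app is assumed, and the detail keys are assumed to follow the response/cause shape used throughout this patch:

from fastapi.testclient import TestClient

from app.main import app  # assumed import path for the app above

@app.get("/boom")  # hypothetical route that always fails
async def boom() -> None:
    raise RuntimeError("unexpected")

client = TestClient(app, raise_server_exceptions=False)
resp = client.get("/boom")
assert resp.status_code == 500
assert set(resp.json()["detail"]) == {"response", "cause"}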
diff --git a/src/utils/endpoints.py b/src/utils/endpoints.py
index cc926889..a737f23a 100644
--- a/src/utils/endpoints.py
+++ b/src/utils/endpoints.py
@@ -3,25 +3,29 @@
 from contextlib import suppress
 from datetime import UTC, datetime
 from typing import Any
-from fastapi import HTTPException, status
+
+from fastapi import HTTPException
 from llama_stack_client._client import AsyncLlamaStackClient
 from llama_stack_client.lib.agents.agent import AsyncAgent
 from pydantic import AnyUrl, ValidationError

 import constants
+from app.database import get_session
+from configuration import AppConfig, LogicError
+from log import get_logger
 from models.cache_entry import CacheEntry
-from models.requests import QueryRequest
-from models.responses import ReferencedDocument
-from models.database.conversations import UserConversation
 from models.config import Action
-from app.database import get_session
-from configuration import AppConfig
+from models.database.conversations import UserConversation
+from models.requests import QueryRequest
+from models.responses import (
+    ForbiddenResponse,
+    InternalServerErrorResponse,
+    NotFoundResponse,
+    ReferencedDocument,
+    UnprocessableEntityResponse,
+)
 from utils.suid import get_suid
-from utils.types import TurnSummary
-from utils.types import GraniteToolParser
-
-
-from log import get_logger
+from utils.types import GraniteToolParser, TurnSummary

 logger = get_logger(__name__)

@@ -111,17 +115,19 @@ def can_access_conversation(

 def check_configuration_loaded(config: AppConfig) -> None:
     """
-    Ensure the application configuration object is present.
+    Raise an error if the configuration is not loaded.
+
+    Args:
+        config (AppConfig): The application configuration.

     Raises:
-        HTTPException: HTTP 500 Internal Server Error with detail `{"response":
-        "Configuration is not loaded"}` when `config` is None.
+        HTTPException: If configuration is missing.
     """
-    if config is None:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail={"response": "Configuration is not loaded"},
-        )
+    try:
+        _ = config.configuration
+    except LogicError as e:
+        response = InternalServerErrorResponse.configuration_not_loaded()
+        raise HTTPException(**response.model_dump()) from e

 def get_system_prompt(query_request: QueryRequest, config: AppConfig) -> str:
@@ -155,16 +161,15 @@ def get_system_prompt(query_request: QueryRequest, config: AppConfig) -> str:
         and config.customization.disable_query_system_prompt
     )
     if system_prompt_disabled and query_request.system_prompt:
-        raise HTTPException(
-            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail={
-                "response": (
-                    "This instance does not support customizing the system prompt in the "
-                    "query request (disable_query_system_prompt is set). Please remove the "
-                    "system_prompt field from your request."
-                )
-            },
+        response = UnprocessableEntityResponse(
+            response="System prompt customization is disabled",
+            cause=(
+                "This instance does not support customizing the system prompt in the "
+                "query request (disable_query_system_prompt is set). Please remove the "
+                "system_prompt field from your request."
+            ),
         )
+        raise HTTPException(**response.model_dump())

     if query_request.system_prompt:
         # Query taking precedence over configuration is the only behavior that
@@ -216,16 +221,8 @@ def validate_model_provider_override(
     if (query_request.model is not None or query_request.provider is not None) and (
         Action.MODEL_OVERRIDE not in authorized_actions
     ):
-        raise HTTPException(
-            status_code=status.HTTP_403_FORBIDDEN,
-            detail={
-                "response": (
-                    "This instance does not permit overriding model/provider in the query request "
-                    "(missing permission: MODEL_OVERRIDE). Please remove the model and provider "
-                    "fields from your request."
-                )
-            },
-        )
+        response = ForbiddenResponse.model_override()
+        raise HTTPException(**response.model_dump())

 #  # pylint: disable=R0913,R0917
@@ -332,13 +329,10 @@ async def get_agent(
         session_id = str(sessions_response.data[0]["session_id"])
     except IndexError as e:
         logger.error("No sessions found for conversation %s", conversation_id)
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail={
-                "response": "Conversation not found",
-                "cause": f"Conversation {conversation_id} could not be retrieved.",
-            },
-        ) from e
+        response = NotFoundResponse(
+            resource="conversation", resource_id=conversation_id
+        )
+        raise HTTPException(**response.model_dump()) from e
     else:
         conversation_id = agent.agent_id
         logger.debug("New conversation ID: %s", conversation_id)
diff --git a/tests/e2e/features/authorized_noop_token.feature b/tests/e2e/features/authorized_noop_token.feature
index 1169c609..b654a77d 100644
--- a/tests/e2e/features/authorized_noop_token.feature
+++ b/tests/e2e/features/authorized_noop_token.feature
@@ -11,10 +11,15 @@ Feature: Authorized endpoint API tests for the noop-with-token authentication mo
       """
       {"placeholder":"abc"}
       """
-    Then The status code of the response is 400
+    Then The status code of the response is 401
     And The body of the response is the following
       """
-      {"detail": "No Authorization header found"}
+      {
+        "detail": {
+          "response": "Missing or invalid credentials provided by client",
+          "cause": "No Authorization header found"
+        }
+      }
       """

   Scenario: Check if the authorized endpoint works when user_id is not provided
@@ -50,18 +55,28 @@ Feature: Authorized endpoint API tests for the noop-with-token authentication mo
   Scenario: Check if the authorized endpoint works with proper user_id but bearer token is not present
     Given The system is in default state
     When I access endpoint "authorized" using HTTP POST method with user_id "test_user"
-    Then The status code of the response is 400
+    Then The status code of the response is 401
     And The body of the response is the following
       """
-      {"detail": "No Authorization header found"}
+      {
+        "detail": {
+          "response": "Missing or invalid credentials provided by client",
+          "cause": "No Authorization header found"
+        }
+      }
       """

   Scenario: Check if the authorized endpoint works when auth token is malformed
     Given The system is in default state
     And I set the Authorization header to BearereyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
     When I access endpoint "authorized" using HTTP POST method with user_id "test_user"
-    Then The status code of the response is 400
+    Then The status code of the response is 401
     And The body of the response is the following
       """
-      {"detail": "No token found in Authorization header"}
+      {
+        "detail": {
+          "response": "Missing or invalid credentials provided by client",
+          "cause": "No token found in Authorization header"
+        }
+      }
       """
\ No newline at end of file
diff --git a/tests/e2e/features/conversations.feature b/tests/e2e/features/conversations.feature
index 44633cbb..f847d671 100644
--- a/tests/e2e/features/conversations.feature
+++ b/tests/e2e/features/conversations.feature
@@ -34,11 +34,14 @@ Feature: conversations endpoint API tests
     And I store conversation details
     And I remove the auth header
     When I access REST API endpoint "conversations" using HTTP GET method
-    Then The status code of the response is 400
+    Then The status code of the response is 401
    And The body of the response is the following
      """
      {
-         "detail": "No Authorization header found"
+         "detail": {
+             "response": "Missing or invalid credentials provided by client",
+             "cause": "No Authorization header found"
+         }
      }
      """

@@ -100,11 +103,14 @@ Feature: conversations endpoint API tests
     And I store conversation details
     And I remove the auth header
     When I use REST API conversation endpoint with conversation_id from above using HTTP GET method
-    Then The status code of the response is 400
+    Then The status code of the response is 401
    And The body of the response is the following
      """
      {
-         "detail": "No Authorization header found"
+         "detail": {
+             "response": "Missing or invalid credentials provided by client",
+             "cause": "No Authorization header found"
+         }
      }
      """

@@ -113,6 +119,15 @@ Feature: conversations endpoint API tests
     And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
     When I use REST API conversation endpoint with conversation_id "abcdef" using HTTP GET method
     Then The status code of the response is 400
+    And The body of the response is the following
+      """
+      {
+          "detail": {
+              "response": "Invalid conversation ID format",
+              "cause": "The conversation ID abcdef has invalid format."
+          }
+      }
+      """

   Scenario: Check if conversations/{conversation_id} GET endpoint fails when llama-stack is unavailable
     Given The system is in default state
@@ -145,19 +160,22 @@ Feature: conversations endpoint API tests
       {"success": true, "response": "Conversation deleted successfully"}
       """
     And I use REST API conversation endpoint with conversation_id from above using HTTP GET method
-    And The status code of the response is 404
+    Then The status code of the response is 404
+    And The body of the response contains Conversation not found

   Scenario: Check if conversations/{conversation_id} DELETE endpoint fails when conversation_id is malformed
     Given The system is in default state
     And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
     When I use REST API conversation endpoint with conversation_id "abcdef" using HTTP DELETE method
     Then The status code of the response is 400
+    And The body of the response contains Invalid conversation ID format

   Scenario: Check if conversations DELETE endpoint fails when the conversation does not exist
     Given The system is in default state
     And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
     When I use REST API conversation endpoint with conversation_id "12345678-abcd-0000-0123-456789abcdef" using HTTP DELETE method
     Then The status code of the response is 404
+    And The body of the response contains Conversation not found

   Scenario: Check if conversations/{conversation_id} DELETE endpoint fails when llama-stack is unavailable
     Given The system is in default state
diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py
index 45f22cbb..ec995c95 100644
--- a/tests/e2e/features/environment.py
+++ b/tests/e2e/features/environment.py
@@ -7,18 +7,19 @@
 4. after_scenario
 """

-import requests
+import os
 import subprocess
 import time
-import os
-from behave.model import Scenario, Feature
+
+import requests
+from behave.model import Feature, Scenario
 from behave.runner import Context

 from tests.e2e.utils.utils import (
-    switch_config,
-    restart_container,
-    remove_config_backup,
     create_config_backup,
+    remove_config_backup,
+    restart_container,
+    switch_config,
 )
+ } + } """ Scenario: Check if feedback endpoint fails when required fields are not specified @@ -256,13 +259,13 @@ Feature: feedback endpoint API tests "user_question": "Sample Question" } """ - Then The status code of the response is 400 + Then The status code of the response is 401 And The body of the response is the following """ { "detail": { - "cause": "Missing or invalid credentials provided by client", - "response": "Unauthorized" + "response": "Missing or invalid credentials provided by client", + "cause": "No Authorization header found" } } """ @@ -271,13 +274,13 @@ Feature: feedback endpoint API tests Given The system is in default state And I remove the auth header When The feedback is enabled - Then The status code of the response is 400 + Then The status code of the response is 401 And The body of the response is the following """ { "detail": { - "cause": "Missing or invalid credentials provided by client", - "response": "Unauthorized" + "response": "Missing or invalid credentials provided by client", + "cause": "No Authorization header found" } } """ @@ -302,8 +305,8 @@ Feature: feedback endpoint API tests """ { "detail": { - "response": "Error storing user feedback", - "cause": "[Errno 13] Permission denied: '/invalid'" + "response": "Failed to store feedback", + "cause": "Failed to store feedback at directory: /invalid" } } """ diff --git a/tests/e2e/features/info.feature b/tests/e2e/features/info.feature index 3d266a09..241907e0 100644 --- a/tests/e2e/features/info.feature +++ b/tests/e2e/features/info.feature @@ -22,7 +22,7 @@ Feature: Info tests Given The system is in default state And The llama-stack connection is disrupted When I access REST API endpoint "info" using HTTP GET method - Then The status code of the response is 500 + Then The status code of the response is 503 And The body of the response is the following """ {"detail": {"response": "Unable to connect to Llama Stack", "cause": "Connection error."}} @@ -39,7 +39,7 @@ Feature: Info tests Given The system is in default state And The llama-stack connection is disrupted When I access REST API endpoint "models" using HTTP GET method - Then The status code of the response is 500 + Then The status code of the response is 503 And The body of the response is the following """ {"detail": {"response": "Unable to connect to Llama Stack", "cause": "Connection error."}} @@ -56,7 +56,7 @@ Feature: Info tests Given The system is in default state And The llama-stack connection is disrupted When I access REST API endpoint "shields" using HTTP GET method - Then The status code of the response is 500 + Then The status code of the response is 503 And The body of the response is the following """ {"detail": {"response": "Unable to connect to Llama Stack", "cause": "Connection error."}} @@ -112,7 +112,7 @@ Feature: Info tests Given The system is in default state And The llama-stack connection is disrupted When I access REST API endpoint "tools" using HTTP GET method - Then The status code of the response is 500 + Then The status code of the response is 503 And The body of the response is the following """ {"detail": {"response": "Unable to connect to Llama Stack", "cause": "Connection error."}} diff --git a/tests/e2e/features/query.feature b/tests/e2e/features/query.feature index 585546fb..17171496 100644 --- a/tests/e2e/features/query.feature +++ b/tests/e2e/features/query.feature @@ -53,11 +53,16 @@ Feature: Query endpoint API tests """ {"query": "Write a simple code for reversing string"} """ - Then The status code of the 
response is 400 + Then The status code of the response is 401 And The body of the response is the following - """ - {"detail": "No Authorization header found"} - """ + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "No Authorization header found" + } + } + """ Scenario: Check if LLM responds to sent question with error when attempting to access conversation Given The system is in default state @@ -110,7 +115,7 @@ Scenario: Check if LLM responds for query request with error for missing query """ {"query": "Say hello"} """ - Then The status code of the response is 500 + Then The status code of the response is 503 And The body of the response contains Unable to connect to Llama Stack Scenario: Check if LLM responds properly when XML and JSON attachments are sent diff --git a/tests/e2e/features/streaming_query.feature b/tests/e2e/features/streaming_query.feature index 27196ccb..ca69aa7a 100644 --- a/tests/e2e/features/streaming_query.feature +++ b/tests/e2e/features/streaming_query.feature @@ -126,8 +126,13 @@ Feature: streaming_query endpoint API tests """ {"query": "Say hello"} """ - Then The status code of the response is 400 + Then The status code of the response is 401 And The body of the response is the following """ - {"detail": "No Authorization header found"} + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "No Authorization header found" + } + } """ diff --git a/tests/integration/endpoints/test_config_integration.py b/tests/integration/endpoints/test_config_integration.py index 2df47cdf..c85af079 100644 --- a/tests/integration/endpoints/test_config_integration.py +++ b/tests/integration/endpoints/test_config_integration.py @@ -1,13 +1,11 @@ """Integration tests for the /config endpoint.""" import pytest +from fastapi import HTTPException, Request, status -from fastapi import Request - -from authentication.interface import AuthTuple - -from configuration import AppConfig, LogicError from app.endpoints.config import config_endpoint_handler +from authentication.interface import AuthTuple +from configuration import AppConfig @pytest.mark.asyncio @@ -32,7 +30,7 @@ async def test_config_endpoint_returns_config( response = await config_endpoint_handler(auth=test_auth, request=test_request) # Verify that response matches the real configuration - assert response == test_config.configuration + assert response.configuration == test_config.configuration @pytest.mark.asyncio @@ -57,7 +55,7 @@ async def test_config_endpoint_returns_current_config( response = await config_endpoint_handler(auth=test_auth, request=test_request) # Verify that response matches the root configuration - assert response == current_config.configuration + assert response.configuration == current_config.configuration @pytest.mark.asyncio @@ -68,7 +66,7 @@ async def test_config_endpoint_fails_without_configuration( """Test that authorization fails when configuration is not loaded. 
This integration test verifies: - - LogicError is raised when configuration is not loaded + - HTTPException is raised when configuration is not loaded - Error message indicates configuration is not loaded Args: @@ -76,9 +74,13 @@ async def test_config_endpoint_fails_without_configuration( test_auth: noop authentication tuple """ - # Verify that LogicError is raised when authorization tries to access config - with pytest.raises(LogicError) as exc_info: + # Verify that HTTPException is raised when configuration is not loaded + with pytest.raises(HTTPException) as exc_info: await config_endpoint_handler(auth=test_auth, request=test_request) - # Verify error message - assert "configuration is not loaded" in str(exc_info.value) + # Verify error details + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert isinstance(exc_info.value.detail, dict) + assert ( + "configuration is not loaded" in exc_info.value.detail["response"].lower() + ) # type: ignore diff --git a/tests/integration/endpoints/test_health_integration.py b/tests/integration/endpoints/test_health_integration.py index 10fd9b2a..cc138544 100644 --- a/tests/integration/endpoints/test_health_integration.py +++ b/tests/integration/endpoints/test_health_integration.py @@ -5,7 +5,7 @@ from pytest_mock import MockerFixture, AsyncMockType from llama_stack.providers.datatypes import HealthStatus -from fastapi import Response, status +from fastapi import Response from authentication.interface import AuthTuple from configuration import AppConfig @@ -126,32 +126,21 @@ async def test_health_readiness_client_error( """Test that readiness probe endpoint handles uninitialized client gracefully. This integration test verifies: - - Endpoint handles missing client initialization gracefully - - Error is caught and returned as proper health status - - Service returns 503 status code for unhealthy state - - Error message includes details about initialization failure + - RuntimeError from uninitialized client is NOT caught by the endpoint + - Error propagates from the endpoint handler (desired behavior) + - The endpoint does not catch RuntimeError, only APIConnectionError Args: test_response: FastAPI response object test_auth: noop authentication tuple """ - result = await readiness_probe_get_method(auth=test_auth, response=test_response) - # Verify HTTP status code is 503 (Service Unavailable) - assert test_response.status_code == status.HTTP_503_SERVICE_UNAVAILABLE - - # Verify that service returns error response when client not initialized - assert result.ready is False - assert "Providers not healthy" in result.reason - assert "unknown" in result.reason - - # Verify the response includes provider error details - assert len(result.providers) == 1 - assert result.providers[0].provider_id == "unknown" - assert result.providers[0].status == "Error" - assert ( - "AsyncLlamaStackClient has not been initialised" in result.providers[0].message - ) + # Verify that RuntimeError propagates from the endpoint (not caught) + with pytest.raises(RuntimeError) as exc_info: + await readiness_probe_get_method(auth=test_auth, response=test_response) + + assert "AsyncLlamaStackClient has not been initialised" in str(exc_info.value) + assert "Ensure 'load(..)' has been called" in str(exc_info.value) @pytest.mark.asyncio diff --git a/tests/integration/endpoints/test_info_integration.py b/tests/integration/endpoints/test_info_integration.py index c1a3ea4d..690227b2 100644 --- a/tests/integration/endpoints/test_info_integration.py +++ 
b/tests/integration/endpoints/test_info_integration.py @@ -106,9 +106,9 @@ async def test_info_endpoint_handles_connection_error( await info_endpoint_handler(auth=test_auth, request=test_request) # Verify error details - assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE assert isinstance(exc_info.value.detail, dict) - assert exc_info.value.detail["response"] == "Unable to connect to Llama Stack" + assert exc_info.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore assert "cause" in exc_info.value.detail diff --git a/tests/integration/test_middleware_integration.py b/tests/integration/test_middleware_integration.py new file mode 100644 index 00000000..80dde1c5 --- /dev/null +++ b/tests/integration/test_middleware_integration.py @@ -0,0 +1,34 @@ +"""Integration tests for the global exception middleware.""" + +from fastapi import Request, status +from fastapi.testclient import TestClient + +from configuration import configuration +from models.responses import InternalServerErrorResponse + + +class TestGlobalExceptionMiddlewareIntegration: # pylint: disable=too-few-public-methods + """Integration test suite for global exception middleware.""" + + def test_middleware_catches_unexpected_exception_in_endpoint(self) -> None: + """Test that middleware catches unexpected exceptions from endpoints.""" + configuration_filename = "tests/configuration/lightspeed-stack-proper-name.yaml" + cfg = configuration + cfg.load_configuration(configuration_filename) + from app.main import app # pylint: disable=C0415 + + @app.get("/test-middleware-exception", include_in_schema=False) + async def _(request: Request) -> dict[str, str]: + """Test endpoint that raises an unexpected exception for middleware testing.""" + raise ValueError("Unexpected error in endpoint for testing middleware") + + client = TestClient(app) + response = client.get("/test-middleware-exception") + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + response_data = response.json() + assert "detail" in response_data + detail = response_data["detail"] + expected_response = InternalServerErrorResponse.generic() + expected_detail = expected_response.model_dump()["detail"] + assert detail["response"] == expected_detail["response"] + assert detail["cause"] == expected_detail["cause"] diff --git a/tests/integration/test_openapi_json.py b/tests/integration/test_openapi_json.py index 8fe1c689..3fa83fb2 100644 --- a/tests/integration/test_openapi_json.py +++ b/tests/integration/test_openapi_json.py @@ -1,13 +1,13 @@ """Tests the OpenAPI specification that is to be stored in docs/openapi.json.""" -from typing import Any import json from pathlib import Path +from typing import Any import pytest import requests - from fastapi.testclient import TestClient + from configuration import configuration # Strategy: @@ -123,49 +123,63 @@ def test_servers_section_present_from_url(spec_from_url: dict[str, Any]) -> None "path,method,expected_codes", [ ("/", "get", {"200"}), - ("/v1/info", "get", {"200", "500"}), - ("/v1/models", "get", {"200", "500"}), - ("/v1/tools", "get", {"200", "500"}), - ("/v1/shields", "get", {"200", "500"}), - ("/v1/providers", "get", {"200", "500"}), - ("/v1/providers/{provider_id}", "get", {"200", "404", "422", "500"}), - ("/v1/query", "post", {"200", "400", "403", "500", "422"}), - ("/v1/streaming_query", "post", {"200", "400", "401", "403", "422", "500"}), - ("/v1/config", "get", {"200", "503"}), 
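# Editorial note: the expected-code sets in these parametrize tables track the
# reworked error responses: 401/403 appear on secured endpoints (authentication
# and authorization failures), 503 wherever a Llama Stack connection is
# involved, and 500 for the generic internal-error path.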
- ("/v1/feedback", "post", {"200", "401", "403", "500", "422"}), + ("/v1/info", "get", {"200", "401", "403", "503"}), + ("/v1/models", "get", {"200", "401", "403", "500", "503"}), + ("/v1/tools", "get", {"200", "401", "403", "500", "503"}), + ("/v1/shields", "get", {"200", "401", "403", "500", "503"}), + ("/v1/providers", "get", {"200", "401", "403", "500", "503"}), + ( + "/v1/providers/{provider_id}", + "get", + {"200", "401", "403", "404", "500", "503"}, + ), + ("/v1/rags", "get", {"200", "401", "403", "500", "503"}), + ( + "/v1/rags/{rag_id}", + "get", + {"200", "401", "403", "404", "500", "503"}, + ), + ("/v1/query", "post", {"200", "401", "403", "404", "422", "429", "500", "503"}), + ( + "/v1/streaming_query", + "post", + {"200", "401", "403", "404", "422", "429", "500", "503"}, + ), + ("/v1/config", "get", {"200", "401", "403", "500"}), + ("/v1/feedback", "post", {"200", "401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), - ("/v1/feedback/status", "put", {"200", "422"}), - ("/v1/conversations", "get", {"200", "401", "503"}), + ("/v1/feedback/status", "put", {"200", "401", "403", "500"}), + ("/v1/conversations", "get", {"200", "401", "403", "500", "503"}), ( "/v1/conversations/{conversation_id}", "get", - {"200", "400", "401", "404", "503", "422"}, + {"200", "400", "401", "403", "404", "500", "503"}, ), ( "/v1/conversations/{conversation_id}", "delete", - {"200", "400", "401", "404", "503", "422"}, + {"200", "400", "401", "403", "404", "500", "503"}, ), - ("/v2/conversations", "get", {"200"}), + ("/v2/conversations", "get", {"200", "401", "403", "500"}), ( "/v2/conversations/{conversation_id}", "get", - {"200", "400", "401", "404", "422"}, + {"200", "400", "401", "403", "404", "500"}, ), ( "/v2/conversations/{conversation_id}", "delete", - {"200", "400", "401", "404", "422"}, + {"200", "400", "401", "403", "404", "500"}, ), ( "/v2/conversations/{conversation_id}", "put", - {"200", "400", "401", "404", "422"}, + {"200", "400", "401", "403", "404", "500"}, ), - ("/readiness", "get", {"200", "503"}), - ("/liveness", "get", {"200"}), - ("/authorized", "post", {"200", "400", "401", "403"}), - ("/metrics", "get", {"200"}), + ("/readiness", "get", {"200", "401", "403", "503"}), + ("/liveness", "get", {"200", "401", "403"}), + ("/authorized", "post", {"200", "401", "403"}), + ("/metrics", "get", {"200", "401", "403", "500", "503"}), ], ) def test_paths_and_responses_exist_from_file( @@ -178,50 +192,64 @@ def test_paths_and_responses_exist_from_file( @pytest.mark.parametrize( "path,method,expected_codes", [ - ("/", "get", {"200"}), - ("/v1/info", "get", {"200", "500"}), - ("/v1/models", "get", {"200", "500"}), - ("/v1/tools", "get", {"200", "500"}), - ("/v1/shields", "get", {"200", "500"}), - ("/v1/providers", "get", {"200", "500"}), - ("/v1/providers/{provider_id}", "get", {"200", "404", "422", "500"}), - ("/v1/query", "post", {"200", "400", "403", "500", "422"}), - ("/v1/streaming_query", "post", {"200", "400", "401", "403", "422", "500"}), - ("/v1/config", "get", {"200", "503"}), - ("/v1/feedback", "post", {"200", "401", "403", "500", "422"}), + ("/", "get", {"200", "401", "403"}), + ("/v1/info", "get", {"200", "401", "403", "503"}), + ("/v1/models", "get", {"200", "401", "403", "500", "503"}), + ("/v1/tools", "get", {"200", "401", "403", "500", "503"}), + ("/v1/shields", "get", {"200", "401", "403", "500", "503"}), + ("/v1/providers", "get", {"200", "401", "403", "500", "503"}), + ( + "/v1/providers/{provider_id}", + "get", + {"200", "401", "403", "404", "500", "503"}, + 
), + ("/v1/rags", "get", {"200", "401", "403", "500", "503"}), + ( + "/v1/rags/{rag_id}", + "get", + {"200", "401", "403", "404", "500", "503"}, + ), + ("/v1/query", "post", {"200", "401", "403", "404", "422", "429", "500", "503"}), + ( + "/v1/streaming_query", + "post", + {"200", "401", "403", "404", "422", "429", "500", "503"}, + ), + ("/v1/config", "get", {"200", "401", "403", "500"}), + ("/v1/feedback", "post", {"200", "401", "403", "404", "500"}), ("/v1/feedback/status", "get", {"200"}), - ("/v1/feedback/status", "put", {"200", "422"}), - ("/v1/conversations", "get", {"200", "401", "503"}), + ("/v1/feedback/status", "put", {"200", "401", "403", "500"}), + ("/v1/conversations", "get", {"200", "401", "403", "500", "503"}), ( "/v1/conversations/{conversation_id}", "get", - {"200", "400", "401", "404", "503", "422"}, + {"200", "400", "401", "403", "404", "500", "503"}, ), ( "/v1/conversations/{conversation_id}", "delete", - {"200", "400", "401", "404", "503", "422"}, + {"200", "400", "401", "403", "404", "500", "503"}, ), - ("/v2/conversations", "get", {"200"}), + ("/v2/conversations", "get", {"200", "401", "403", "500"}), ( "/v2/conversations/{conversation_id}", "get", - {"200", "400", "401", "404", "422"}, + {"200", "400", "401", "403", "404", "500"}, ), ( "/v2/conversations/{conversation_id}", "delete", - {"200", "400", "401", "404", "422"}, + {"200", "400", "401", "403", "404", "500"}, ), ( "/v2/conversations/{conversation_id}", "put", - {"200", "400", "401", "404", "422"}, + {"200", "400", "401", "403", "404", "500"}, ), - ("/readiness", "get", {"200", "503"}), - ("/liveness", "get", {"200"}), - ("/authorized", "post", {"200", "400", "401", "403"}), - ("/metrics", "get", {"200"}), + ("/readiness", "get", {"200", "401", "403", "503"}), + ("/liveness", "get", {"200", "401", "403"}), + ("/authorized", "post", {"200", "401", "403"}), + ("/metrics", "get", {"200", "401", "403", "500", "503"}), ], ) def test_paths_and_responses_exist_from_url( diff --git a/tests/unit/app/endpoints/test_authorized.py b/tests/unit/app/endpoints/test_authorized.py index ab92cb32..fa140a3f 100644 --- a/tests/unit/app/endpoints/test_authorized.py +++ b/tests/unit/app/endpoints/test_authorized.py @@ -51,16 +51,24 @@ async def test_authorized_dependency_unauthorized() -> None: # Test the auth utility function that would be called by auth dependencies # This simulates the unauthorized scenario that would prevent the handler from being called - # Test case 1: No Authorization header (400 error from extract_user_token) headers_no_auth = Headers({}) with pytest.raises(HTTPException) as exc_info: extract_user_token(headers_no_auth) - assert exc_info.value.status_code == 400 - assert exc_info.value.detail == "No Authorization header found" + assert exc_info.value.status_code == 401 + assert exc_info.value.detail["response"] == ( # type: ignore + "Missing or invalid credentials provided by client" + ) + assert exc_info.value.detail["cause"] == ( # type: ignore + "No Authorization header found" + ) - # Test case 2: Invalid Authorization header format (400 error from extract_user_token) headers_invalid_auth = Headers({"Authorization": "InvalidFormat"}) with pytest.raises(HTTPException) as exc_info: extract_user_token(headers_invalid_auth) - assert exc_info.value.status_code == 400 - assert exc_info.value.detail == "No token found in Authorization header" + assert exc_info.value.status_code == 401 + assert exc_info.value.detail["response"] == ( # type: ignore + "Missing or invalid credentials provided by client" + ) + 
assert exc_info.value.detail["cause"] == ( # type: ignore + "No token found in Authorization header" + ) diff --git a/tests/unit/app/endpoints/test_config.py b/tests/unit/app/endpoints/test_config.py index dcaca95e..02224bcc 100644 --- a/tests/unit/app/endpoints/test_config.py +++ b/tests/unit/app/endpoints/test_config.py @@ -1,12 +1,11 @@ """Unit tests for the /config REST API endpoint.""" -from typing import Any import pytest +from fastapi import HTTPException, Request, status from pytest_mock import MockerFixture -from fastapi import HTTPException, Request, status -from authentication.interface import AuthTuple from app.endpoints.config import config_endpoint_handler +from authentication.interface import AuthTuple from configuration import AppConfig from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -18,12 +17,8 @@ async def test_config_endpoint_handler_configuration_not_loaded( """Test the config endpoint handler when configuration is not loaded.""" mock_authorization_resolvers(mocker) - # mock for missing configuration - mocker.patch( - "app.endpoints.config.configuration._configuration", - new=None, - ) - mocker.patch("app.endpoints.config.configuration", None) + mock_config = AppConfig() + mocker.patch("app.endpoints.config.configuration", mock_config) # HTTP request mock required by URL endpoint handler request = Request( @@ -43,48 +38,21 @@ async def test_config_endpoint_handler_configuration_not_loaded( detail = exc_info.value.detail assert isinstance(detail, dict) - assert detail["response"] == "Configuration is not loaded" + assert detail["response"] == "Configuration is not loaded" # type: ignore + assert detail["cause"] == ( # type: ignore + "Lightspeed Stack configuration has not been initialized." + ) @pytest.mark.asyncio async def test_config_endpoint_handler_configuration_loaded( mocker: MockerFixture, + minimal_config: AppConfig, ) -> None: """Test the config endpoint handler when configuration is loaded.""" mock_authorization_resolvers(mocker) - # configuration to be loaded - config_dict: dict[Any, Any] = { - "name": "foo", - "service": { - "host": "localhost", - "port": 8080, - "auth_enabled": False, - "workers": 1, - "color_log": True, - "access_log": True, - }, - "llama_stack": { - "api_key": "xyzzy", - "url": "http://x.y.com:1234", - "use_as_library_client": False, - }, - "user_data_collection": { - "feedback_enabled": False, - }, - "authentication": { - "module": "noop", - }, - "authorization": {"access_rules": []}, - "customization": None, - } - - # load the configuration - cfg = AppConfig() - cfg.init_from_dict(config_dict) - - # Mock configuration - mocker.patch("app.endpoints.config.configuration", cfg) + mocker.patch("app.endpoints.config.configuration", minimal_config) # HTTP request mock required by URL endpoint handler request = Request( @@ -100,4 +68,4 @@ async def test_config_endpoint_handler_configuration_loaded( auth=auth, request=request # pyright:ignore[reportArgumentType] ) assert response is not None - assert response == cfg.configuration + assert response.configuration == minimal_config.configuration diff --git a/tests/unit/app/endpoints/test_conversations.py b/tests/unit/app/endpoints/test_conversations.py index 6b92bcc7..fcb0cb54 100644 --- a/tests/unit/app/endpoints/test_conversations.py +++ b/tests/unit/app/endpoints/test_conversations.py @@ -4,25 +4,27 @@ """Unit tests for the /conversations REST API endpoints.""" from typing import Any, Optional -from fastapi import HTTPException, status, Request + import pytest -from 
pytest_mock import MockerFixture, MockType +from fastapi import HTTPException, Request, status from llama_stack_client import APIConnectionError, NotFoundError +from pytest_mock import MockerFixture, MockType +from sqlalchemy.exc import SQLAlchemyError from app.endpoints.conversations import ( - get_conversation_endpoint_handler, delete_conversation_endpoint_handler, + get_conversation_endpoint_handler, get_conversations_list_endpoint_handler, simplify_session_data, ) +from configuration import AppConfig from models.config import Action from models.database.conversations import UserConversation from models.responses import ( - ConversationResponse, ConversationDeleteResponse, + ConversationResponse, ConversationsListResponse, ) -from configuration import AppConfig from tests.unit.utils.auth_helpers import mock_authorization_resolvers MOCK_AUTH = ("mock_user_id", "mock_username", False, "mock_token") @@ -278,7 +280,8 @@ async def test_configuration_not_loaded( ) -> None: """Test the endpoint when configuration is not loaded.""" mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations.configuration", None) + mock_config = AppConfig() + mocker.patch("app.endpoints.conversations.configuration", mock_config) with pytest.raises(HTTPException) as exc_info: await get_conversation_endpoint_handler( @@ -291,7 +294,7 @@ async def test_configuration_not_loaded( detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Configuration is not loaded" in detail["response"] + assert "Configuration is not loaded" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_invalid_conversation_id_format( @@ -315,8 +318,8 @@ async def test_invalid_conversation_id_format( detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Invalid conversation ID format" in detail["response"] - assert INVALID_CONVERSATION_ID in detail["cause"] + assert "Invalid conversation ID format" in detail["response"] # type: ignore + assert INVALID_CONVERSATION_ID in detail["cause"] # type: ignore @pytest.mark.asyncio async def test_llama_stack_connection_error( @@ -354,7 +357,7 @@ async def test_llama_stack_connection_error( detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Unable to connect to Llama Stack" in detail["response"] + assert detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio async def test_llama_stack_not_found_error( @@ -391,9 +394,9 @@ async def test_llama_stack_not_found_error( detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] - assert "does not exist" in detail["cause"] - assert VALID_CONVERSATION_ID in detail["cause"] + assert "Conversation not found" in detail["response"] # type: ignore + assert "does not exist" in detail["cause"] # type: ignore + assert VALID_CONVERSATION_ID in detail["cause"] # type: ignore @pytest.mark.asyncio async def test_session_retrieve_exception( @@ -402,19 +405,21 @@ async def test_session_retrieve_exception( setup_configuration: AppConfig, dummy_request: Request, ) -> None: - """Test the endpoint when session retrieval raises an exception.""" + """Test the endpoint when session retrieval raises an APIConnectionError.""" mock_authorization_resolvers(mocker) mocker.patch("app.endpoints.conversations.configuration", setup_configuration) mocker.patch("app.endpoints.conversations.check_suid", return_value=True) mocker.patch("app.endpoints.conversations.can_access_conversation") 
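# Editorial note: the rewritten test below no longer raises a bare Exception;
# it simulates APIConnectionError from the llama-stack client, which the
# handler is expected to translate into HTTP 503, as the assertions verify.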
mocker.patch("app.endpoints.conversations.retrieve_conversation") - # Mock AsyncLlamaStackClientHolder to raise a general exception - mock_client = mocker.AsyncMock() - mock_client.agents.session.list.side_effect = Exception("Failed to get session") + # Mock AsyncLlamaStackClientHolder to raise APIConnectionError mock_client_holder = mocker.patch( "app.endpoints.conversations.AsyncLlamaStackClientHolder" ) + mock_client = mocker.AsyncMock() + mock_client.agents.session.list.side_effect = APIConnectionError( + request=mocker.Mock() + ) mock_client_holder.return_value.get_client.return_value = mock_client with pytest.raises(HTTPException) as exc_info: @@ -424,12 +429,10 @@ async def test_session_retrieve_exception( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - + assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Unknown error" in detail["response"] - assert "Unknown error while getting conversation" in detail["cause"] + assert detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio async def test_get_conversation_forbidden( @@ -475,11 +478,11 @@ async def test_get_conversation_forbidden( assert exc_info.value.status_code == status.HTTP_403_FORBIDDEN expected = ( f"User {MOCK_AUTH[0]} does not have permission " - f"to read conversation with ID {VALID_CONVERSATION_ID}." + f"to read conversation with ID {VALID_CONVERSATION_ID}" ) detail = exc_info.value.detail assert isinstance(detail, dict) - assert expected in detail["cause"] + assert expected in detail["cause"] # type: ignore @pytest.mark.asyncio async def test_get_others_conversations_allowed_for_authorized_user( @@ -567,6 +570,115 @@ async def test_successful_conversation_retrieval( agent_id=VALID_CONVERSATION_ID ) + @pytest.mark.asyncio + async def test_retrieve_conversation_returns_none( + self, + mocker: MockerFixture, + setup_configuration: AppConfig, + dummy_request: Request, + ) -> None: + """Test when retrieve_conversation returns None.""" + mock_authorization_resolvers(mocker) + mocker.patch("app.endpoints.conversations.configuration", setup_configuration) + mocker.patch("app.endpoints.conversations.check_suid", return_value=True) + mocker.patch("app.endpoints.conversations.can_access_conversation") + mocker.patch( + "app.endpoints.conversations.retrieve_conversation", return_value=None + ) + + with pytest.raises(HTTPException) as exc_info: + await get_conversation_endpoint_handler( + request=dummy_request, + conversation_id=VALID_CONVERSATION_ID, + auth=MOCK_AUTH, + ) + + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Conversation not found" in detail["response"] # type: ignore + + @pytest.mark.asyncio + async def test_no_sessions_found_in_get_conversation( + self, + mocker: MockerFixture, + setup_configuration: AppConfig, + dummy_request: Request, + mock_conversation: MockType, + ) -> None: + """Test when no sessions are found for the conversation.""" + mock_authorization_resolvers(mocker) + mocker.patch("app.endpoints.conversations.configuration", setup_configuration) + mocker.patch("app.endpoints.conversations.check_suid", return_value=True) + mocker.patch("app.endpoints.conversations.can_access_conversation") + mocker.patch( + "app.endpoints.conversations.retrieve_conversation", + return_value=mock_conversation, + ) + + # Mock AsyncLlamaStackClientHolder 
with empty sessions list + mock_client = mocker.AsyncMock() + mock_client.agents.session.list.return_value = mocker.Mock(data=[]) + mock_client_holder = mocker.patch( + "app.endpoints.conversations.AsyncLlamaStackClientHolder" + ) + mock_client_holder.return_value.get_client.return_value = mock_client + + with pytest.raises(HTTPException) as exc_info: + await get_conversation_endpoint_handler( + request=dummy_request, + conversation_id=VALID_CONVERSATION_ID, + auth=MOCK_AUTH, + ) + + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Conversation not found" in detail["response"] # type: ignore + + @pytest.mark.asyncio + async def test_sqlalchemy_error_in_get_conversation( + self, + mocker: MockerFixture, + setup_configuration: AppConfig, + dummy_request: Request, + mock_conversation: MockType, + ) -> None: + """Test when SQLAlchemyError is raised during conversation retrieval.""" + mock_authorization_resolvers(mocker) + mocker.patch("app.endpoints.conversations.configuration", setup_configuration) + mocker.patch("app.endpoints.conversations.check_suid", return_value=True) + mocker.patch("app.endpoints.conversations.can_access_conversation") + mocker.patch( + "app.endpoints.conversations.retrieve_conversation", + return_value=mock_conversation, + ) + + # Mock AsyncLlamaStackClientHolder - SQLAlchemyError should come from session.retrieve + mock_client = mocker.AsyncMock() + mock_session_list_response = mocker.Mock() + mock_session_list_response.data = [{"session_id": VALID_CONVERSATION_ID}] + mock_client.agents.session.list.return_value = mock_session_list_response + mock_client.agents.session.retrieve.side_effect = SQLAlchemyError( + "Database error" + ) + mock_client_holder = mocker.patch( + "app.endpoints.conversations.AsyncLlamaStackClientHolder" + ) + mock_client_holder.return_value.get_client.return_value = mock_client + + with pytest.raises(HTTPException) as exc_info: + await get_conversation_endpoint_handler( + request=dummy_request, + conversation_id=VALID_CONVERSATION_ID, + auth=MOCK_AUTH, + ) + + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Database" in detail["response"] # type: ignore + class TestDeleteConversationEndpoint: """Test cases for the DELETE /conversations/{conversation_id} endpoint.""" @@ -577,7 +689,8 @@ async def test_configuration_not_loaded( ) -> None: """Test the endpoint when configuration is not loaded.""" mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations.configuration", None) + mock_config = AppConfig() + mocker.patch("app.endpoints.conversations.configuration", mock_config) with pytest.raises(HTTPException) as exc_info: await delete_conversation_endpoint_handler( @@ -590,7 +703,7 @@ async def test_configuration_not_loaded( detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Configuration is not loaded" in detail["response"] + assert "Configuration is not loaded" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_invalid_conversation_id_format( @@ -615,8 +728,8 @@ async def test_invalid_conversation_id_format( detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Invalid conversation ID format" in detail["response"] - assert INVALID_CONVERSATION_ID in detail["cause"] + assert "Invalid conversation ID format" in detail["response"] # type: ignore + assert 
INVALID_CONVERSATION_ID in detail["cause"] # type: ignore @pytest.mark.asyncio async def test_llama_stack_connection_error( @@ -652,7 +765,7 @@ async def test_llama_stack_connection_error( assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Unable to connect to Llama Stack" in detail["response"] + assert detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio async def test_llama_stack_not_found_error( @@ -688,9 +801,9 @@ async def test_llama_stack_not_found_error( assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] - assert "does not exist" in detail["cause"] - assert VALID_CONVERSATION_ID in detail["cause"] + assert "Conversation not found" in detail["response"] # type: ignore + assert "does not exist" in detail["cause"] # type: ignore + assert VALID_CONVERSATION_ID in detail["cause"] # type: ignore @pytest.mark.asyncio async def test_session_deletion_exception( @@ -708,8 +821,8 @@ async def test_session_deletion_exception( # Mock AsyncLlamaStackClientHolder to raise a general exception mock_client = mocker.AsyncMock() - mock_client.agents.session.delete.side_effect = Exception( - "Session deletion failed" + mock_client.agents.session.delete.side_effect = APIConnectionError( + request=None # type: ignore ) mock_client_holder = mocker.patch( "app.endpoints.conversations.AsyncLlamaStackClientHolder" @@ -722,12 +835,10 @@ async def test_session_deletion_exception( conversation_id=VALID_CONVERSATION_ID, auth=MOCK_AUTH, ) - - assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Unknown error" in detail["response"] - assert "Unknown error while deleting conversation" in detail["cause"] + assert "Unable to connect to Llama Stack" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_delete_conversation_forbidden( @@ -773,11 +884,11 @@ async def test_delete_conversation_forbidden( assert exc_info.value.status_code == status.HTTP_403_FORBIDDEN expected = ( f"User {MOCK_AUTH[0]} does not have permission " - f"to delete conversation with ID {VALID_CONVERSATION_ID}." 
+ f"to delete conversation with ID {VALID_CONVERSATION_ID}" ) detail = exc_info.value.detail assert isinstance(detail, dict) - assert expected in detail["cause"] + assert expected in detail["cause"] # type: ignore @pytest.mark.asyncio async def test_delete_others_conversations_allowed_for_authorized_user( @@ -867,6 +978,118 @@ async def test_successful_conversation_deletion( agent_id=VALID_CONVERSATION_ID, session_id=VALID_CONVERSATION_ID ) + @pytest.mark.asyncio + async def test_retrieve_conversation_returns_none_in_delete( + self, + mocker: MockerFixture, + setup_configuration: AppConfig, + dummy_request: Request, + ) -> None: + """Test when retrieve_conversation returns None in delete endpoint.""" + mock_authorization_resolvers(mocker) + mocker.patch("app.endpoints.conversations.configuration", setup_configuration) + mocker.patch("app.endpoints.conversations.check_suid", return_value=True) + mocker.patch("app.endpoints.conversations.can_access_conversation") + mocker.patch( + "app.endpoints.conversations.retrieve_conversation", return_value=None + ) + + with pytest.raises(HTTPException) as exc_info: + await delete_conversation_endpoint_handler( + request=dummy_request, + conversation_id=VALID_CONVERSATION_ID, + auth=MOCK_AUTH, + ) + + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Conversation not found" in detail["response"] # type: ignore + + @pytest.mark.asyncio + async def test_no_sessions_found_in_delete( + self, + mocker: MockerFixture, + setup_configuration: AppConfig, + dummy_request: Request, + mock_conversation: MockType, + ) -> None: + """Test when no sessions are found in delete endpoint (early return).""" + mock_authorization_resolvers(mocker) + mocker.patch("app.endpoints.conversations.configuration", setup_configuration) + mocker.patch("app.endpoints.conversations.check_suid", return_value=True) + mocker.patch("app.endpoints.conversations.can_access_conversation") + mocker.patch( + "app.endpoints.conversations.retrieve_conversation", + return_value=mock_conversation, + ) + + # Mock AsyncLlamaStackClientHolder with empty sessions list + mock_client = mocker.AsyncMock() + mock_client.agents.session.list.return_value = mocker.Mock(data=[]) + mock_client_holder = mocker.patch( + "app.endpoints.conversations.AsyncLlamaStackClientHolder" + ) + mock_client_holder.return_value.get_client.return_value = mock_client + + response = await delete_conversation_endpoint_handler( + request=dummy_request, + conversation_id=VALID_CONVERSATION_ID, + auth=MOCK_AUTH, + ) + + assert isinstance(response, ConversationDeleteResponse) + assert response.conversation_id == VALID_CONVERSATION_ID + assert response.success is True # Operation completed successfully + assert "cannot be deleted" in response.response # But nothing was deleted + + @pytest.mark.asyncio + async def test_sqlalchemy_error_in_delete( + self, + mocker: MockerFixture, + setup_configuration: AppConfig, + dummy_request: Request, + mock_conversation: MockType, + ) -> None: + """Test when SQLAlchemyError is raised during conversation deletion.""" + mock_authorization_resolvers(mocker) + mocker.patch("app.endpoints.conversations.configuration", setup_configuration) + mocker.patch("app.endpoints.conversations.check_suid", return_value=True) + mocker.patch("app.endpoints.conversations.can_access_conversation") + mocker.patch( + "app.endpoints.conversations.retrieve_conversation", + return_value=mock_conversation, + ) + + # Mock 
AsyncLlamaStackClientHolder - SQLAlchemyError should come from delete_conversation + mock_client = mocker.AsyncMock() + mock_session_list_response = mocker.Mock() + mock_session_list_response.data = [{"session_id": VALID_CONVERSATION_ID}] + mock_client.agents.session.list.return_value = mock_session_list_response + mock_client.agents.session.delete.return_value = None + mock_client_holder = mocker.patch( + "app.endpoints.conversations.AsyncLlamaStackClientHolder" + ) + mock_client_holder.return_value.get_client.return_value = mock_client + + # Mock delete_conversation to raise SQLAlchemyError + mocker.patch( + "app.endpoints.conversations.delete_conversation", + side_effect=SQLAlchemyError("Database error"), + ) + + with pytest.raises(HTTPException) as exc_info: + await delete_conversation_endpoint_handler( + request=dummy_request, + conversation_id=VALID_CONVERSATION_ID, + auth=MOCK_AUTH, + ) + + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Database" in detail["response"] # type: ignore + # Generated entirely by AI, no human review, so read with that in mind. class TestGetConversationsListEndpoint: @@ -878,7 +1101,8 @@ async def test_configuration_not_loaded( ) -> None: """Test the endpoint when configuration is not loaded.""" mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations.configuration", None) + mock_config = AppConfig() + mocker.patch("app.endpoints.conversations.configuration", mock_config) with pytest.raises(HTTPException) as exc_info: await get_conversations_list_endpoint_handler( @@ -888,7 +1112,7 @@ async def test_configuration_not_loaded( assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Configuration is not loaded" in detail["response"] + assert "Configuration is not loaded" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_successful_conversations_list_retrieval( @@ -990,6 +1214,40 @@ async def test_database_exception( mock_session = mock_database_session(mocker) mock_session.query.side_effect = Exception("Database error") + with pytest.raises(Exception, match="Database error"): + await get_conversations_list_endpoint_handler( + auth=MOCK_AUTH, request=dummy_request + ) + + @pytest.mark.asyncio + async def test_sqlalchemy_error_in_list( + self, + mocker: MockerFixture, + setup_configuration: AppConfig, + dummy_request: Request, + ) -> None: + """Test when database query raises SQLAlchemyError.""" + mock_authorization_resolvers(mocker) + mocker.patch("app.endpoints.conversations.configuration", setup_configuration) + + # Mock database session to raise SQLAlchemyError when all() is called + # Since dummy_request has all actions, it will use query directly (not filter_by) + mock_session = mocker.Mock() + mock_query = mocker.Mock() + # Configure all() to raise SQLAlchemyError + mock_query.all = mocker.Mock( + side_effect=SQLAlchemyError("Database connection error") + ) + mock_session.query.return_value = mock_query + + # Mock get_session to return a context manager + mock_session_context = mocker.MagicMock() + mock_session_context.__enter__.return_value = mock_session + mock_session_context.__exit__.return_value = None + mocker.patch( + "app.endpoints.conversations.get_session", return_value=mock_session_context + ) + with pytest.raises(HTTPException) as exc_info: await get_conversations_list_endpoint_handler( auth=MOCK_AUTH, 
request=dummy_request @@ -998,7 +1256,7 @@ async def test_database_exception( assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Unknown error" in detail["response"] + assert "Database" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_conversations_list_with_none_topic_summary( diff --git a/tests/unit/app/endpoints/test_conversations_v2.py b/tests/unit/app/endpoints/test_conversations_v2.py index c8ba44c7..1d44a369 100644 --- a/tests/unit/app/endpoints/test_conversations_v2.py +++ b/tests/unit/app/endpoints/test_conversations_v2.py @@ -3,25 +3,27 @@ """Unit tests for the /conversations REST API endpoints.""" from datetime import datetime, timezone + import pytest -from pytest_mock import MockerFixture, MockType from fastapi import HTTPException, status +from pytest_mock import MockerFixture, MockType from app.endpoints.conversations_v2 import ( - transform_chat_message, - update_conversation_endpoint_handler, - check_valid_conversation_id, check_conversation_existence, - get_conversations_list_endpoint_handler, - get_conversation_endpoint_handler, + check_valid_conversation_id, delete_conversation_endpoint_handler, + get_conversation_endpoint_handler, + get_conversations_list_endpoint_handler, + transform_chat_message, + update_conversation_endpoint_handler, ) +from configuration import AppConfig from models.cache_entry import CacheEntry from models.requests import ConversationUpdateRequest from models.responses import ( + ConversationData, ConversationUpdateResponse, ReferencedDocument, - ConversationData, ) from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -90,7 +92,9 @@ def test_transform_message_without_documents(self) -> None: def test_transform_message_with_referenced_documents(self) -> None: """Test the transformation when referenced_documents are present.""" - docs = [ReferencedDocument(doc_title="Test Doc", doc_url="http://example.com")] + docs = [ + ReferencedDocument(doc_title="Test Doc", doc_url="http://example.com") + ] # type: ignore entry = CacheEntry( query="query", response="response", @@ -157,7 +161,7 @@ def test_invalid_conversation_id(self, mocker: MockerFixture) -> None: assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Invalid conversation ID format" in detail["response"] + assert "Invalid conversation ID format" in detail["response"] # type: ignore class TestCheckConversationExistence: @@ -188,7 +192,22 @@ def test_conversation_not_exists( assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] + assert "Conversation not found" in detail["response"] # type: ignore + + def test_conversation_cache_type_none( + self, mocker: MockerFixture, mock_configuration: MockType + ) -> None: + """Test when conversation_cache_configuration.type is None.""" + mock_cache_config = mocker.Mock() + mock_cache_config.type = None + mock_configuration.conversation_cache_configuration = mock_cache_config + mocker.patch("app.endpoints.conversations_v2.configuration", mock_configuration) + + # Should return early without raising an exception or calling list + check_conversation_existence("user_id", VALID_CONVERSATION_ID) + + # Verify that conversation_cache.list was not called + mock_configuration.conversation_cache.list.assert_not_called() 
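Editorial note: several v2 tests here pin down the new contract that an
unconfigured conversation cache is a server misconfiguration (HTTP 500 with
"Conversation cache not configured") rather than the former 404. A minimal
sketch of such a guard, assuming a hypothetical helper name; the "cause"
wording is an assumption the tests do not pin down:

    from fastapi import HTTPException, status

    def check_cache_configured(configuration) -> None:
        """Raise HTTP 500 when no conversation cache backend is configured."""
        cache_config = configuration.conversation_cache_configuration
        if cache_config is None or cache_config.type is None:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail={
                    # "response" wording is asserted by the tests; "cause" is assumed.
                    "response": "Conversation cache not configured",
                    "cause": "No conversation cache backend is set in the configuration.",
                },
            )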
class TestGetConversationsListEndpoint: @@ -198,7 +217,9 @@ class TestGetConversationsListEndpoint: async def test_configuration_not_loaded(self, mocker: MockerFixture) -> None: """Test the endpoint when configuration is not loaded.""" mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", None) + mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access + mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) with pytest.raises(HTTPException) as exc_info: await get_conversations_list_endpoint_handler( @@ -215,7 +236,8 @@ async def test_conversation_cache_not_configured( """Test the endpoint when conversation cache is not configured.""" mock_authorization_resolvers(mocker) mock_config = mocker.Mock() - mock_config.conversation_cache = None + mock_config.conversation_cache_configuration = mocker.Mock() + mock_config.conversation_cache_configuration.type = None mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) with pytest.raises(HTTPException) as exc_info: @@ -224,10 +246,10 @@ async def test_conversation_cache_not_configured( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation cache is not configured" in detail["response"] + assert "Conversation cache not configured" in detail["response"] @pytest.mark.asyncio async def test_successful_retrieval( @@ -296,23 +318,6 @@ async def test_with_skip_userid_check( "mock_user_id", True ) - @pytest.mark.asyncio - async def test_cache_exception( - self, mocker: MockerFixture, mock_configuration: MockType - ) -> None: - """Test the endpoint when the cache throws an exception.""" - mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", mock_configuration) - mock_configuration.conversation_cache.list.side_effect = Exception( - "Cache error" - ) - with pytest.raises(Exception) as exc_info: - await get_conversations_list_endpoint_handler( - request=mocker.Mock(), - auth=MOCK_AUTH, - ) - assert str(exc_info.value) == "Cache error" - @pytest.mark.asyncio async def test_malformed_auth_object( self, mocker: MockerFixture, mock_configuration: MockType @@ -335,7 +340,9 @@ class TestGetConversationEndpoint: async def test_configuration_not_loaded(self, mocker: MockerFixture) -> None: """Test the endpoint when configuration is not loaded.""" mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", None) + mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access + mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) with pytest.raises(HTTPException) as exc_info: await get_conversation_endpoint_handler( @@ -371,7 +378,8 @@ async def test_conversation_cache_not_configured( """Test the endpoint when conversation cache is not configured.""" mock_authorization_resolvers(mocker) mock_config = mocker.Mock() - mock_config.conversation_cache = None + mock_config.conversation_cache_configuration = mocker.Mock() + mock_config.conversation_cache_configuration.type = None mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) mocker.patch("app.endpoints.conversations_v2.check_suid", return_value=True) @@ -382,7 +390,10 @@ async def test_conversation_cache_not_configured( auth=MOCK_AUTH, ) - assert 
exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Conversation cache not configured" in detail["response"] @pytest.mark.asyncio async def test_conversation_not_found( @@ -469,26 +480,6 @@ async def test_with_skip_userid_check( "mock_user_id", VALID_CONVERSATION_ID, True ) - @pytest.mark.asyncio - async def test_cache_exception( - self, mocker: MockerFixture, mock_configuration: MockType - ) -> None: - """Test the endpoint when the cache throws an exception.""" - mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", mock_configuration) - mocker.patch("app.endpoints.conversations_v2.check_suid", return_value=True) - mock_configuration.conversation_cache.list.return_value = [ - mocker.Mock(conversation_id=VALID_CONVERSATION_ID) - ] - mock_configuration.conversation_cache.get.side_effect = Exception("Cache error") - with pytest.raises(Exception) as exc_info: - await get_conversation_endpoint_handler( - request=mocker.Mock(), - conversation_id=VALID_CONVERSATION_ID, - auth=MOCK_AUTH, - ) - assert str(exc_info.value) == "Cache error" - @pytest.mark.asyncio async def test_malformed_auth_object( self, mocker: MockerFixture, mock_configuration: MockType @@ -513,7 +504,9 @@ class TestDeleteConversationEndpoint: async def test_configuration_not_loaded(self, mocker: MockerFixture) -> None: """Test the endpoint when configuration is not loaded.""" mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", None) + mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access + mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) with pytest.raises(HTTPException) as exc_info: await delete_conversation_endpoint_handler( @@ -549,7 +542,8 @@ async def test_conversation_cache_not_configured( """Test the endpoint when conversation cache is not configured.""" mock_authorization_resolvers(mocker) mock_config = mocker.Mock() - mock_config.conversation_cache = None + mock_config.conversation_cache_configuration = mocker.Mock() + mock_config.conversation_cache_configuration.type = None mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) mocker.patch("app.endpoints.conversations_v2.check_suid", return_value=True) @@ -560,7 +554,10 @@ async def test_conversation_cache_not_configured( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Conversation cache not configured" in detail["response"] @pytest.mark.asyncio async def test_conversation_not_found( @@ -627,7 +624,7 @@ async def test_unsuccessful_deletion( assert response is not None assert response.conversation_id == VALID_CONVERSATION_ID assert response.success is True - assert response.response == "Conversation can not be deleted" + assert response.response == "Conversation cannot be deleted" @pytest.mark.asyncio async def test_with_skip_userid_check( @@ -652,30 +649,6 @@ async def test_with_skip_userid_check( "mock_user_id", VALID_CONVERSATION_ID, True ) - @pytest.mark.asyncio - async def test_cache_exception( - self, mocker: MockerFixture, mock_configuration: MockType - ) -> None: - """Test the endpoint when the cache throws an exception.""" - 
mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", mock_configuration) - mocker.patch("app.endpoints.conversations_v2.check_suid", return_value=True) - mock_configuration.conversation_cache.list.return_value = [ - mocker.Mock(conversation_id=VALID_CONVERSATION_ID) - ] - mock_configuration.conversation_cache.delete.side_effect = Exception( - "Cache error" - ) - - with pytest.raises(Exception) as exc_info: - await delete_conversation_endpoint_handler( - request=mocker.Mock(), - conversation_id=VALID_CONVERSATION_ID, - auth=MOCK_AUTH, - ) - - assert str(exc_info.value) == "Cache error" - @pytest.mark.asyncio async def test_malformed_auth_object( self, mocker: MockerFixture, mock_configuration: MockType @@ -700,7 +673,8 @@ class TestUpdateConversationEndpoint: async def test_configuration_not_loaded(self, mocker: MockerFixture) -> None: """Test the endpoint when configuration is not loaded.""" mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", None) + mock_config = AppConfig() + mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) update_request = ConversationUpdateRequest(topic_summary="New topic summary") @@ -712,6 +686,9 @@ async def test_configuration_not_loaded(self, mocker: MockerFixture) -> None: ) assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert "Configuration is not loaded" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_invalid_conversation_id_format( @@ -734,7 +711,7 @@ async def test_invalid_conversation_id_format( assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Invalid conversation ID format" in detail["response"] + assert "Invalid conversation ID format" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_conversation_cache_not_configured( @@ -743,7 +720,8 @@ async def test_conversation_cache_not_configured( """Test the endpoint when conversation cache is not configured.""" mock_authorization_resolvers(mocker) mock_config = mocker.Mock() - mock_config.conversation_cache = None + mock_config.conversation_cache_configuration = mocker.Mock() + mock_config.conversation_cache_configuration.type = None mocker.patch("app.endpoints.conversations_v2.configuration", mock_config) mocker.patch("app.endpoints.conversations_v2.check_suid", return_value=True) @@ -756,10 +734,10 @@ async def test_conversation_cache_not_configured( auth=MOCK_AUTH, ) - assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation cache is not configured" in detail["response"] + assert "Conversation cache not configured" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_conversation_not_found( @@ -783,7 +761,7 @@ async def test_conversation_not_found( assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND detail = exc_info.value.detail assert isinstance(detail, dict) - assert "Conversation not found" in detail["response"] + assert "Conversation not found" in detail["response"] # type: ignore @pytest.mark.asyncio async def test_successful_update( @@ -839,31 +817,6 @@ async def test_with_skip_userid_check( "mock_user_id", VALID_CONVERSATION_ID, "New topic summary", True ) - 
@pytest.mark.asyncio - async def test_cache_exception( - self, mocker: MockerFixture, mock_configuration: MockType - ) -> None: - """Test the endpoint when the cache throws an exception.""" - mock_authorization_resolvers(mocker) - mocker.patch("app.endpoints.conversations_v2.configuration", mock_configuration) - mocker.patch("app.endpoints.conversations_v2.check_suid", return_value=True) - mock_configuration.conversation_cache.list.return_value = [ - mocker.Mock(conversation_id=VALID_CONVERSATION_ID) - ] - mock_configuration.conversation_cache.set_topic_summary.side_effect = Exception( - "Cache error" - ) - update_request = ConversationUpdateRequest(topic_summary="New topic summary") - - with pytest.raises(Exception) as exc_info: - await update_conversation_endpoint_handler( - conversation_id=VALID_CONVERSATION_ID, - update_request=update_request, - auth=MOCK_AUTH, - ) - - assert str(exc_info.value) == "Cache error" - @pytest.mark.asyncio async def test_malformed_auth_object( self, mocker: MockerFixture, mock_configuration: MockType diff --git a/tests/unit/app/endpoints/test_feedback.py b/tests/unit/app/endpoints/test_feedback.py index 22b631f2..8c91c876 100644 --- a/tests/unit/app/endpoints/test_feedback.py +++ b/tests/unit/app/endpoints/test_feedback.py @@ -1,23 +1,27 @@ +# pylint: disable=protected-access + """Unit tests for the /feedback REST API endpoint.""" from typing import Any -from fastapi import HTTPException, status + import pytest +from fastapi import HTTPException, status from pytest_mock import MockerFixture -from configuration import configuration from app.endpoints.feedback import ( - is_feedback_enabled, assert_feedback_enabled, feedback_endpoint_handler, + feedback_status, + is_feedback_enabled, store_feedback, update_feedback_status, ) from authentication.interface import AuthTuple -from models.requests import FeedbackStatusUpdateRequest, FeedbackRequest +from configuration import AppConfig, configuration +from models.config import UserDataCollection +from models.requests import FeedbackRequest, FeedbackStatusUpdateRequest from tests.unit.utils.auth_helpers import mock_authorization_resolvers - MOCK_AUTH = ("mock_user_id", "mock_username", False, "mock_token") VALID_BASE = { "conversation_id": "12345678-abcd-0000-0123-456789abcdef", @@ -26,15 +30,25 @@ } -def test_is_feedback_enabled() -> None: +def test_is_feedback_enabled(mocker: MockerFixture) -> None: """Test that is_feedback_enabled returns True when feedback is not disabled.""" - configuration.user_data_collection_configuration.feedback_enabled = True + mock_config = AppConfig() + mock_config._configuration = mocker.Mock() + mock_config._configuration.user_data_collection = UserDataCollection( + feedback_enabled=True, feedback_storage="/tmp" + ) + mocker.patch("app.endpoints.feedback.configuration", mock_config) assert is_feedback_enabled() is True, "Feedback should be enabled" -def test_is_feedback_disabled() -> None: +def test_is_feedback_disabled(mocker: MockerFixture) -> None: """Test that is_feedback_enabled returns False when feedback is disabled.""" - configuration.user_data_collection_configuration.feedback_enabled = False + mock_config = AppConfig() + mock_config._configuration = mocker.Mock() + mock_config._configuration.user_data_collection = UserDataCollection( + feedback_enabled=False, feedback_storage=None + ) + mocker.patch("app.endpoints.feedback.configuration", mock_config) assert is_feedback_enabled() is False, "Feedback should be disabled" @@ -48,7 +62,8 @@ async def 
test_assert_feedback_enabled_disabled(mocker: MockerFixture) -> None: await assert_feedback_enabled(mocker.Mock()) assert exc_info.value.status_code == status.HTTP_403_FORBIDDEN - assert exc_info.value.detail == "Forbidden: Feedback is disabled" + assert exc_info.value.detail["response"] == "Feedback is disabled" # type: ignore + assert exc_info.value.detail["cause"] == "Storing feedback is disabled." # type: ignore async def test_assert_feedback_enabled(mocker: MockerFixture) -> None: @@ -107,35 +122,36 @@ async def test_feedback_endpoint_handler( @pytest.mark.asyncio async def test_feedback_endpoint_handler_error(mocker: MockerFixture) -> None: - """Test that feedback_endpoint_handler raises an HTTPException on error.""" + """Test feedback_endpoint_handler raises HTTPException when store_feedback raises OSError.""" mock_authorization_resolvers(mocker) - - # Mock the dependencies mocker.patch("app.endpoints.feedback.assert_feedback_enabled", return_value=None) + mocker.patch("app.endpoints.feedback.check_configuration_loaded", return_value=None) + # Mock Path.mkdir to raise OSError so the try block in store_feedback catches it mocker.patch( - "app.endpoints.feedback.store_feedback", - side_effect=Exception("Error storing feedback"), + "app.endpoints.feedback.Path.mkdir", side_effect=OSError("Permission denied") + ) + feedback_request = FeedbackRequest( + conversation_id="123e4567-e89b-12d3-a456-426614174000", + user_question="test question", + llm_response="test response", + user_feedback="test feedback", + sentiment=1, ) - - # Mock the feedback request - feedback_request = mocker.Mock() # Authorization tuple required by URL endpoint handler auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - # Call the endpoint handler and assert it raises an exception with pytest.raises(HTTPException) as exc_info: await feedback_endpoint_handler( feedback_request=feedback_request, _ensure_feedback_enabled=assert_feedback_enabled, auth=auth, ) - assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - detail = exc_info.value.detail assert isinstance(detail, dict) - assert detail["response"] == "Error storing user feedback" + assert detail["response"] == "Failed to store feedback" # type: ignore + assert "Failed to store feedback at directory" in detail["cause"] # type: ignore @pytest.mark.parametrize( @@ -219,8 +235,13 @@ def test_store_feedback_on_io_error( user_id = "test_user_id" - with pytest.raises(OSError, match="EACCES"): + with pytest.raises(HTTPException) as exc_info: store_feedback(user_id, feedback_request_data) + assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert detail["response"] == "Failed to store feedback" # type: ignore + assert "Failed to store feedback at directory" in detail["cause"] # type: ignore async def test_update_feedback_status_different(mocker: MockerFixture) -> None: @@ -286,3 +307,33 @@ async def test_feedback_endpoint_valid_requests( _ensure_feedback_enabled=None, ) assert response.response == "feedback received" + + +def test_feedback_status_enabled(mocker: MockerFixture) -> None: + """Test that feedback_status returns enabled status when feedback is enabled.""" + mock_config = AppConfig() + mock_config._configuration = mocker.Mock() + mock_config._configuration.user_data_collection = UserDataCollection( + feedback_enabled=True, feedback_storage="/tmp" + ) + mocker.patch("app.endpoints.feedback.configuration", mock_config) + + response 
= feedback_status() + + assert response.functionality == "feedback" + assert response.status == {"enabled": True} + + +def test_feedback_status_disabled(mocker: MockerFixture) -> None: + """Test that feedback_status returns disabled status when feedback is disabled.""" + mock_config = AppConfig() + mock_config._configuration = mocker.Mock() + mock_config._configuration.user_data_collection = UserDataCollection( + feedback_enabled=False, feedback_storage=None + ) + mocker.patch("app.endpoints.feedback.configuration", mock_config) + + response = feedback_status() + + assert response.functionality == "feedback" + assert response.status == {"enabled": False} diff --git a/tests/unit/app/endpoints/test_health.py b/tests/unit/app/endpoints/test_health.py index 60782b01..d9532d3c 100644 --- a/tests/unit/app/endpoints/test_health.py +++ b/tests/unit/app/endpoints/test_health.py @@ -1,15 +1,16 @@ """Unit tests for the /health REST API endpoint.""" -from pytest_mock import MockerFixture - +from llama_stack_client import APIConnectionError import pytest from llama_stack.providers.datatypes import HealthStatus -from authentication.interface import AuthTuple +from pytest_mock import MockerFixture + from app.endpoints.health import ( - readiness_probe_get_method, - liveness_probe_get_method, get_providers_health_statuses, + liveness_probe_get_method, + readiness_probe_get_method, ) +from authentication.interface import AuthTuple from models.responses import ProviderHealthStatus, ReadinessResponse from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -113,7 +114,9 @@ def test_provider_health_status_creation(self) -> None: def test_provider_health_status_optional_fields(self) -> None: """Test creating a ProviderHealthStatus with minimal fields.""" - status = ProviderHealthStatus(provider_id="test_provider", status="ok") + status = ProviderHealthStatus( + provider_id="test_provider", status="ok", message=None + ) assert status.provider_id == "test_provider" assert status.status == "ok" assert status.message is None @@ -181,7 +184,7 @@ async def test_get_providers_health_statuses_connection_error( mock_lsc = mocker.patch("client.AsyncLlamaStackClientHolder.get_client") # Mock get_llama_stack_client to raise an exception - mock_lsc.side_effect = Exception("Connection error") + mock_lsc.side_effect = APIConnectionError(request=mocker.Mock()) result = await get_providers_health_statuses() @@ -189,5 +192,5 @@ async def test_get_providers_health_statuses_connection_error( assert result[0].provider_id == "unknown" assert result[0].status == HealthStatus.ERROR.value assert ( - result[0].message == "Failed to initialize health check: Connection error" + result[0].message == "Failed to initialize health check: Connection error." 
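In the health tests, the simulated failure is now an APIConnectionError rather than a bare Exception; str() of that error is "Connection error." (with a trailing period), which is why the expected message changed. A hedged sketch of the failure path being exercised, using only names these tests import; the success branch is an assumption:

from llama_stack.providers.datatypes import HealthStatus
from llama_stack_client import APIConnectionError

from client import AsyncLlamaStackClientHolder
from models.responses import ProviderHealthStatus

async def get_providers_health_statuses() -> list[ProviderHealthStatus]:
    # Sketch of the error branch asserted above; the real implementation in
    # app.endpoints.health may differ, especially in the success branch.
    try:
        client = AsyncLlamaStackClientHolder().get_client()
        providers = await client.providers.list()
    except APIConnectionError as e:
        return [
            ProviderHealthStatus(
                provider_id="unknown",
                status=HealthStatus.ERROR.value,
                message=f"Failed to initialize health check: {e}",
            )
        ]
    return [
        ProviderHealthStatus(
            provider_id=p.provider_id,
            status=str(p.health.get("status", "unknown")),  # health assumed to be a dict
            message=p.health.get("message"),
        )
        for p in providers
    ]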
) diff --git a/tests/unit/app/endpoints/test_info.py b/tests/unit/app/endpoints/test_info.py index 816749e3..9800be08 100644 --- a/tests/unit/app/endpoints/test_info.py +++ b/tests/unit/app/endpoints/test_info.py @@ -1,15 +1,15 @@ """Unit tests for the /info REST API endpoint.""" from typing import Any -import pytest -from fastapi import Request, HTTPException, status -from pytest_mock import MockerFixture +import pytest +from fastapi import HTTPException, Request, status from llama_stack_client import APIConnectionError from llama_stack_client.types import VersionInfo +from pytest_mock import MockerFixture -from authentication.interface import AuthTuple from app.endpoints.info import info_endpoint_handler +from authentication.interface import AuthTuple from configuration import AppConfig from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -131,5 +131,6 @@ async def test_info_endpoint_connection_error(mocker: MockerFixture) -> None: with pytest.raises(HTTPException) as e: await info_endpoint_handler(auth=auth, request=request) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Unable to connect to Llama Stack" + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert e.value.detail["response"] == "Service unavailable" # type: ignore + assert "Unable to connect to Llama Stack" in e.value.detail["cause"] # type: ignore diff --git a/tests/unit/app/endpoints/test_metrics.py b/tests/unit/app/endpoints/test_metrics.py index ea422c91..cdd30636 100644 --- a/tests/unit/app/endpoints/test_metrics.py +++ b/tests/unit/app/endpoints/test_metrics.py @@ -1,11 +1,11 @@ """Unit tests for the /metrics REST API endpoint.""" import pytest -from pytest_mock import MockerFixture from fastapi import Request +from pytest_mock import MockerFixture -from authentication.interface import AuthTuple from app.endpoints.metrics import metrics_endpoint_handler +from authentication.interface import AuthTuple from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -32,7 +32,7 @@ async def test_metrics_endpoint(mocker: MockerFixture) -> None: assert response.status_code == 200 assert "text/plain" in response.headers["Content-Type"] - response_body = response.body.decode() + response_body = response.body.decode() # type: ignore # Assert metrics were set up mock_setup_metrics.assert_called_once() diff --git a/tests/unit/app/endpoints/test_models.py b/tests/unit/app/endpoints/test_models.py index 99392ace..fb2b65f6 100644 --- a/tests/unit/app/endpoints/test_models.py +++ b/tests/unit/app/endpoints/test_models.py @@ -1,15 +1,14 @@ """Unit tests for the /models REST API endpoint.""" from typing import Any -import pytest +import pytest from fastapi import HTTPException, Request, status -from pytest_mock import MockerFixture - from llama_stack_client import APIConnectionError +from pytest_mock import MockerFixture -from authentication.interface import AuthTuple from app.endpoints.models import models_endpoint_handler +from authentication.interface import AuthTuple from configuration import AppConfig from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -22,66 +21,8 @@ async def test_models_endpoint_handler_configuration_not_loaded( mock_authorization_resolvers(mocker) # simulate state when no configuration is loaded - mocker.patch( - "app.endpoints.models.configuration", - return_value=mocker.Mock(), - ) - mocker.patch("app.endpoints.models.configuration", None) - - request = Request( - scope={ - 
"type": "http", - "headers": [(b"authorization", b"Bearer invalid-token")], - } - ) - - # Authorization tuple required by URL endpoint handler - auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - - with pytest.raises(HTTPException) as e: - await models_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Configuration is not loaded" - - -@pytest.mark.asyncio -async def test_models_endpoint_handler_improper_llama_stack_configuration( - mocker: MockerFixture, -) -> None: - """Test the models endpoint handler if Llama Stack configuration is not proper.""" - mock_authorization_resolvers(mocker) - - # configuration for tests - config_dict: dict[str, Any] = { - "name": "test", - "service": { - "host": "localhost", - "port": 8080, - "auth_enabled": False, - "workers": 1, - "color_log": True, - "access_log": True, - }, - "llama_stack": { - "api_key": "test-key", - "url": "http://test.com:1234", - "use_as_library_client": False, - }, - "user_data_collection": { - "transcripts_enabled": False, - }, - "mcp_servers": [], - "customization": None, - "authorization": {"access_rules": []}, - "authentication": {"module": "noop"}, - } - cfg = AppConfig() - cfg.init_from_dict(config_dict) - - mocker.patch( - "app.endpoints.models.configuration", - return_value=None, - ) + mock_config = AppConfig() + mocker.patch("app.endpoints.models.configuration", mock_config) request = Request( scope={ @@ -96,7 +37,7 @@ async def test_models_endpoint_handler_improper_llama_stack_configuration( with pytest.raises(HTTPException) as e: await models_endpoint_handler(request=request, auth=auth) assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Llama stack is not configured" + assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore @pytest.mark.asyncio @@ -132,6 +73,14 @@ async def test_models_endpoint_handler_configuration_loaded( cfg = AppConfig() cfg.init_from_dict(config_dict) + mocker.patch("app.endpoints.models.configuration", cfg) + mock_client_holder = mocker.patch( + "app.endpoints.models.AsyncLlamaStackClientHolder" + ) + mock_client_holder.return_value.get_client.side_effect = APIConnectionError( + request=mocker.Mock() + ) + request = Request( scope={ "type": "http", @@ -144,8 +93,8 @@ async def test_models_endpoint_handler_configuration_loaded( with pytest.raises(HTTPException) as e: await models_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Unable to connect to Llama Stack" + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio @@ -260,5 +209,6 @@ async def test_models_endpoint_llama_stack_connection_error( with pytest.raises(HTTPException) as e: await models_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Unable to connect to Llama Stack" + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore + assert "Unable to connect to Llama Stack" in e.value.detail["cause"] # type: ignore diff --git a/tests/unit/app/endpoints/test_providers.py b/tests/unit/app/endpoints/test_providers.py index c1963daa..c27cb2ad 
100644 --- a/tests/unit/app/endpoints/test_providers.py +++ b/tests/unit/app/endpoints/test_providers.py @@ -1,16 +1,18 @@ """Unit tests for the /providers REST API endpoints.""" import pytest -from pytest_mock import MockerFixture from fastapi import HTTPException, Request, status -from llama_stack_client import APIConnectionError - -from authentication.interface import AuthTuple +from llama_stack_client import APIConnectionError, BadRequestError +from llama_stack_client.types import ProviderInfo +from pytest_mock import MockerFixture from app.endpoints.providers import ( get_provider_endpoint_handler, providers_endpoint_handler, ) +from authentication.interface import AuthTuple +from configuration import AppConfig +from tests.unit.utils.auth_helpers import mock_authorization_resolvers @pytest.mark.asyncio @@ -18,7 +20,10 @@ async def test_providers_endpoint_configuration_not_loaded( mocker: MockerFixture, ) -> None: """Test that /providers endpoint raises HTTP 500 if configuration is not loaded.""" - mocker.patch("app.endpoints.providers.configuration", None) + mock_authorization_resolvers(mocker) + mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access + mocker.patch("app.endpoints.providers.configuration", mock_config) request = Request(scope={"type": "http"}) # Authorization tuple required by URL endpoint handler @@ -27,16 +32,19 @@ async def test_providers_endpoint_configuration_not_loaded( with pytest.raises(HTTPException) as e: await providers_endpoint_handler(request=request, auth=auth) assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore @pytest.mark.asyncio -async def test_providers_endpoint_connection_error(mocker: MockerFixture) -> None: +async def test_providers_endpoint_connection_error( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: """Test that /providers endpoint raises HTTP 500 if Llama Stack connection fails.""" - mock_client = mocker.AsyncMock() - mock_client.providers.list.side_effect = APIConnectionError(request=None) # type: ignore + mocker.patch("app.endpoints.providers.configuration", minimal_config) + mocker.patch( "app.endpoints.providers.AsyncLlamaStackClientHolder" - ).return_value.get_client.return_value = mock_client + ).return_value.get_client.side_effect = APIConnectionError(request=mocker.Mock()) request = Request(scope={"type": "http"}) @@ -45,31 +53,41 @@ async def test_providers_endpoint_connection_error(mocker: MockerFixture) -> Non with pytest.raises(HTTPException) as e: await providers_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE detail = e.value.detail assert isinstance(detail, dict) - assert "Unable to connect to Llama Stack" in detail["response"] + assert detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio -async def test_providers_endpoint_success(mocker: MockerFixture) -> None: +async def test_providers_endpoint_success( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: """Test that /providers endpoint returns a grouped list of providers on success.""" + mocker.patch("app.endpoints.providers.configuration", minimal_config) + provider_list = [ - { - "api": "inference", - "provider_id": "openai", - "provider_type": "remote::openai", - }, - { - "api": "inference", - "provider_id": "st", - "provider_type": 
"inline::sentence-transformers", - }, - { - "api": "datasetio", - "provider_id": "huggingface", - "provider_type": "remote::huggingface", - }, + ProviderInfo( + api="inference", + provider_id="openai", + provider_type="remote::openai", + config={}, + health={}, + ), + ProviderInfo( + api="inference", + provider_id="st", + provider_type="inline::sentence-transformers", + config={}, + health={}, + ), + ProviderInfo( + api="datasetio", + provider_id="huggingface", + provider_type="remote::huggingface", + config={}, + health={}, + ), ] mock_client = mocker.AsyncMock() mock_client.providers.list.return_value = provider_list @@ -89,13 +107,25 @@ async def test_providers_endpoint_success(mocker: MockerFixture) -> None: @pytest.mark.asyncio -async def test_get_provider_not_found(mocker: MockerFixture) -> None: +async def test_get_provider_not_found( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: """Test that /providers/{provider_id} endpoint raises HTTP 404 if the provider is not found.""" - mock_client = mocker.AsyncMock() - mock_client.providers.list.return_value = [] - mocker.patch( + mocker.patch("app.endpoints.providers.configuration", minimal_config) + + # Mock AsyncLlamaStackClientHolder to return a client that raises BadRequestError + mock_client_holder = mocker.patch( "app.endpoints.providers.AsyncLlamaStackClientHolder" - ).return_value.get_client.return_value = mock_client + ) + mock_client = mocker.AsyncMock() + mock_client.providers.retrieve = mocker.AsyncMock( + side_effect=BadRequestError( + message="Provider not found", + response=mocker.Mock(request=None), + body=None, + ) + ) # type: ignore + mock_client_holder.return_value.get_client.return_value = mock_client request = Request(scope={"type": "http"}) @@ -109,21 +139,26 @@ async def test_get_provider_not_found(mocker: MockerFixture) -> None: assert e.value.status_code == status.HTTP_404_NOT_FOUND detail = e.value.detail assert isinstance(detail, dict) - assert "not found" in detail["response"] + assert "not found" in detail["response"] # type: ignore + assert "Provider with ID openai does not exist" in detail["cause"] # type: ignore @pytest.mark.asyncio -async def test_get_provider_success(mocker: MockerFixture) -> None: +async def test_get_provider_success( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: """Test that /providers/{provider_id} endpoint returns provider details on success.""" - provider = { - "api": "inference", - "provider_id": "openai", - "provider_type": "remote::openai", - "config": {"api_key": "*****"}, - "health": {"status": "OK", "message": "Healthy"}, - } + mocker.patch("app.endpoints.providers.configuration", minimal_config) + + provider = ProviderInfo( + api="inference", + provider_id="openai", + provider_type="remote::openai", + config={"api_key": "*****"}, + health={"status": "OK", "message": "Healthy"}, + ) mock_client = mocker.AsyncMock() - mock_client.providers.list.return_value = [provider] + mock_client.providers.retrieve = mocker.AsyncMock(return_value=provider) mocker.patch( "app.endpoints.providers.AsyncLlamaStackClientHolder" ).return_value.get_client.return_value = mock_client @@ -141,37 +176,16 @@ async def test_get_provider_success(mocker: MockerFixture) -> None: @pytest.mark.asyncio -async def test_get_provider_connection_error(mocker: MockerFixture) -> None: +async def test_get_provider_connection_error( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: """Test that /providers/{provider_id} raises HTTP 500 if Llama Stack connection fails.""" - 
mock_client = mocker.AsyncMock() - mock_client.providers.list.side_effect = APIConnectionError(request=None) # type: ignore - mocker.patch( - "app.endpoints.providers.AsyncLlamaStackClientHolder" - ).return_value.get_client.return_value = mock_client - - request = Request(scope={"type": "http"}) - - # Authorization tuple required by URL endpoint handler - auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - - with pytest.raises(HTTPException) as e: - await get_provider_endpoint_handler( - request=request, provider_id="openai", auth=auth - ) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - detail = e.value.detail - assert isinstance(detail, dict) - assert "Unable to connect to Llama Stack" in detail["response"] + mocker.patch("app.endpoints.providers.configuration", minimal_config) + mock_authorization_resolvers(mocker) - -@pytest.mark.asyncio -async def test_get_provider_unexpected_exception(mocker: MockerFixture) -> None: - """Test that /providers/{provider_id} endpoint raises HTTP 500 for unexpected exceptions.""" - mock_client = mocker.AsyncMock() - mock_client.providers.list.side_effect = Exception("boom") mocker.patch( "app.endpoints.providers.AsyncLlamaStackClientHolder" - ).return_value.get_client.return_value = mock_client + ).return_value.get_client.side_effect = APIConnectionError(request=mocker.Mock()) request = Request(scope={"type": "http"}) @@ -182,7 +196,7 @@ async def test_get_provider_unexpected_exception(mocker: MockerFixture) -> None: await get_provider_endpoint_handler( request=request, provider_id="openai", auth=auth ) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE detail = e.value.detail assert isinstance(detail, dict) - assert "Unable to retrieve list of providers" in detail["response"] + assert detail["response"] == "Unable to connect to Llama Stack" # type: ignore diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py index c7e6415b..54b46a3c 100644 --- a/tests/unit/app/endpoints/test_query.py +++ b/tests/unit/app/endpoints/test_query.py @@ -5,27 +5,24 @@ # pylint: disable=ungrouped-imports import json - from typing import Any + import pytest -from pytest_mock import MockerFixture from fastapi import HTTPException, Request, status from litellm.exceptions import RateLimitError - from llama_stack_client import APIConnectionError -from llama_stack_client.types import UserMessage # type: ignore +from llama_stack_client.types import UserMessage from llama_stack_client.types.agents.turn import Turn from llama_stack_client.types.shared.interleaved_content_item import TextContentItem from llama_stack_client.types.tool_execution_step import ToolExecutionStep from llama_stack_client.types.tool_response import ToolResponse from pydantic import AnyUrl - -from tests.unit.conftest import AgentFixtures +from pytest_mock import MockerFixture from app.endpoints.query import ( evaluate_model_hints, - get_topic_summary, get_rag_toolgroups, + get_topic_summary, is_transcripts_enabled, parse_metadata_from_text_item, parse_referenced_documents, @@ -44,8 +41,10 @@ from tests.unit.app.endpoints.test_streaming_query import ( SAMPLE_KNOWLEDGE_SEARCH_RESULTS, ) -from utils.types import ToolCallSummary, TurnSummary +from tests.unit.conftest import AgentFixtures +from tests.unit.utils.auth_helpers import mock_authorization_resolvers from utils.token_counter import TokenCounter +from utils.types import ToolCallSummary, 
TurnSummary # User ID must be proper UUID MOCK_AUTH = ( @@ -133,12 +132,12 @@ async def test_query_endpoint_handler_configuration_not_loaded( mocker: MockerFixture, dummy_request: Request ) -> None: """Test the query endpoint handler if configuration is not loaded.""" + + mock_authorization_resolvers(mocker) # simulate state when no configuration is loaded - mocker.patch( - "app.endpoints.query.configuration", - return_value=mocker.Mock(), - ) - mocker.patch("app.endpoints.query.configuration", None) + mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access + mocker.patch("app.endpoints.query.configuration", mock_config) query = "What is OpenStack?" query_request = QueryRequest(query=query) @@ -438,10 +437,11 @@ def test_select_model_and_provider_id_invalid_model(mocker: MockerFixture) -> No mock_client.models.list(), query_request.model, query_request.provider ) - assert ( - "Model invalid_model from provider provider1 not found in available models" - in str(exc_info.value) - ) + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert detail["response"] == "Model not found" + assert "invalid_model" in detail["cause"] def test_select_model_and_provider_id_no_available_models( @@ -459,7 +459,12 @@ def test_select_model_and_provider_id_no_available_models( mock_client.models.list(), query_request.model, query_request.provider ) - assert "No LLM model found in available models" in str(exc_info.value) + assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert detail["response"] == "Model not found" + # The cause may vary, but should indicate no model found + assert "Model" in detail["cause"] def test_validate_attachments_metadata() -> None: @@ -493,12 +498,12 @@ def test_validate_attachments_metadata_invalid_type() -> None: with pytest.raises(HTTPException) as exc_info: validate_attachments_metadata(attachments) - assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_CONTENT detail = exc_info.value.detail assert isinstance(detail, dict) - assert detail["response"] == "Unable to process this request" - assert "Attachment with improper type invalid_type detected" in detail["cause"] + assert detail["response"] == "Invalid attribute value" + assert "Invalid attatchment type invalid_type" in detail["cause"] def test_validate_attachments_metadata_invalid_content_type() -> None: @@ -513,14 +518,13 @@ def test_validate_attachments_metadata_invalid_content_type() -> None: with pytest.raises(HTTPException) as exc_info: validate_attachments_metadata(attachments) - assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY + assert exc_info.value.status_code == status.HTTP_422_UNPROCESSABLE_CONTENT detail = exc_info.value.detail assert isinstance(detail, dict) - assert detail["response"] == "Unable to process this request" + assert detail["response"] == "Invalid attribute value" assert ( - "Attachment with improper content type text/invalid_content_type detected" - in detail["cause"] + "Invalid attatchment content type text/invalid_content_type" in detail["cause"] ) @@ -1450,8 +1454,10 @@ async def test_query_endpoint_handler_on_connection_error( query_request=query_request, request=dummy_request, auth=MOCK_AUTH ) - assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert 
"Unable to connect to Llama Stack" in str(exc_info.value.detail) + assert exc_info.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert detail["response"] == "Unable to connect to Llama Stack" mock_metric.inc.assert_called_once() @@ -2303,7 +2309,7 @@ async def test_query_endpoint_quota_exceeded( assert exc_info.value.status_code == status.HTTP_429_TOO_MANY_REQUESTS detail = exc_info.value.detail assert isinstance(detail, dict) - assert detail["response"] == "Model quota exceeded" # type: ignore + assert detail["response"] == "The model quota has been exceeded" # type: ignore assert "gpt-4-turbo" in detail["cause"] # type: ignore diff --git a/tests/unit/app/endpoints/test_query_v2.py b/tests/unit/app/endpoints/test_query_v2.py index 10d46a68..247a4261 100644 --- a/tests/unit/app/endpoints/test_query_v2.py +++ b/tests/unit/app/endpoints/test_query_v2.py @@ -2,22 +2,21 @@ """Unit tests for the /query (v2) REST API endpoint using Responses API.""" from typing import Any -from litellm.exceptions import RateLimitError -import pytest -from pytest_mock import MockerFixture -from fastapi import HTTPException, status, Request +import pytest +from fastapi import HTTPException, Request, status +from litellm.exceptions import RateLimitError from llama_stack_client import APIConnectionError - -from models.requests import QueryRequest, Attachment -from models.config import ModelContextProtocolServer +from pytest_mock import MockerFixture from app.endpoints.query_v2 import ( - get_rag_tools, get_mcp_tools, - retrieve_response, + get_rag_tools, query_endpoint_handler_v2, + retrieve_response, ) +from models.config import ModelContextProtocolServer +from models.requests import Attachment, QueryRequest # User ID must be proper UUID MOCK_AUTH = ( @@ -490,8 +489,10 @@ def _raise(*_args: Any, **_kwargs: Any) -> Exception: mcp_headers={}, ) - assert exc.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert "Unable to connect to Llama Stack" in str(exc.value.detail) + assert exc.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + detail = exc.value.detail + assert isinstance(detail, dict) + assert detail["response"] == "Unable to connect to Llama Stack" fail_metric.inc.assert_called_once() @@ -527,7 +528,7 @@ async def test_query_endpoint_quota_exceeded( assert exc_info.value.status_code == status.HTTP_429_TOO_MANY_REQUESTS detail = exc_info.value.detail assert isinstance(detail, dict) - assert detail["response"] == "Model quota exceeded" # type: ignore + assert detail["response"] == "The model quota has been exceeded" # type: ignore assert "gpt-4-turbo" in detail["cause"] # type: ignore diff --git a/tests/unit/app/endpoints/test_rags.py b/tests/unit/app/endpoints/test_rags.py index d7f0766c..f8f3390c 100644 --- a/tests/unit/app/endpoints/test_rags.py +++ b/tests/unit/app/endpoints/test_rags.py @@ -1,16 +1,17 @@ """Unit tests for the /rags REST API endpoints.""" import pytest -from pytest_mock import MockerFixture from fastapi import HTTPException, Request, status -from llama_stack_client import APIConnectionError - -from authentication.interface import AuthTuple +from llama_stack_client import APIConnectionError, BadRequestError +from pytest_mock import MockerFixture from app.endpoints.rags import ( get_rag_endpoint_handler, rags_endpoint_handler, ) +from authentication.interface import AuthTuple +from configuration import AppConfig +from tests.unit.utils.auth_helpers import mock_authorization_resolvers 
@pytest.mark.asyncio @@ -18,7 +19,10 @@ async def test_rags_endpoint_configuration_not_loaded( mocker: MockerFixture, ) -> None: """Test that /rags endpoint raises HTTP 500 if configuration is not loaded.""" - mocker.patch("app.endpoints.rags.configuration", None) + mock_authorization_resolvers(mocker) + mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access + mocker.patch("app.endpoints.rags.configuration", mock_config) request = Request(scope={"type": "http"}) # Authorization tuple required by URL endpoint handler @@ -30,8 +34,11 @@ async def test_rags_endpoint_configuration_not_loaded( @pytest.mark.asyncio -async def test_rags_endpoint_connection_error(mocker: MockerFixture) -> None: - """Test that /rags endpoint raises HTTP 500 if Llama Stack connection fails.""" +async def test_rags_endpoint_connection_error( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: + """Test that /rags endpoint raises HTTP 503 if Llama Stack connection fails.""" + mocker.patch("app.endpoints.rags.configuration", minimal_config) mock_client = mocker.AsyncMock() mock_client.vector_stores.list.side_effect = APIConnectionError(request=None) # type: ignore mocker.patch( @@ -45,7 +52,7 @@ async def test_rags_endpoint_connection_error(mocker: MockerFixture) -> None: with pytest.raises(HTTPException) as e: await rags_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE detail = e.value.detail assert isinstance(detail, dict) assert "response" in detail @@ -53,31 +60,11 @@ async def test_rags_endpoint_connection_error(mocker: MockerFixture) -> None: @pytest.mark.asyncio -async def test_rags_endpoint_unable_to_retrieve_list(mocker: MockerFixture) -> None: - """Test that /rags endpoint raises HTTP 500 if Llama Stack connection fails.""" - mock_client = mocker.AsyncMock() - mock_client.vector_stores.list.side_effect = [] # type: ignore - mocker.patch( - "app.endpoints.rags.AsyncLlamaStackClientHolder" - ).return_value.get_client.return_value = mock_client - - request = Request(scope={"type": "http"}) - - # Authorization tuple required by URL endpoint handler - auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - - with pytest.raises(HTTPException) as e: - await rags_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - detail = e.value.detail - assert isinstance(detail, dict) - assert "response" in detail - assert "Unable to retrieve list of RAGs" in detail["response"] - - -@pytest.mark.asyncio -async def test_rags_endpoint_success(mocker: MockerFixture) -> None: +async def test_rags_endpoint_success( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: """Test that /rags endpoint returns list of RAG IDs.""" + mocker.patch("app.endpoints.rags.configuration", minimal_config) # pylint: disable=R0903 class RagInfo: @@ -116,7 +103,10 @@ async def test_rag_info_endpoint_configuration_not_loaded( mocker: MockerFixture, ) -> None: """Test that /rags/{rag_id} endpoint raises HTTP 500 if configuration is not loaded.""" - mocker.patch("app.endpoints.rags.configuration", None) + mock_authorization_resolvers(mocker) + mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access + mocker.patch("app.endpoints.rags.configuration", mock_config) request = Request(scope={"type": "http"}) # Authorization tuple required by URL endpoint 
handler @@ -128,11 +118,18 @@ async def test_rag_info_endpoint_configuration_not_loaded( @pytest.mark.asyncio -async def test_rag_info_endpoint_rag_not_found(mocker: MockerFixture) -> None: +async def test_rag_info_endpoint_rag_not_found( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: """Test that /rags/{rag_id} endpoint returns HTTP 404 when the requested RAG is not found.""" + mocker.patch("app.endpoints.rags.configuration", minimal_config) mock_client = mocker.AsyncMock() - mock_client.vector_stores.retrieve.side_effect = HTTPException( - status_code=status.HTTP_404_NOT_FOUND + mock_client.vector_stores.retrieve = mocker.AsyncMock( + side_effect=BadRequestError( + message="RAG not found", + response=mocker.Mock(request=None), + body=None, + ) ) # type: ignore mocker.patch( "app.endpoints.rags.AsyncLlamaStackClientHolder" @@ -146,11 +143,18 @@ async def test_rag_info_endpoint_rag_not_found(mocker: MockerFixture) -> None: with pytest.raises(HTTPException) as e: await get_rag_endpoint_handler(request=request, auth=auth, rag_id="xyzzy") assert e.value.status_code == status.HTTP_404_NOT_FOUND + detail = e.value.detail + assert isinstance(detail, dict) + assert "response" in detail + assert "Rag not found" in detail["response"] @pytest.mark.asyncio -async def test_rag_info_endpoint_connection_error(mocker: MockerFixture) -> None: - """Test that /rags/{rag_id} endpoint raises HTTP 500 if Llama Stack connection fails.""" +async def test_rag_info_endpoint_connection_error( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: + """Test that /rags/{rag_id} endpoint raises HTTP 503 if Llama Stack connection fails.""" + mocker.patch("app.endpoints.rags.configuration", minimal_config) mock_client = mocker.AsyncMock() mock_client.vector_stores.retrieve.side_effect = APIConnectionError( request=None # type: ignore @@ -166,7 +170,7 @@ async def test_rag_info_endpoint_connection_error(mocker: MockerFixture) -> None with pytest.raises(HTTPException) as e: await get_rag_endpoint_handler(request=request, auth=auth, rag_id="xyzzy") - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE detail = e.value.detail assert isinstance(detail, dict) assert "response" in detail @@ -174,31 +178,11 @@ async def test_rag_info_endpoint_connection_error(mocker: MockerFixture) -> None @pytest.mark.asyncio -async def test_rag_info_endpoint_unable_to_retrieve_list(mocker: MockerFixture) -> None: - """Test that /rags/{rag_id} endpoint raises HTTP 500 if Llama Stack connection fails.""" - mock_client = mocker.AsyncMock() - mock_client.vector_stores.retrieve.side_effect = [] # type: ignore - mocker.patch( - "app.endpoints.rags.AsyncLlamaStackClientHolder" - ).return_value.get_client.return_value = mock_client - - request = Request(scope={"type": "http"}) - - # Authorization tuple required by URL endpoint handler - auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - - with pytest.raises(HTTPException) as e: - await get_rag_endpoint_handler(request=request, auth=auth, rag_id="xyzzy") - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - detail = e.value.detail - assert isinstance(detail, dict) - assert "response" in detail - assert "Unable to retrieve info about RAG" in detail["response"] - - -@pytest.mark.asyncio -async def test_rag_info_endpoint_success(mocker: MockerFixture) -> None: +async def test_rag_info_endpoint_success( + mocker: MockerFixture, minimal_config: AppConfig +) -> 
None: """Test that /rags/{rag_id} endpoint returns information about selected RAG.""" + mocker.patch("app.endpoints.rags.configuration", minimal_config) # pylint: disable=R0902 # pylint: disable=R0903 diff --git a/tests/unit/app/endpoints/test_shields.py b/tests/unit/app/endpoints/test_shields.py index 03bb3008..c904fa9a 100644 --- a/tests/unit/app/endpoints/test_shields.py +++ b/tests/unit/app/endpoints/test_shields.py @@ -1,16 +1,16 @@ """Unit tests for the /shields REST API endpoint.""" from typing import Any + import pytest -from pytest_mock import MockerFixture from fastapi import HTTPException, Request, status - from llama_stack_client import APIConnectionError - -from authentication.interface import AuthTuple +from pytest_mock import MockerFixture from app.endpoints.shields import shields_endpoint_handler +from authentication.interface import AuthTuple from configuration import AppConfig +from models.responses import ShieldsResponse from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -22,11 +22,8 @@ async def test_shields_endpoint_handler_configuration_not_loaded( mock_authorization_resolvers(mocker) # simulate state when no configuration is loaded - mocker.patch( - "app.endpoints.shields.configuration", - return_value=mocker.Mock(), - ) - mocker.patch("app.endpoints.shields.configuration", None) + mock_config = AppConfig() + mocker.patch("app.endpoints.shields.configuration", mock_config) request = Request( scope={ @@ -41,7 +38,7 @@ async def test_shields_endpoint_handler_configuration_not_loaded( with pytest.raises(HTTPException) as e: await shields_endpoint_handler(request=request, auth=auth) assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Configuration is not loaded" + assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore @pytest.mark.asyncio @@ -78,10 +75,13 @@ async def test_shields_endpoint_handler_improper_llama_stack_configuration( cfg = AppConfig() cfg.init_from_dict(config_dict) - mocker.patch( - "app.endpoints.shields.configuration", - return_value=None, + mocker.patch("app.endpoints.shields.configuration", cfg) + # Mock client to avoid initialization + mock_client_holder = mocker.patch( + "app.endpoints.shields.AsyncLlamaStackClientHolder" ) + mock_client = mocker.AsyncMock() + mock_client_holder.return_value.get_client.return_value = mock_client request = Request( scope={ @@ -93,10 +93,12 @@ async def test_shields_endpoint_handler_improper_llama_stack_configuration( # Authorization tuple required by URL endpoint handler auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - with pytest.raises(HTTPException) as e: - await shields_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Llama stack is not configured" + # Mock shields.list to return empty list + mock_client.shields.list.return_value = [] + + response = await shields_endpoint_handler(request=request, auth=auth) + assert isinstance(response, ShieldsResponse) + assert response.shields == [] @pytest.mark.asyncio @@ -132,6 +134,15 @@ async def test_shields_endpoint_handler_configuration_loaded( cfg = AppConfig() cfg.init_from_dict(config_dict) + mocker.patch("app.endpoints.shields.configuration", cfg) + # Mock client to raise APIConnectionError + mock_client_holder = mocker.patch( + "app.endpoints.shields.AsyncLlamaStackClientHolder" + ) + mock_client = mocker.AsyncMock() + 
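Note the two flavors of the 503 payload asserted in these shields tests: one handler path returns "Unable to connect to Llama Stack" directly as the response, the other uses the "Service unavailable" constant and pushes the connection detail into cause. A sketch of the latter form; the function name list_shields and the exact cause formatting are assumptions:

from fastapi import HTTPException, status
from llama_stack_client import APIConnectionError

async def list_shields(client):
    # Sketch of the 503 translation; the response/cause split follows the
    # second assertion variant in the tests below.
    try:
        return await client.shields.list()
    except APIConnectionError as e:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail={
                "response": "Service unavailable",
                "cause": f"Unable to connect to Llama Stack: {e}",
            },
        ) from e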
mock_client.shields.list.side_effect = APIConnectionError(request=None) # type: ignore + mock_client_holder.return_value.get_client.return_value = mock_client + request = Request( scope={ "type": "http", @@ -144,8 +155,8 @@ async def test_shields_endpoint_handler_configuration_loaded( with pytest.raises(HTTPException) as e: await shields_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Unable to connect to Llama Stack" + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert e.value.detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio @@ -258,8 +269,9 @@ async def test_shields_endpoint_llama_stack_connection_error( with pytest.raises(HTTPException) as e: await shields_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Unable to connect to Llama Stack" + assert e.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE + assert e.value.detail["response"] == "Service unavailable" # type: ignore + assert "Unable to connect to Llama Stack" in e.value.detail["cause"] # type: ignore @pytest.mark.asyncio @@ -337,63 +349,3 @@ async def test_shields_endpoint_handler_success_with_shields_data( assert len(response.shields) == 2 assert response.shields[0]["identifier"] == "lightspeed_question_validity-shield" assert response.shields[1]["identifier"] == "content_filter-shield" - - -@pytest.mark.asyncio -async def test_shields_endpoint_handler_general_exception( - mocker: MockerFixture, -) -> None: - """Test the shields endpoint handler when a general exception occurs.""" - mock_authorization_resolvers(mocker) - - # configuration for tests - config_dict: dict[str, Any] = { - "name": "foo", - "service": { - "host": "localhost", - "port": 8080, - "auth_enabled": False, - "workers": 1, - "color_log": True, - "access_log": True, - }, - "llama_stack": { - "api_key": "xyzzy", - "url": "http://x.y.com:1234", - "use_as_library_client": False, - }, - "user_data_collection": { - "feedback_enabled": False, - }, - "customization": None, - "authorization": {"access_rules": []}, - "authentication": {"module": "noop"}, - } - cfg = AppConfig() - cfg.init_from_dict(config_dict) - - # Mock the LlamaStack client to raise a general exception - mock_client = mocker.AsyncMock() - mock_client.shields.list.side_effect = Exception("General error") - mock_client_holder = mocker.patch( - "app.endpoints.shields.AsyncLlamaStackClientHolder" - ) - mock_client_holder.return_value.get_client.return_value = mock_client - mock_config = mocker.Mock() - mocker.patch("app.endpoints.shields.configuration", mock_config) - - request = Request( - scope={ - "type": "http", - "headers": [(b"authorization", b"Bearer invalid-token")], - } - ) - - # Authorization tuple required by URL endpoint handler - auth: AuthTuple = ("test_user_id", "test_user", True, "test_token") - - with pytest.raises(HTTPException) as e: - await shields_endpoint_handler(request=request, auth=auth) - assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Unable to retrieve list of shields" - assert e.detail["cause"] == "General error" diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index 021a718b..f2ce836c 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ 
b/tests/unit/app/endpoints/test_streaming_query.py @@ -1,65 +1,60 @@ """Unit tests for the /streaming-query REST API endpoint.""" -from datetime import datetime - # pylint: disable=too-many-lines - import json +from datetime import datetime -from litellm.exceptions import RateLimitError import pytest -from pytest_mock import MockerFixture - from fastapi import HTTPException, Request, status from fastapi.responses import StreamingResponse - +from litellm.exceptions import RateLimitError from llama_stack_client import APIConnectionError from llama_stack_client.types import UserMessage # type: ignore from llama_stack_client.types.agents import Turn -from llama_stack_client.types.shared.completion_message import CompletionMessage -from llama_stack_client.types.shared.interleaved_content_item import TextContentItem -from llama_stack_client.types.shared.safety_violation import SafetyViolation -from llama_stack_client.types.shield_call_step import ShieldCallStep -from llama_stack_client.types.shared.tool_call import ToolCall -from llama_stack_client.types.shared.content_delta import TextDelta, ToolCallDelta -from llama_stack_client.types.agents.turn_response_event import TurnResponseEvent from llama_stack_client.types.agents.agent_turn_response_stream_chunk import ( AgentTurnResponseStreamChunk, ) +from llama_stack_client.types.agents.turn_response_event import TurnResponseEvent from llama_stack_client.types.agents.turn_response_event_payload import ( - AgentTurnResponseStepProgressPayload, AgentTurnResponseStepCompletePayload, - AgentTurnResponseTurnStartPayload, + AgentTurnResponseStepProgressPayload, AgentTurnResponseTurnAwaitingInputPayload, AgentTurnResponseTurnCompletePayload, + AgentTurnResponseTurnStartPayload, ) +from llama_stack_client.types.shared.completion_message import CompletionMessage +from llama_stack_client.types.shared.content_delta import TextDelta, ToolCallDelta +from llama_stack_client.types.shared.interleaved_content_item import TextContentItem +from llama_stack_client.types.shared.safety_violation import SafetyViolation +from llama_stack_client.types.shared.tool_call import ToolCall +from llama_stack_client.types.shield_call_step import ShieldCallStep from llama_stack_client.types.tool_execution_step import ToolExecutionStep from llama_stack_client.types.tool_response import ToolResponse +from pytest_mock import MockerFixture -from configuration import AppConfig from app.endpoints.query import get_rag_toolgroups from app.endpoints.streaming_query import ( - streaming_query_endpoint_handler, - retrieve_response, - stream_build_event, - stream_event, - stream_end_event, - prompt_too_long_error, - generic_llm_error, LLM_TOKEN_EVENT, LLM_TOOL_CALL_EVENT, LLM_TOOL_RESULT_EVENT, + generic_llm_error, + prompt_too_long_error, + retrieve_response, + stream_build_event, + stream_end_event, + stream_event, + streaming_query_endpoint_handler, ) - from authorization.resolvers import NoopRolesResolver +from configuration import AppConfig from constants import MEDIA_TYPE_JSON, MEDIA_TYPE_TEXT -from models.config import ModelContextProtocolServer, Action -from models.requests import QueryRequest, Attachment +from models.config import Action, ModelContextProtocolServer +from models.requests import Attachment, QueryRequest +from tests.unit.conftest import AgentFixtures +from tests.unit.utils.auth_helpers import mock_authorization_resolvers from utils.token_counter import TokenCounter from utils.types import TurnSummary -from tests.unit.conftest import AgentFixtures - MOCK_AUTH = ( 
"017adfa4-7cc6-46e4-b663-3653e1ae69df", "mock_username", @@ -155,14 +150,13 @@ async def test_streaming_query_endpoint_handler_configuration_not_loaded( ) -> None: """Test the streaming query endpoint handler if configuration is not loaded.""" # simulate state when no configuration is loaded - mocker.patch( - "app.endpoints.streaming_query.configuration", - return_value=mocker.Mock(), - ) - mocker.patch("app.endpoints.streaming_query.configuration", None) + mock_config = AppConfig() + mocker.patch("app.endpoints.streaming_query.configuration", mock_config) + # Mock authorization resolvers to avoid accessing configuration properties + mock_authorization_resolvers(mocker) query = "What is OpenStack?" - query_request = QueryRequest(query=query) + query_request = QueryRequest(query=query) # type: ignore request = Request( scope={ @@ -172,8 +166,8 @@ async def test_streaming_query_endpoint_handler_configuration_not_loaded( # await the async function with pytest.raises(HTTPException) as e: await streaming_query_endpoint_handler(request, query_request, auth=MOCK_AUTH) - assert e.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR - assert e.detail["response"] == "Configuration is not loaded" + assert e.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert e.value.detail["response"] == "Configuration is not loaded" # type: ignore @pytest.mark.asyncio @@ -188,11 +182,11 @@ async def test_streaming_query_endpoint_on_connection_error( ) query = "What is OpenStack?" - query_request = QueryRequest(query=query) + query_request = QueryRequest(query=query) # type: ignore # simulate situation when it is not possible to connect to Llama Stack mock_client = mocker.AsyncMock() - mock_client.models.side_effect = APIConnectionError(request=query_request) + mock_client.models.side_effect = APIConnectionError(request=query_request) # type: ignore mock_lsc = mocker.patch("client.AsyncLlamaStackClientHolder.get_client") mock_lsc.return_value = mock_client mock_async_lsc = mocker.patch("client.AsyncLlamaStackClientHolder.get_client") @@ -319,7 +313,7 @@ async def _test_streaming_query_endpoint_handler(mocker: MockerFixture) -> None: mock_database_operations(mocker) - query_request = QueryRequest(query=query) + query_request = QueryRequest(query=query) # type: ignore request = Request( scope={ @@ -1756,7 +1750,7 @@ async def test_query_endpoint_quota_exceeded(mocker: MockerFixture) -> None: assert exc_info.value.status_code == status.HTTP_429_TOO_MANY_REQUESTS detail = exc_info.value.detail assert isinstance(detail, dict) - assert detail["response"] == "Model quota exceeded" # type: ignore + assert detail["response"] == "The model quota has been exceeded" # type: ignore assert "gpt-4-turbo" in detail["cause"] # type: ignore diff --git a/tests/unit/app/endpoints/test_streaming_query_v2.py b/tests/unit/app/endpoints/test_streaming_query_v2.py index 450b4cec..461bc515 100644 --- a/tests/unit/app/endpoints/test_streaming_query_v2.py +++ b/tests/unit/app/endpoints/test_streaming_query_v2.py @@ -3,20 +3,19 @@ from types import SimpleNamespace from typing import Any, AsyncIterator + import pytest -from pytest_mock import MockerFixture -from fastapi import HTTPException, status, Request +from fastapi import HTTPException, Request, status from fastapi.responses import StreamingResponse - from llama_stack_client import APIConnectionError - -from models.requests import QueryRequest -from models.config import Action, ModelContextProtocolServer +from pytest_mock import MockerFixture from 
app.endpoints.streaming_query_v2 import ( retrieve_response, streaming_query_endpoint_handler_v2, ) +from models.config import Action, ModelContextProtocolServer +from models.requests import QueryRequest @pytest.fixture @@ -223,7 +222,7 @@ def _raise(*_a: Any, **_k: Any) -> None: mcp_headers={}, ) - assert exc.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + assert exc.value.status_code == status.HTTP_503_SERVICE_UNAVAILABLE assert "Unable to connect to Llama Stack" in str(exc.value.detail) fail_metric.inc.assert_called_once() diff --git a/tests/unit/app/endpoints/test_tools.py b/tests/unit/app/endpoints/test_tools.py index 8f8b59c4..559a9550 100644 --- a/tests/unit/app/endpoints/test_tools.py +++ b/tests/unit/app/endpoints/test_tools.py @@ -1,23 +1,24 @@ +# pylint: disable=protected-access + """Unit tests for tools endpoint.""" import pytest -from pytest_mock import MockerFixture, MockType from fastapi import HTTPException - -from llama_stack_client import APIConnectionError - -from authentication.interface import AuthTuple +from llama_stack_client import APIConnectionError, BadRequestError +from pytest_mock import MockerFixture, MockType # Import the function directly to bypass decorators from app.endpoints import tools -from models.responses import ToolsResponse +from authentication.interface import AuthTuple +from configuration import AppConfig from models.config import ( Configuration, - ServiceConfiguration, LlamaStackConfiguration, - UserDataCollection, ModelContextProtocolServer, + ServiceConfiguration, + UserDataCollection, ) +from models.responses import ToolsResponse # Shared mock auth tuple with 4 fields as expected by the application MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token") @@ -43,7 +44,7 @@ def mock_configuration() -> Configuration: url="http://localhost:3001", ), ], - ) + ) # type: ignore @pytest.fixture @@ -110,8 +111,10 @@ async def test_tools_endpoint_success( mock_tools_response: list[MockType], # pylint: disable=redefined-outer-name ) -> None: """Test successful tools endpoint response.""" - # Mock configuration - mocker.patch("app.endpoints.tools.configuration", mock_configuration) + # Mock configuration - wrap in AppConfig + app_config = AppConfig() + app_config._configuration = mock_configuration + mocker.patch("app.endpoints.tools.configuration", app_config) # Mock authorization decorator to bypass i mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func) @@ -168,7 +171,7 @@ async def test_tools_endpoint_success( @pytest.mark.asyncio async def test_tools_endpoint_no_mcp_servers(mocker: MockerFixture) -> None: """Test tools endpoint with no MCP servers configured.""" - # Mock configuration with no MCP servers + # Mock configuration with no MCP servers - wrap in AppConfig mock_config = Configuration( name="test", service=ServiceConfiguration(), @@ -176,7 +179,9 @@ async def test_tools_endpoint_no_mcp_servers(mocker: MockerFixture) -> None: user_data_collection=UserDataCollection(feedback_enabled=False), mcp_servers=[], ) - mocker.patch("app.endpoints.tools.configuration", mock_config) + app_config = AppConfig() + app_config._configuration = mock_config + mocker.patch("app.endpoints.tools.configuration", app_config) # Mock authorization decorator to bypass i mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func) @@ -194,7 +199,9 @@ async def test_tools_endpoint_no_mcp_servers(mocker: MockerFixture) -> None: mock_auth = MOCK_AUTH # Call the endpoint - response = await 
tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth) + response = await tools.tools_endpoint_handler.__wrapped__( + mock_request, mock_auth + ) # type: ignore # Verify response assert isinstance(response, ToolsResponse) @@ -207,8 +214,10 @@ async def test_tools_endpoint_api_connection_error( mock_configuration: Configuration, # pylint: disable=redefined-outer-name ) -> None: """Test tools endpoint with API connection error from individual servers.""" - # Mock configuration - mocker.patch("app.endpoints.tools.configuration", mock_configuration) + # Mock configuration - wrap in AppConfig + app_config = AppConfig() + app_config._configuration = mock_configuration + mocker.patch("app.endpoints.tools.configuration", app_config) # Mock authorization decorator to bypass i mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func) @@ -233,23 +242,63 @@ async def test_tools_endpoint_api_connection_error( mock_request = mocker.Mock() mock_auth = MOCK_AUTH - # Call the endpointt - should not raise exception but return empty tools - response = await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth) + # Call the endpoint - should raise HTTPException when APIConnectionError occurs + with pytest.raises(HTTPException) as exc_info: + await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth) - # Verify response - should be empty since all servers failed - assert isinstance(response, ToolsResponse) - assert len(response.tools) == 0 + assert exc_info.value.status_code == 503 + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert detail["response"] == "Unable to connect to Llama Stack" # type: ignore @pytest.mark.asyncio async def test_tools_endpoint_partial_failure( # pylint: disable=redefined-outer-name mocker: MockerFixture, mock_configuration: Configuration, +) -> None: + """Test tools endpoint with one MCP server failing with APIConnectionError.""" + app_config = AppConfig() + app_config._configuration = mock_configuration + mocker.patch("app.endpoints.tools.configuration", app_config) + + mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func) + mock_client_holder = mocker.patch("app.endpoints.tools.AsyncLlamaStackClientHolder") + mock_client = mocker.AsyncMock() + mock_client_holder.return_value.get_client.return_value = mock_client + + mock_toolgroup1 = mocker.Mock() + mock_toolgroup1.identifier = "filesystem-tools" + mock_toolgroup2 = mocker.Mock() + mock_toolgroup2.identifier = "git-tools" + mock_client.toolgroups.list.return_value = [mock_toolgroup1, mock_toolgroup2] + + api_error = APIConnectionError(request=mocker.Mock()) + mock_client.tools.list.side_effect = api_error + + mock_request = mocker.Mock() + mock_auth = MOCK_AUTH + + with pytest.raises(HTTPException) as exc_info: + await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth) + + assert exc_info.value.status_code == 503 + detail = exc_info.value.detail + assert isinstance(detail, dict) + assert detail["response"] == "Unable to connect to Llama Stack" # type: ignore + + +@pytest.mark.asyncio +async def test_tools_endpoint_toolgroup_not_found( # pylint: disable=redefined-outer-name + mocker: MockerFixture, + mock_configuration: Configuration, mock_tools_response: list[MockType], ) -> None: - """Test tools endpoint with one MCP server failing.""" - # Mock configuration - mocker.patch("app.endpoints.tools.configuration", mock_configuration) + """Test tools endpoint when a toolgroup is not found (BadRequestError).""" + # 
Mock configuration - wrap in AppConfig + app_config = AppConfig() + app_config._configuration = mock_configuration + mocker.patch("app.endpoints.tools.configuration", app_config) # Mock authorization decorator to bypass i mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func) @@ -266,27 +315,34 @@ async def test_tools_endpoint_partial_failure( # pylint: disable=redefined-oute mock_toolgroup2.identifier = "git-tools" mock_client.toolgroups.list.return_value = [mock_toolgroup1, mock_toolgroup2] - # Mock tools.list responses - first succeeds, second fails + # Mock tools.list responses - first succeeds, second raises BadRequestError + bad_request_error = BadRequestError( + message="Toolgroup not found", + response=mocker.Mock(request=None), + body=None, + ) mock_client.tools.list.side_effect = [ [mock_tools_response[0]], # filesystem-tools response - Exception("Server unavailable"), # git-tools fails + bad_request_error, # git-tools not found ] # Mock request and auth mock_request = mocker.Mock() mock_auth = MOCK_AUTH - # Call the endpoint + # Call the endpoint - should continue processing and return tools from successful toolgroups response = await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth) - # Verify response - should have only one tool from the successful server + # Verify response - should have only one tool from the first successful toolgroup assert isinstance(response, ToolsResponse) assert len(response.tools) == 1 assert response.tools[0]["identifier"] == "filesystem_read" assert response.tools[0]["server_source"] == "http://localhost:3000" - # Verify both servers were attempted + # Verify that tools.list was called for both toolgroups assert mock_client.tools.list.call_count == 2 + mock_client.tools.list.assert_any_call(toolgroup_id="filesystem-tools") + mock_client.tools.list.assert_any_call(toolgroup_id="git-tools") @pytest.mark.asyncio @@ -295,8 +351,10 @@ async def test_tools_endpoint_builtin_toolgroup( mock_configuration: Configuration, # pylint: disable=redefined-outer-name ) -> None: """Test tools endpoint with built-in toolgroups.""" - # Mock configuration - mocker.patch("app.endpoints.tools.configuration", mock_configuration) + # Mock configuration - wrap in AppConfig + app_config = AppConfig() + app_config._configuration = mock_configuration + mocker.patch("app.endpoints.tools.configuration", app_config) # Mock authorization decorator to bypass i mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func) @@ -347,7 +405,7 @@ async def test_tools_endpoint_builtin_toolgroup( @pytest.mark.asyncio async def test_tools_endpoint_mixed_toolgroups(mocker: MockerFixture) -> None: """Test tools endpoint with both MCP and built-in toolgroups.""" - # Mock configuration with MCP servers + # Mock configuration with MCP servers - wrap in AppConfig mock_config = Configuration( name="test", service=ServiceConfiguration(), @@ -361,7 +419,9 @@ async def test_tools_endpoint_mixed_toolgroups(mocker: MockerFixture) -> None: ), ], ) - mocker.patch("app.endpoints.tools.configuration", mock_config) + app_config = AppConfig() + app_config._configuration = mock_config + mocker.patch("app.endpoints.tools.configuration", app_config) # Mock authorization decorator to bypass i mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func) @@ -438,8 +498,10 @@ async def test_tools_endpoint_value_attribute_error( mock_configuration: Configuration, # pylint: disable=redefined-outer-name ) -> None: """Test tools endpoint with 
ValueError/AttributeError in toolgroups.list."""
-    # Mock configuration
-    mocker.patch("app.endpoints.tools.configuration", mock_configuration)
+    # Mock configuration - wrap in AppConfig
+    app_config = AppConfig()
+    app_config._configuration = mock_configuration
+    mocker.patch("app.endpoints.tools.configuration", app_config)

     # Mock authorization decorator to bypass i
     mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func)
@@ -456,12 +518,9 @@ async def test_tools_endpoint_value_attribute_error(
     mock_request = mocker.Mock()
     mock_auth = MOCK_AUTH

-    # Call the endpointt - should not raise exception but return empty tools
-    response = await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)
-
-    # Verify response - should be empty since toolgroups.list failed
-    assert isinstance(response, ToolsResponse)
-    assert len(response.tools) == 0
+    # Call the endpoint - should raise exception since toolgroups.list failed
+    with pytest.raises(ValueError, match="Invalid response format"):
+        await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)  # type: ignore


 @pytest.mark.asyncio
@@ -469,8 +528,10 @@ async def test_tools_endpoint_apiconnection_error_toolgroups(  # pylint: disable
     mocker: MockerFixture, mock_configuration: Configuration
 ) -> None:
     """Test tools endpoint with APIConnectionError in toolgroups.list."""
-    # Mock configuration
-    mocker.patch("app.endpoints.tools.configuration", mock_configuration)
+    # Mock configuration - wrap in AppConfig
+    app_config = AppConfig()
+    app_config._configuration = mock_configuration
+    mocker.patch("app.endpoints.tools.configuration", app_config)

     # Mock authorization decorator to bypass i
     mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func)
@@ -490,13 +551,13 @@ async def test_tools_endpoint_apiconnection_error_toolgroups(  # pylint: disable

     # Call the endpointt and expect HTTPException
     with pytest.raises(HTTPException) as exc_info:
-        await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)
+        await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)  # type: ignore

-    assert exc_info.value.status_code == 500
+    assert exc_info.value.status_code == 503

     detail = exc_info.value.detail
     assert isinstance(detail, dict)
-    assert "Unable to connect to Llama Stack" in detail["response"]
+    assert detail["response"] == "Unable to connect to Llama Stack"  # type: ignore


 @pytest.mark.asyncio
@@ -504,15 +565,17 @@ async def test_tools_endpoint_client_holder_apiconnection_error(  # pylint: disa
     mocker: MockerFixture, mock_configuration: Configuration
 ) -> None:
     """Test tools endpoint with APIConnectionError in client holder."""
-    # Mock configuration
-    mocker.patch("app.endpoints.tools.configuration", mock_configuration)
+    # Mock configuration - wrap in AppConfig
+    app_config = AppConfig()
+    app_config._configuration = mock_configuration
+    mocker.patch("app.endpoints.tools.configuration", app_config)

     # Mock authorization decorator to bypass i
     mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func)

     # Mock client holder to raise APIConnectionError
     mock_client_holder = mocker.patch("app.endpoints.tools.AsyncLlamaStackClientHolder")
-    api_error = APIConnectionError(request=mocker.Mock())
+    api_error = APIConnectionError(request=None)  # type: ignore
     mock_client_holder.return_value.get_client.side_effect = api_error

     # Mock request and auth
     mock_request = mocker.Mock()
     mock_auth = MOCK_AUTH

     # Call the endpointt and expect HTTPException
     with pytest.raises(HTTPException) as exc_info:
-        await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)
+        await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)  # type: ignore

-    assert exc_info.value.status_code == 500
+    assert exc_info.value.status_code == 503

     detail = exc_info.value.detail
     assert isinstance(detail, dict)
-    assert "Unable to connect to Llama Stack" in detail["response"]
+    assert detail["response"] == "Unable to connect to Llama Stack"  # type: ignore


 @pytest.mark.asyncio
@@ -536,8 +599,10 @@ async def test_tools_endpoint_general_exception(
     mocker: MockerFixture,
     mock_configuration: Configuration,  # pylint: disable=redefined-outer-name
 ) -> None:
     """Test tools endpoint with general exception."""
-    # Mock configuration
-    mocker.patch("app.endpoints.tools.configuration", mock_configuration)
+    # Mock configuration - wrap in AppConfig
+    app_config = AppConfig()
+    app_config._configuration = mock_configuration
+    mocker.patch("app.endpoints.tools.configuration", app_config)

     # Mock authorization decorator to bypass i
     mocker.patch("app.endpoints.tools.authorize", lambda action: lambda func: func)
@@ -552,12 +617,6 @@ async def test_tools_endpoint_general_exception(
     mock_request = mocker.Mock()
     mock_auth = MOCK_AUTH

-    # Call the endpointt and expect HTTPException
-    with pytest.raises(HTTPException) as exc_info:
-        await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)
-
-    assert exc_info.value.status_code == 500
-
-    detail = exc_info.value.detail
-    assert isinstance(detail, dict)
-    assert "Unable to retrieve list of tools" in detail["response"]
+    # Call the endpoint and expect the exception to propagate (not caught)
+    with pytest.raises(Exception, match="Unexpected error"):
+        await tools.tools_endpoint_handler.__wrapped__(mock_request, mock_auth)  # type: ignore
diff --git a/tests/unit/app/test_main_middleware.py b/tests/unit/app/test_main_middleware.py
new file mode 100644
index 00000000..3b7184d4
--- /dev/null
+++ b/tests/unit/app/test_main_middleware.py
@@ -0,0 +1,74 @@
+"""Unit tests for the global exception middleware in main.py."""
+
+import json
+from typing import cast
+from unittest.mock import Mock
+
+import pytest
+from fastapi import HTTPException, Request, Response, status
+from fastapi.responses import JSONResponse
+from starlette.requests import Request as StarletteRequest
+
+from models.responses import InternalServerErrorResponse
+from app.main import global_exception_middleware
+
+
+@pytest.mark.asyncio
+async def test_global_exception_middleware_catches_unexpected_exception() -> None:
+    """Test that global exception middleware catches unexpected exceptions."""
+
+    mock_request = Mock(spec=StarletteRequest)
+    mock_request.url.path = "/test"
+
+    async def mock_call_next_raises_error(request: Request) -> Response:
+        """Mock call_next that raises an unexpected exception."""
+        raise ValueError("This is an unexpected error for testing")
+
+    response = await global_exception_middleware(
+        mock_request, mock_call_next_raises_error
+    )
+
+    # Verify it returns a JSONResponse
+    assert isinstance(response, JSONResponse)
+    assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
+
+    # Parse the response body
+    response_body_bytes = bytes(response.body)
+    response_body = json.loads(response_body_bytes.decode("utf-8"))
+    assert "detail" in response_body
+    detail = response_body["detail"]
+    assert isinstance(detail, dict)
+    assert "response" in detail
+    assert "cause" in detail
+
+    # Verify it matches the
generic InternalServerErrorResponse + expected_response = InternalServerErrorResponse.generic() + expected_detail = expected_response.model_dump()["detail"] + detail_dict = cast(dict[str, str], detail) + assert detail_dict["response"] == expected_detail["response"] + assert detail_dict["cause"] == expected_detail["cause"] + + +@pytest.mark.asyncio +async def test_global_exception_middleware_passes_through_http_exception() -> None: + """Test that global exception middleware passes through HTTPException unchanged.""" + + mock_request = Mock(spec=StarletteRequest) + mock_request.url.path = "/test" + + async def mock_call_next_raises_http_exception(request: Request) -> Response: + """Mock call_next that raises HTTPException.""" + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"response": "Test error", "cause": "This is a test"}, + ) + + with pytest.raises(HTTPException) as exc_info: + await global_exception_middleware( + mock_request, mock_call_next_raises_http_exception + ) + + assert exc_info.value.status_code == status.HTTP_400_BAD_REQUEST + detail = cast(dict[str, str], exc_info.value.detail) + assert detail["response"] == "Test error" + assert detail["cause"] == "This is a test" diff --git a/tests/unit/utils/test_endpoints.py b/tests/unit/utils/test_endpoints.py index cfb3c359..2ddae8f2 100644 --- a/tests/unit/utils/test_endpoints.py +++ b/tests/unit/utils/test_endpoints.py @@ -2,24 +2,23 @@ # pylint: disable=too-many-lines -from pathlib import Path import os +from pathlib import Path + import pytest -from pytest_mock import MockerFixture from fastapi import HTTPException from pydantic import AnyUrl +from pytest_mock import MockerFixture import constants from configuration import AppConfig -from models.config import CustomProfile -from models.responses import ReferencedDocument +from models.config import Action, CustomProfile from models.requests import QueryRequest -from models.config import Action -from utils import endpoints -from utils.endpoints import get_agent, get_temp_agent - +from models.responses import ReferencedDocument from tests.unit import config_dict from tests.unit.conftest import AgentFixtures +from utils import endpoints +from utils.endpoints import get_agent, get_temp_agent CONFIGURED_SYSTEM_PROMPT = "This is a configured system prompt" From b42eb24aaa83681ca13f9d4ce19581cd383a2254 Mon Sep 17 00:00:00 2001 From: Andrej Simurka Date: Sun, 23 Nov 2025 22:56:02 +0100 Subject: [PATCH 3/4] Updated exception handling in internal logic --- src/authentication/jwk_token.py | 103 +++++++-------- src/authentication/k8s.py | 119 +++++++++--------- src/authentication/utils.py | 10 +- src/authorization/middleware.py | 31 +++-- src/metrics/utils.py | 11 +- src/utils/quota.py | 32 ++--- tests/unit/authentication/test_jwk_token.py | 11 +- tests/unit/authentication/test_k8s.py | 16 +-- .../authentication/test_noop_with_token.py | 13 +- tests/unit/authentication/test_utils.py | 16 ++- tests/unit/authorization/test_middleware.py | 33 ++--- tests/unit/conftest.py | 34 ++++- 12 files changed, 238 insertions(+), 191 deletions(-) diff --git a/src/authentication/jwk_token.py b/src/authentication/jwk_token.py index e10bf81a..85c52156 100644 --- a/src/authentication/jwk_token.py +++ b/src/authentication/jwk_token.py @@ -1,11 +1,12 @@ """Manage authentication flow for FastAPI endpoints with JWK based JWT auth.""" +import json import logging from asyncio import Lock from typing import Any, Callable -from fastapi import Request, HTTPException, status -from authlib.jose 
import JsonWebKey, KeySet, jwt, Key +import aiohttp +from authlib.jose import JsonWebKey, Key, KeySet, jwt from authlib.jose.errors import ( BadSignatureError, DecodeError, @@ -13,14 +14,15 @@ JoseError, ) from cachetools import TTLCache -import aiohttp +from fastapi import HTTPException, Request +from authentication.interface import NO_AUTH_TUPLE, AuthInterface, AuthTuple +from authentication.utils import extract_user_token from constants import ( DEFAULT_VIRTUAL_PATH, ) -from authentication.interface import NO_AUTH_TUPLE, AuthInterface, AuthTuple -from authentication.utils import extract_user_token from models.config import JwkConfiguration +from models.responses import UnauthorizedResponse logger = logging.getLogger(__name__) @@ -126,68 +128,67 @@ async def __call__(self, request: Request) -> AuthTuple: return NO_AUTH_TUPLE user_token = extract_user_token(request.headers) - jwk_set = await get_jwk_set(str(self.config.url)) try: - claims = jwt.decode(user_token, key=key_resolver_func(jwk_set)) - except KeyNotFoundError as exc: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Invalid token: signed by unknown key or algorithm mismatch", - ) from exc - except BadSignatureError as exc: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Invalid token: bad signature", - ) from exc - except DecodeError as exc: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Invalid token: decode error", - ) from exc + jwk_set = await get_jwk_set(str(self.config.url)) + except aiohttp.ClientError as exc: + logger.error("Failed to fetch JWK set: %s", exc) + response = UnauthorizedResponse( + cause="Unable to reach authentication key server" + ) + raise HTTPException(**response.model_dump()) from exc + except json.JSONDecodeError as exc: + logger.error("Invalid JSON in JWK set response: %s", exc) + response = UnauthorizedResponse( + cause="Authentication key server returned invalid data" + ) + raise HTTPException(**response.model_dump()) from exc except JoseError as exc: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Invalid token: unknown error", - ) from exc - except Exception as exc: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Internal server error", - ) from exc + logger.error("Invalid JWK set format: %s", exc) + response = UnauthorizedResponse(cause="Authentication keys are malformed") + raise HTTPException(**response.model_dump()) from exc + + try: + claims = jwt.decode(user_token, key=key_resolver_func(jwk_set)) + except (KeyNotFoundError, BadSignatureError, DecodeError, JoseError) as exc: + logger.warning("Token decode error: %s", exc) + cause_map = { + KeyNotFoundError: "Token signed by unknown key", + BadSignatureError: "Invalid token signature", + DecodeError: "Token could not be decoded", + JoseError: "Token format error", + } + response = UnauthorizedResponse( + cause=cause_map.get(type(exc), "Unknown token error") + ) + raise HTTPException(**response.model_dump()) from exc try: claims.validate() except ExpiredTokenError as exc: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, detail="Token has expired" - ) from exc + response = UnauthorizedResponse(cause="Token has expired") + raise HTTPException(**response.model_dump()) from exc except JoseError as exc: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Error validating token", - ) from exc - except Exception as exc: - raise HTTPException( - 
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Internal server error during token validation", - ) from exc + response = UnauthorizedResponse(cause="Token validation failed") + raise HTTPException(**response.model_dump()) from exc try: user_id: str = claims[self.config.jwt_configuration.user_id_claim] except KeyError as exc: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=f"Token missing claim: {self.config.jwt_configuration.user_id_claim}", - ) from exc + missing_claim = self.config.jwt_configuration.user_id_claim + response = UnauthorizedResponse( + cause=f"Token missing claim: {missing_claim}" + ) + raise HTTPException(**response.model_dump()) from exc try: username: str = claims[self.config.jwt_configuration.username_claim] except KeyError as exc: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=f"Token missing claim: {self.config.jwt_configuration.username_claim}", - ) from exc + missing_claim = self.config.jwt_configuration.username_claim + response = UnauthorizedResponse( + cause=f"Token missing claim: {missing_claim}" + ) + raise HTTPException(**response.model_dump()) from exc logger.info("Successfully authenticated user %s (ID: %s)", username, user_id) diff --git a/src/authentication/k8s.py b/src/authentication/k8s.py index 4a401455..55dd8b69 100644 --- a/src/authentication/k8s.py +++ b/src/authentication/k8s.py @@ -4,15 +4,22 @@ import os from pathlib import Path from typing import Optional, Self -from fastapi import Request, HTTPException import kubernetes.client +from fastapi import HTTPException, Request from kubernetes.client.rest import ApiException from kubernetes.config import ConfigException -from configuration import configuration from authentication.interface import AuthInterface +from authentication.utils import extract_user_token +from configuration import configuration from constants import DEFAULT_VIRTUAL_PATH +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + ServiceUnavailableResponse, + UnauthorizedResponse, +) logger = logging.getLogger(__name__) @@ -172,8 +179,20 @@ def get_user_info(token: str) -> Optional[kubernetes.client.V1TokenReview]: Returns: The user information if the token is valid, None otherwise. + + Raises: + HTTPException: If unable to connect to Kubernetes API or unexpected error occurs. """ - auth_api = K8sClientSingleton.get_authn_api() + try: + auth_api = K8sClientSingleton.get_authn_api() + except Exception as e: + logger.error("Failed to get Kubernetes authentication API: %s", e) + response = ServiceUnavailableResponse( + backend_name="Kubernetes API", + cause="Unable to initialize Kubernetes client", + ) + raise HTTPException(**response.model_dump()) from e + token_review = kubernetes.client.V1TokenReview( spec=kubernetes.client.V1TokenReviewSpec(token=token) ) @@ -182,31 +201,9 @@ def get_user_info(token: str) -> Optional[kubernetes.client.V1TokenReview]: if response.status.authenticated: return response.status return None - except ApiException as e: + except Exception as e: # pylint: disable=broad-exception-caught logger.error("API exception during TokenReview: %s", e) return None - except Exception as e: - logger.error("Unexpected error during TokenReview - Unauthorized: %s", e) - raise HTTPException( - status_code=500, - detail={"response": "Forbidden: Unable to Review Token", "cause": str(e)}, - ) from e - - -def _extract_bearer_token(header: str) -> str: - """Extract the bearer token from an HTTP authorization header. 
- - Args: - header: The authorization header containing the token. - - Returns: - The extracted token if present, else an empty string. - """ - try: - scheme, token = header.split(" ", 1) - return token if scheme.lower() == "bearer" else "" - except ValueError: - return "" class K8SAuthDependency(AuthInterface): # pylint: disable=too-few-public-methods @@ -239,47 +236,51 @@ async def __call__(self, request: Request) -> tuple[str, str, bool, str]: user_id check should never be skipped with K8s authentication If user_id check should be skipped - always return False for k8s User's token - """ - authorization_header = request.headers.get("Authorization") - if not authorization_header: - raise HTTPException( - status_code=401, detail="Unauthorized: No auth header found" - ) - - token = _extract_bearer_token(authorization_header) - if not token: - raise HTTPException( - status_code=401, - detail="Unauthorized: Bearer token not found or invalid", - ) + Raises: + HTTPException: If authentication or authorization fails. + """ + token = extract_user_token(request.headers) user_info = get_user_info(token) + if user_info is None: - raise HTTPException( - status_code=403, detail="Forbidden: Invalid or expired token" - ) + response = UnauthorizedResponse(cause="Invalid or expired Kubernetes token") + raise HTTPException(**response.model_dump()) + if user_info.user.username == "kube:admin": - user_info.user.uid = K8sClientSingleton.get_cluster_id() - authorization_api = K8sClientSingleton.get_authz_api() - - sar = kubernetes.client.V1SubjectAccessReview( - spec=kubernetes.client.V1SubjectAccessReviewSpec( - user=user_info.user.username, - groups=user_info.user.groups, - non_resource_attributes=kubernetes.client.V1NonResourceAttributes( - path=self.virtual_path, verb="get" - ), - ) - ) + try: + user_info.user.uid = K8sClientSingleton.get_cluster_id() + except ClusterIDUnavailableError as e: + logger.error("Failed to get cluster ID: %s", e) + response = InternalServerErrorResponse( + response="Internal server error", + cause="Unable to retrieve cluster ID", + ) + raise HTTPException(**response.model_dump()) from e + try: + authorization_api = K8sClientSingleton.get_authz_api() + sar = kubernetes.client.V1SubjectAccessReview( + spec=kubernetes.client.V1SubjectAccessReviewSpec( + user=user_info.user.username, + groups=user_info.user.groups, + non_resource_attributes=kubernetes.client.V1NonResourceAttributes( + path=self.virtual_path, verb="get" + ), + ) + ) response = authorization_api.create_subject_access_review(sar) + if not response.status.allowed: - raise HTTPException( - status_code=403, detail="Forbidden: User does not have access" - ) - except ApiException as e: + response = ForbiddenResponse.endpoint(user_id=user_info.user.uid) + raise HTTPException(**response.model_dump()) + except Exception as e: logger.error("API exception during SubjectAccessReview: %s", e) - raise HTTPException(status_code=403, detail="Internal server error") from e + response = ServiceUnavailableResponse( + backend_name="Kubernetes API", + cause="Unable to perform authorization check", + ) + raise HTTPException(**response.model_dump()) from e return ( user_info.user.uid, diff --git a/src/authentication/utils.py b/src/authentication/utils.py index c92898ac..d0224157 100644 --- a/src/authentication/utils.py +++ b/src/authentication/utils.py @@ -2,6 +2,7 @@ from fastapi import HTTPException from starlette.datastructures import Headers +from models.responses import UnauthorizedResponse def extract_user_token(headers: Headers) -> 
str: @@ -9,18 +10,17 @@ def extract_user_token(headers: Headers) -> str: Args: header: The authorization header containing the token. - Returns: The extracted token if present, else an empty string. """ authorization_header = headers.get("Authorization") if not authorization_header: - raise HTTPException(status_code=400, detail="No Authorization header found") + response = UnauthorizedResponse(cause="No Authorization header found") + raise HTTPException(**response.model_dump()) scheme_and_token = authorization_header.strip().split() if len(scheme_and_token) != 2 or scheme_and_token[0].lower() != "bearer": - raise HTTPException( - status_code=400, detail="No token found in Authorization header" - ) + response = UnauthorizedResponse(cause="No token found in Authorization header") + raise HTTPException(**response.model_dump()) return scheme_and_token[1] diff --git a/src/authorization/middleware.py b/src/authorization/middleware.py index 6d03b8d6..b2272bad 100644 --- a/src/authorization/middleware.py +++ b/src/authorization/middleware.py @@ -1,11 +1,13 @@ """Authorization middleware and decorators.""" import logging -from functools import wraps, lru_cache +from functools import lru_cache, wraps from typing import Any, Callable, Tuple -from fastapi import HTTPException, status + +from fastapi import HTTPException from starlette.requests import Request +import constants from authorization.resolvers import ( AccessResolver, GenericAccessResolver, @@ -14,9 +16,12 @@ NoopRolesResolver, RolesResolver, ) -from models.config import Action from configuration import configuration -import constants +from models.config import Action +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, +) logger = logging.getLogger(__name__) @@ -59,10 +64,8 @@ def get_authorization_resolvers() -> Tuple[RolesResolver, AccessResolver]: ) case _: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Internal server error", - ) + response = InternalServerErrorResponse.generic() + raise HTTPException(**response.model_dump()) async def _perform_authorization_check( @@ -78,10 +81,8 @@ async def _perform_authorization_check( "Authorization only allowed on endpoints that accept " "'auth: Any = Depends(get_auth_dependency())'" ) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Internal server error", - ) from exc + response = InternalServerErrorResponse.generic() + raise HTTPException(**response.model_dump()) from exc # Everyone gets the everyone (aka *) role everyone_roles = {"*"} @@ -89,10 +90,8 @@ async def _perform_authorization_check( user_roles = await role_resolver.resolve_roles(auth) | everyone_roles if not access_resolver.check_access(action, user_roles): - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=f"Insufficient permissions for action: {action}", - ) + response = ForbiddenResponse.endpoint(user_id=auth[1]) + raise HTTPException(**response.model_dump()) authorized_actions = access_resolver.get_actions(user_roles) diff --git a/src/metrics/utils.py b/src/metrics/utils.py index 2ba51645..f7e15b10 100644 --- a/src/metrics/utils.py +++ b/src/metrics/utils.py @@ -2,16 +2,20 @@ from typing import cast +from fastapi import HTTPException from llama_stack.models.llama.datatypes import RawMessage from llama_stack.models.llama.llama3.chat_format import ChatFormat from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack_client import APIConnectionError from 
llama_stack_client.types.agents.turn import Turn import metrics from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger +from models.responses import ServiceUnavailableResponse from utils.common import run_once_async +from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) @@ -20,7 +24,12 @@ async def setup_model_metrics() -> None: """Perform setup of all metrics related to LLM model and provider.""" logger.info("Setting up model metrics") - model_list = await AsyncLlamaStackClientHolder().get_client().models.list() + check_configuration_loaded(configuration) + try: + model_list = await AsyncLlamaStackClientHolder().get_client().models.list() + except APIConnectionError as e: + response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) + raise HTTPException(**response.model_dump()) from e models = [ model diff --git a/src/utils/quota.py b/src/utils/quota.py index a7a046b5..d7fcc7a1 100644 --- a/src/utils/quota.py +++ b/src/utils/quota.py @@ -1,13 +1,12 @@ """Quota handling helper functions.""" import psycopg2 - -from fastapi import HTTPException, status - -from quota.quota_limiter import QuotaLimiter -from quota.quota_exceed_error import QuotaExceedError +from fastapi import HTTPException from log import get_logger +from models.responses import InternalServerErrorResponse, QuotaExceededResponse +from quota.quota_exceed_error import QuotaExceedError +from quota.quota_limiter import QuotaLimiter logger = get_logger(__name__) @@ -59,23 +58,12 @@ def check_tokens_available(quota_limiters: list[QuotaLimiter], user_id: str) -> except psycopg2.Error as pg_error: message = "Error communicating with quota database backend" logger.error(message) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "response": message, - "cause": str(pg_error), - }, - ) from pg_error - except QuotaExceedError as quota_exceed_error: - message = "The quota has been exceeded" - logger.error(message) - raise HTTPException( - status_code=status.HTTP_429_TOO_MANY_REQUESTS, - detail={ - "response": message, - "cause": str(quota_exceed_error), - }, - ) from quota_exceed_error + response = InternalServerErrorResponse.database_error() + raise HTTPException(**response.model_dump()) from pg_error + except QuotaExceedError as e: + logger.error("The quota has been exceeded") + response = QuotaExceededResponse.from_exception(e) + raise HTTPException(**response.model_dump()) from e def get_available_quotas( diff --git a/tests/unit/authentication/test_jwk_token.py b/tests/unit/authentication/test_jwk_token.py index 4a08294f..b2b1cd2f 100644 --- a/tests/unit/authentication/test_jwk_token.py +++ b/tests/unit/authentication/test_jwk_token.py @@ -3,13 +3,12 @@ """Unit tests for functions defined in authentication/jwk_token.py""" import time - -from typing import Any, Generator -from pytest_mock import MockerFixture +from typing import Any, Generator, cast import pytest from fastapi import HTTPException, Request from pydantic import AnyHttpUrl +from pytest_mock import MockerFixture from authlib.jose import JsonWebKey, JsonWebToken from authentication.jwk_token import JwkTokenAuthDependency, _jwk_cache @@ -302,8 +301,10 @@ async def test_no_bearer( with pytest.raises(HTTPException) as exc_info: await dependency(not_bearer_token_request) - assert exc_info.value.status_code == 400 - assert exc_info.value.detail == "No token found in Authorization header" + assert exc_info.value.status_code == 401 + 
detail = cast(dict[str, str], exc_info.value.detail) + assert detail["response"] == ("Missing or invalid credentials provided by client") + assert detail["cause"] == "No token found in Authorization header" @pytest.fixture diff --git a/tests/unit/authentication/test_k8s.py b/tests/unit/authentication/test_k8s.py index be27850e..72b314df 100644 --- a/tests/unit/authentication/test_k8s.py +++ b/tests/unit/authentication/test_k8s.py @@ -3,20 +3,19 @@ # pylint: disable=too-many-arguments,too-many-positional-arguments,too-few-public-methods,protected-access import os +from typing import Optional, cast -from typing import Optional import pytest -from pytest_mock import MockerFixture - from fastapi import HTTPException, Request from kubernetes.client import AuthenticationV1Api, AuthorizationV1Api from kubernetes.client.rest import ApiException +from pytest_mock import MockerFixture from authentication.k8s import ( - K8sClientSingleton, - K8SAuthDependency, - ClusterIDUnavailableError, CLUSTER_ID_LOCAL, + ClusterIDUnavailableError, + K8SAuthDependency, + K8sClientSingleton, ) @@ -153,7 +152,10 @@ async def test_auth_dependency_invalid_token(mocker: MockerFixture) -> None: await dependency(request) # Check if the correct status code is returned for unauthorized access - assert exc_info.value.status_code == 403 + assert exc_info.value.status_code == 401 + detail = cast(dict[str, str], exc_info.value.detail) + assert detail["response"] == ("Missing or invalid credentials provided by client") + assert detail["cause"] == "Invalid or expired Kubernetes token" async def test_cluster_id_is_used_for_kube_admin(mocker: MockerFixture) -> None: diff --git a/tests/unit/authentication/test_noop_with_token.py b/tests/unit/authentication/test_noop_with_token.py index b18ae7a1..c51be623 100644 --- a/tests/unit/authentication/test_noop_with_token.py +++ b/tests/unit/authentication/test_noop_with_token.py @@ -1,5 +1,6 @@ """Unit tests for functions defined in authentication/noop_with_token.py""" +from typing import cast from fastapi import Request, HTTPException import pytest @@ -81,8 +82,10 @@ async def test_noop_with_token_auth_dependency_no_token() -> None: with pytest.raises(HTTPException) as exc_info: await dependency(request) - assert exc_info.value.status_code == 400 - assert exc_info.value.detail == "No Authorization header found" + assert exc_info.value.status_code == 401 + detail = cast(dict[str, str], exc_info.value.detail) + assert detail["response"] == ("Missing or invalid credentials provided by client") + assert detail["cause"] == "No Authorization header found" async def test_noop_with_token_auth_dependency_no_bearer() -> None: @@ -102,5 +105,7 @@ async def test_noop_with_token_auth_dependency_no_bearer() -> None: with pytest.raises(HTTPException) as exc_info: await dependency(request) - assert exc_info.value.status_code == 400 - assert exc_info.value.detail == "No token found in Authorization header" + assert exc_info.value.status_code == 401 + detail = cast(dict[str, str], exc_info.value.detail) + assert detail["response"] == ("Missing or invalid credentials provided by client") + assert detail["cause"] == "No token found in Authorization header" diff --git a/tests/unit/authentication/test_utils.py b/tests/unit/authentication/test_utils.py index ee1d34dc..a414c14d 100644 --- a/tests/unit/authentication/test_utils.py +++ b/tests/unit/authentication/test_utils.py @@ -1,5 +1,6 @@ """Unit tests for functions defined in authentication/utils.py""" +from typing import cast from fastapi import 
HTTPException from starlette.datastructures import Headers @@ -19,8 +20,11 @@ def test_extract_user_token_no_header() -> None: try: extract_user_token(headers) except HTTPException as exc: - assert exc.status_code == 400 - assert exc.detail == "No Authorization header found" + assert exc.status_code == 401 + assert exc.detail["response"] == ( # type: ignore + "Missing or invalid credentials provided by client" + ) + assert exc.detail["cause"] == "No Authorization header found" # type: ignore def test_extract_user_token_invalid_format() -> None: @@ -29,5 +33,9 @@ def test_extract_user_token_invalid_format() -> None: try: extract_user_token(headers) except HTTPException as exc: - assert exc.status_code == 400 - assert exc.detail == "No token found in Authorization header" + assert exc.status_code == 401 + detail = cast(dict[str, str], exc.detail) + assert detail["response"] == ( + "Missing or invalid credentials provided by client" + ) + assert detail["cause"] == "No token found in Authorization header" diff --git a/tests/unit/authorization/test_middleware.py b/tests/unit/authorization/test_middleware.py index 808e52ea..14490e02 100644 --- a/tests/unit/authorization/test_middleware.py +++ b/tests/unit/authorization/test_middleware.py @@ -1,29 +1,27 @@ """Unit tests for the authorization middleware.""" -from typing import Any +from typing import Any, cast + import pytest from fastapi import HTTPException, status -from starlette.requests import Request - from pytest_mock import MockerFixture, MockType +from starlette.requests import Request -from authentication.interface import AuthTuple - -from models.config import Action, JwtRoleRule, AccessRule, JsonPathOperator import constants - +from authentication.interface import AuthTuple from authorization.middleware import ( - get_authorization_resolvers, _perform_authorization_check, authorize, + get_authorization_resolvers, ) from authorization.resolvers import ( AccessResolver, - NoopRolesResolver, - NoopAccessResolver, - JwtRolesResolver, GenericAccessResolver, + JwtRolesResolver, + NoopAccessResolver, + NoopRolesResolver, ) +from models.config import AccessRule, Action, JsonPathOperator, JwtRoleRule @pytest.fixture(name="dummy_auth_tuple") @@ -84,8 +82,8 @@ def test_noop_auth_modules( roles_resolver, access_resolver = get_authorization_resolvers() - assert isinstance(roles_resolver, expected_types[0]) - assert isinstance(access_resolver, expected_types[1]) + assert isinstance(roles_resolver, expected_types[0]) # type: ignore + assert isinstance(access_resolver, expected_types[1]) # type: ignore @pytest.mark.parametrize( "empty_rules", ["role_rules", "access_rules", "both_rules"] @@ -208,9 +206,12 @@ async def test_access_denied( ) assert exc_info.value.status_code == status.HTTP_403_FORBIDDEN - assert ( - "Insufficient permissions for action: Action.ADMIN" in exc_info.value.detail + assert exc_info.value.status_code == 403 + detail = cast(dict[str, str], exc_info.value.detail) + assert detail["response"] == ( + "User does not have permission to access this endpoint" ) + assert "not authorized to access this endpoint" in detail["cause"] @pytest.mark.parametrize("request_location", ["kwargs", "args", "none"]) async def test_request_state_handling( @@ -239,7 +240,7 @@ async def test_request_state_handling( mock_request, ] - await _perform_authorization_check(Action.QUERY, args, kwargs) + await _perform_authorization_check(Action.QUERY, args, kwargs) # type: ignore if request_location != "none": assert mock_request.state.authorized_actions == 
{Action.QUERY} diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index f3bba060..ed75c1f6 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -3,8 +3,11 @@ from __future__ import annotations from typing import Generator + import pytest -from pytest_mock import MockerFixture, AsyncMockType +from pytest_mock import AsyncMockType, MockerFixture + +from configuration import AppConfig type AgentFixtures = Generator[ tuple[ @@ -39,3 +42,32 @@ def prepare_agent_mocks_fixture( mock_agent.create_turn.return_value.steps = [] yield mock_client, mock_agent + + +@pytest.fixture(name="minimal_config") +def minimal_config_fixture() -> AppConfig: + """Create a minimal AppConfig with only required fields. + + This fixture provides a minimal valid configuration that can be used + in tests that don't need specific configuration values. It includes + only the required fields to avoid unnecessary instantiation. + + Returns: + AppConfig: A minimal AppConfig instance with required fields only. + """ + cfg = AppConfig() + cfg.init_from_dict( + { + "name": "test", + "service": {"host": "localhost", "port": 8080}, + "llama_stack": { + "api_key": "test-key", + "url": "http://test.com:1234", + "use_as_library_client": False, + }, + "user_data_collection": {}, + "authentication": {"module": "noop"}, + "authorization": {"access_rules": []}, + } + ) + return cfg From ff940a9e35c1b5a2f6e74aebf1abe8b283adf315 Mon Sep 17 00:00:00 2001 From: Andrej Simurka Date: Mon, 24 Nov 2025 09:40:51 +0100 Subject: [PATCH 4/4] Reflected coderabbit suggestions --- docs/openapi.json | 4 ++-- src/authentication/k8s.py | 7 ++++--- src/authentication/utils.py | 10 +++++++--- src/authorization/middleware.py | 2 +- src/metrics/utils.py | 4 ++-- src/models/responses.py | 4 ++-- tests/e2e/features/feedback.feature | 2 +- tests/unit/app/endpoints/test_config.py | 1 + tests/unit/app/endpoints/test_feedback.py | 2 +- tests/unit/app/endpoints/test_providers.py | 2 +- tests/unit/app/endpoints/test_shields.py | 1 + tests/unit/app/endpoints/test_streaming_query.py | 1 + tests/unit/authentication/test_noop_with_token.py | 4 ++-- tests/unit/authentication/test_utils.py | 5 +++-- tests/unit/models/responses/test_error_responses.py | 6 +++++- 15 files changed, 34 insertions(+), 21 deletions(-) diff --git a/docs/openapi.json b/docs/openapi.json index 1bbeb357..3fb04467 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -2013,7 +2013,7 @@ "value": { "detail": { "cause": "Storing feedback is disabled.", - "response": "Storing feedback is disabled." + "response": "Storing feedback is disabled" } } } @@ -5674,7 +5674,7 @@ { "detail": { "cause": "Storing feedback is disabled.", - "response": "Storing feedback is disabled." 
+ "response": "Storing feedback is disabled" }, "label": "feedback" }, diff --git a/src/authentication/k8s.py b/src/authentication/k8s.py index 55dd8b69..4cc4c18c 100644 --- a/src/authentication/k8s.py +++ b/src/authentication/k8s.py @@ -271,9 +271,6 @@ async def __call__(self, request: Request) -> tuple[str, str, bool, str]: ) response = authorization_api.create_subject_access_review(sar) - if not response.status.allowed: - response = ForbiddenResponse.endpoint(user_id=user_info.user.uid) - raise HTTPException(**response.model_dump()) except Exception as e: logger.error("API exception during SubjectAccessReview: %s", e) response = ServiceUnavailableResponse( @@ -282,6 +279,10 @@ async def __call__(self, request: Request) -> tuple[str, str, bool, str]: ) raise HTTPException(**response.model_dump()) from e + if not response.status.allowed: + response = ForbiddenResponse.endpoint(user_id=user_info.user.uid) + raise HTTPException(**response.model_dump()) + return ( user_info.user.uid, user_info.user.username, diff --git a/src/authentication/utils.py b/src/authentication/utils.py index d0224157..8acb800c 100644 --- a/src/authentication/utils.py +++ b/src/authentication/utils.py @@ -6,12 +6,16 @@ def extract_user_token(headers: Headers) -> str: - """Extract the bearer token from an HTTP authorization header. + """Extract the bearer token from an HTTP Authorization header. Args: - header: The authorization header containing the token. + headers: The request headers containing the Authorization value. + Returns: - The extracted token if present, else an empty string. + The extracted bearer token. + + Raises: + HTTPException: If the Authorization header is missing or malformed. """ authorization_header = headers.get("Authorization") if not authorization_header: diff --git a/src/authorization/middleware.py b/src/authorization/middleware.py index b2272bad..8cb27e02 100644 --- a/src/authorization/middleware.py +++ b/src/authorization/middleware.py @@ -90,7 +90,7 @@ async def _perform_authorization_check( user_roles = await role_resolver.resolve_roles(auth) | everyone_roles if not access_resolver.check_access(action, user_roles): - response = ForbiddenResponse.endpoint(user_id=auth[1]) + response = ForbiddenResponse.endpoint(user_id=auth[0]) raise HTTPException(**response.model_dump()) authorized_actions = access_resolver.get_actions(user_roles) diff --git a/src/metrics/utils.py b/src/metrics/utils.py index f7e15b10..451487be 100644 --- a/src/metrics/utils.py +++ b/src/metrics/utils.py @@ -6,7 +6,7 @@ from llama_stack.models.llama.datatypes import RawMessage from llama_stack.models.llama.llama3.chat_format import ChatFormat from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack_client import APIConnectionError +from llama_stack_client import APIConnectionError, APIStatusError from llama_stack_client.types.agents.turn import Turn import metrics @@ -27,7 +27,7 @@ async def setup_model_metrics() -> None: check_configuration_loaded(configuration) try: model_list = await AsyncLlamaStackClientHolder().get_client().models.list() - except APIConnectionError as e: + except (APIConnectionError, APIStatusError) as e: response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) raise HTTPException(**response.model_dump()) from e diff --git a/src/models/responses.py b/src/models/responses.py index e98be0f7..e893d4e7 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -1287,7 +1287,7 @@ class ForbiddenResponse(AbstractErrorResponse): { "label": 
"feedback", "detail": { - "response": "Storing feedback is disabled.", + "response": "Storing feedback is disabled", "cause": "Storing feedback is disabled.", }, }, @@ -1332,7 +1332,7 @@ def endpoint(cls, user_id: str) -> "ForbiddenResponse": def feedback_disabled(cls) -> "ForbiddenResponse": """Create a ForbiddenResponse for disabled feedback.""" return cls( - response="Feedback is disabled", + response="Storing feedback is disabled", cause="Storing feedback is disabled.", ) diff --git a/tests/e2e/features/feedback.feature b/tests/e2e/features/feedback.feature index abe26f4a..3c1a1354 100644 --- a/tests/e2e/features/feedback.feature +++ b/tests/e2e/features/feedback.feature @@ -109,7 +109,7 @@ Feature: feedback endpoint API tests """ { "detail": { - "response": "Feedback is disabled", + "response": "Storing feedback is disabled", "cause": "Storing feedback is disabled." } } diff --git a/tests/unit/app/endpoints/test_config.py b/tests/unit/app/endpoints/test_config.py index 02224bcc..cd108ec2 100644 --- a/tests/unit/app/endpoints/test_config.py +++ b/tests/unit/app/endpoints/test_config.py @@ -18,6 +18,7 @@ async def test_config_endpoint_handler_configuration_not_loaded( mock_authorization_resolvers(mocker) mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access mocker.patch("app.endpoints.config.configuration", mock_config) # HTTP request mock required by URL endpoint handler diff --git a/tests/unit/app/endpoints/test_feedback.py b/tests/unit/app/endpoints/test_feedback.py index 8c91c876..9f1b0cad 100644 --- a/tests/unit/app/endpoints/test_feedback.py +++ b/tests/unit/app/endpoints/test_feedback.py @@ -62,7 +62,7 @@ async def test_assert_feedback_enabled_disabled(mocker: MockerFixture) -> None: await assert_feedback_enabled(mocker.Mock()) assert exc_info.value.status_code == status.HTTP_403_FORBIDDEN - assert exc_info.value.detail["response"] == "Feedback is disabled" # type: ignore + assert exc_info.value.detail["response"] == "Storing feedback is disabled" # type: ignore assert exc_info.value.detail["cause"] == "Storing feedback is disabled." 
# type: ignore diff --git a/tests/unit/app/endpoints/test_providers.py b/tests/unit/app/endpoints/test_providers.py index c27cb2ad..9905ed04 100644 --- a/tests/unit/app/endpoints/test_providers.py +++ b/tests/unit/app/endpoints/test_providers.py @@ -39,7 +39,7 @@ async def test_providers_endpoint_configuration_not_loaded( async def test_providers_endpoint_connection_error( mocker: MockerFixture, minimal_config: AppConfig ) -> None: - """Test that /providers endpoint raises HTTP 500 if Llama Stack connection fails.""" + """Test that /providers endpoint raises HTTP 503 if Llama Stack connection fails.""" mocker.patch("app.endpoints.providers.configuration", minimal_config) mocker.patch( diff --git a/tests/unit/app/endpoints/test_shields.py b/tests/unit/app/endpoints/test_shields.py index c904fa9a..4306aff4 100644 --- a/tests/unit/app/endpoints/test_shields.py +++ b/tests/unit/app/endpoints/test_shields.py @@ -23,6 +23,7 @@ async def test_shields_endpoint_handler_configuration_not_loaded( # simulate state when no configuration is loaded mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access mocker.patch("app.endpoints.shields.configuration", mock_config) request = Request( diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index f2ce836c..e93f9566 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ b/tests/unit/app/endpoints/test_streaming_query.py @@ -151,6 +151,7 @@ async def test_streaming_query_endpoint_handler_configuration_not_loaded( """Test the streaming query endpoint handler if configuration is not loaded.""" # simulate state when no configuration is loaded mock_config = AppConfig() + mock_config._configuration = None # pylint: disable=protected-access mocker.patch("app.endpoints.streaming_query.configuration", mock_config) # Mock authorization resolvers to avoid accessing configuration properties mock_authorization_resolvers(mocker) diff --git a/tests/unit/authentication/test_noop_with_token.py b/tests/unit/authentication/test_noop_with_token.py index c51be623..1fc17ec7 100644 --- a/tests/unit/authentication/test_noop_with_token.py +++ b/tests/unit/authentication/test_noop_with_token.py @@ -64,8 +64,8 @@ async def test_noop_with_token_auth_dependency_no_token() -> None: Test that NoopWithTokenAuthDependency raises an HTTPException when no Authorization header is present in the request. - Asserts that the exception has a status code of 400 and the detail message - "No Authorization header found". + Asserts that the exception has a status code of 401 and a structured + detail message indicating that no Authorization header was found. 
""" dependency = NoopWithTokenAuthDependency() diff --git a/tests/unit/authentication/test_utils.py b/tests/unit/authentication/test_utils.py index a414c14d..33a58adc 100644 --- a/tests/unit/authentication/test_utils.py +++ b/tests/unit/authentication/test_utils.py @@ -21,10 +21,11 @@ def test_extract_user_token_no_header() -> None: extract_user_token(headers) except HTTPException as exc: assert exc.status_code == 401 - assert exc.detail["response"] == ( # type: ignore + detail = cast(dict[str, str], exc.detail) + assert detail["response"] == ( "Missing or invalid credentials provided by client" ) - assert exc.detail["cause"] == "No Authorization header found" # type: ignore + assert detail["cause"] == "No Authorization header found" def test_extract_user_token_invalid_format() -> None: diff --git a/tests/unit/models/responses/test_error_responses.py b/tests/unit/models/responses/test_error_responses.py index 3d8fe7ee..2e6ae99b 100644 --- a/tests/unit/models/responses/test_error_responses.py +++ b/tests/unit/models/responses/test_error_responses.py @@ -207,7 +207,7 @@ def test_factory_feedback_disabled(self) -> None: assert isinstance(response, AbstractErrorResponse) assert response.status_code == status.HTTP_403_FORBIDDEN assert isinstance(response.detail, DetailModel) - assert response.detail.response == "Feedback is disabled" + assert response.detail.response == "Storing feedback is disabled" assert response.detail.cause == "Storing feedback is disabled." def test_openapi_response(self) -> None: @@ -238,6 +238,10 @@ def test_openapi_response(self) -> None: assert "detail" in feedback_example["value"] assert ( feedback_example["value"]["detail"]["response"] + == "Storing feedback is disabled" + ) + assert ( + feedback_example["value"]["detail"]["cause"] == "Storing feedback is disabled." )