Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 72 additions & 3 deletions src/utils/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,13 @@ def validate_conversation_ownership(


def check_configuration_loaded(config: AppConfig) -> None:
"""Check that configuration is loaded and raise exception when it is not."""
"""
Ensure the application configuration object is present.

Raises:
HTTPException: HTTP 500 Internal Server Error with detail `{"response":
"Configuration is not loaded"}` when `config` is None.
"""
if config is None:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
Expand All @@ -71,7 +77,31 @@ def check_configuration_loaded(config: AppConfig) -> None:


def get_system_prompt(query_request: QueryRequest, config: AppConfig) -> str:
"""Get the system prompt: the provided one, configured one, or default one."""
"""
Resolve which system prompt to use for a query.

Precedence:
1. If the request includes `system_prompt`, that value is returned (highest
precedence).
2. Else if the application configuration provides a customization
`system_prompt`, that value is returned.
3. Otherwise the module default `constants.DEFAULT_SYSTEM_PROMPT` is
returned (lowest precedence).

If configuration disables per-request system prompts
(config.customization.disable_query_system_prompt) and the incoming
`query_request` contains a `system_prompt`, an HTTP 422 Unprocessable
Entity is raised instructing the client to remove the field.

Parameters:
query_request (QueryRequest): The incoming query payload; may contain a
per-request `system_prompt`.
config (AppConfig): Application configuration which may include
customization flags and a default `system_prompt`.

Returns:
str: The resolved system prompt to apply to the request.
"""
system_prompt_disabled = (
config.customization is not None
and config.customization.disable_query_system_prompt
Expand Down Expand Up @@ -171,7 +201,46 @@ async def get_agent(
conversation_id: str | None,
no_tools: bool = False,
) -> tuple[AsyncAgent, str, str]:
"""Get existing agent or create a new one with session persistence."""
"""
Create or reuse an AsyncAgent with session persistence.

Return the agent, conversation and session IDs.

If a conversation_id is provided, the function attempts to retrieve the
existing agent and, on success, rebinds a newly created agent instance to
that conversation (deleting the temporary/orphan agent) and returns the
first existing session_id for the conversation. If no conversation_id is
provided or the existing agent cannot be retrieved, a new agent and session
are created.

Parameters:
model_id (str): Identifier of the model to instantiate the agent with.
system_prompt (str): Instructions/system prompt to initialize the agent with.

available_input_shields (list[str]): Input shields to apply to the
agent; empty list used if None/empty.

available_output_shields (list[str]): Output shields to apply to the
agent; empty list used if None/empty.

conversation_id (str | None): If provided, attempt to reuse the agent
for this conversation; otherwise a new conversation_id is created.

no_tools (bool): When True, disables tool parsing for the agent (uses no tool parser).

Returns:
tuple[AsyncAgent, str, str]: A tuple of (agent, conversation_id, session_id).

Raises:
HTTPException: Raises HTTP 404 Not Found if an attempt to reuse a
conversation succeeds in retrieving the agent but no sessions are found
for that conversation.

Side effects:
- May delete an orphan agent when rebinding a newly created agent to an
existing conversation_id.
- Initializes the agent and may create a new session.
"""
existing_agent_id = None
if conversation_id:
with suppress(ValueError):
Expand Down
28 changes: 26 additions & 2 deletions src/utils/llama_stack_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@ class InvalidLlamaStackVersionException(Exception):
async def check_llama_stack_version(
client: AsyncLlamaStackClient,
) -> None:
"""Check if the Llama Stack version is supported by the LCS."""
"""
Verify the connected Llama Stack's version is within the supported range.

This coroutine fetches the Llama Stack version from the
provided client and validates it against the configured minimal
and maximal supported versions. Raises
InvalidLlamaStackVersionException if the detected version is
outside the supported range.
"""
version_info = await client.inspect.version()
compare_versions(
version_info.version,
Expand All @@ -32,7 +40,23 @@ async def check_llama_stack_version(


def compare_versions(version_info: str, minimal: str, maximal: str) -> None:
"""Compare current Llama Stack version with minimal and maximal allowed versions."""
"""
Validate that a semver version string is within the inclusive [minimal, maximal] range.

Parses `version_info`, `minimal`, and `maximal` with semver.Version.parse
and compares them. If the current version is lower than `minimal` or
higher than `maximal`, an InvalidLlamaStackVersionException is raised.

Parameters:
version_info (str): Semver version string to validate (must be
parseable by semver.Version.parse).
minimal (str): Minimum allowed semver version (inclusive).
maximal (str): Maximum allowed semver version (inclusive).

Raises:
InvalidLlamaStackVersionException: If `version_info` is outside the
inclusive range defined by `minimal` and `maximal`.
"""
current_version = Version.parse(version_info)
minimal_version = Version.parse(minimal)
maximal_version = Version.parse(maximal)
Expand Down
Loading