diff --git a/docs/openapi.json b/docs/openapi.json
index 26d57d5d..fa840281 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -121,7 +121,7 @@
                     "query"
                 ],
                 "summary": "Query Endpoint Handler",
-                "description": "Handle request to the /query endpoint.",
+                "description": "Handle request to the /query endpoint.\n\nProcesses a POST request to the /query endpoint, forwarding the\nuser's query to a selected Llama Stack LLM or agent and\nreturning the generated response.\n\nValidates configuration and authentication, selects the appropriate model\nand provider, retrieves the LLM response, updates metrics, and optionally\nstores a transcript of the interaction. Handles connection errors to the\nLlama Stack service by returning an HTTP 500 error.\n\nReturns:\n    QueryResponse: Contains the conversation ID and the LLM-generated response.",
                 "operationId": "query_endpoint_handler_v1_query_post",
                 "requestBody": {
                     "content": {
@@ -192,7 +192,7 @@
                     "streaming_query"
                 ],
                 "summary": "Streaming Query Endpoint Handler",
-                "description": "Handle request to the /streaming_query endpoint.",
+                "description": "Handle request to the /streaming_query endpoint.\n\nThis endpoint receives a query request, authenticates the user,\nselects the appropriate model and provider, and streams\nincremental response events from the Llama Stack backend to the\nclient. Events include start, token updates, tool calls, turn\ncompletions, errors, and end-of-stream metadata. Optionally\nstores the conversation transcript if enabled in configuration.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n        SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n        Llama Stack server.",
                 "operationId": "streaming_query_endpoint_handler_v1_streaming_query_post",
                 "requestBody": {
                     "content": {
@@ -837,6 +837,48 @@
                     }
                 ]
             },
+            "CORSConfiguration": {
+                "properties": {
+                    "allow_origins": {
+                        "items": {
+                            "type": "string"
+                        },
+                        "type": "array",
+                        "title": "Allow Origins",
+                        "default": [
+                            "*"
+                        ]
+                    },
+                    "allow_credentials": {
+                        "type": "boolean",
+                        "title": "Allow Credentials",
+                        "default": false
+                    },
+                    "allow_methods": {
+                        "items": {
+                            "type": "string"
+                        },
+                        "type": "array",
+                        "title": "Allow Methods",
+                        "default": [
+                            "*"
+                        ]
+                    },
+                    "allow_headers": {
+                        "items": {
+                            "type": "string"
+                        },
+                        "type": "array",
+                        "title": "Allow Headers",
+                        "default": [
+                            "*"
+                        ]
+                    }
+                },
+                "type": "object",
+                "title": "CORSConfiguration",
+                "description": "CORS configuration."
+            },
             "Configuration": {
                 "properties": {
                     "name": {
@@ -1612,6 +1654,7 @@
                     },
                     "port": {
                         "type": "integer",
+                        "exclusiveMinimum": 0.0,
                         "title": "Port",
                         "default": 5432
                     },
@@ -1969,6 +2012,7 @@
                     },
                     "port": {
                         "type": "integer",
+                        "exclusiveMinimum": 0.0,
                         "title": "Port",
                         "default": 8080
                     },
@@ -1979,6 +2023,7 @@
                     },
                     "workers": {
                         "type": "integer",
+                        "exclusiveMinimum": 0.0,
                         "title": "Workers",
                         "default": 1
                     },
@@ -1995,6 +2040,21 @@
                     "tls_config": {
                         "$ref": "#/components/schemas/TLSConfiguration",
                         "default": {}
+                    },
+                    "cors": {
+                        "$ref": "#/components/schemas/CORSConfiguration",
+                        "default": {
+                            "allow_origins": [
+                                "*"
+                            ],
+                            "allow_credentials": false,
+                            "allow_methods": [
+                                "*"
+                            ],
+                            "allow_headers": [
+                                "*"
+                            ]
+                        }
                     }
                 },
                 "type": "object",
diff --git a/docs/openapi.md b/docs/openapi.md
index eaefd368..9f9209aa 100644
--- a/docs/openapi.md
+++ b/docs/openapi.md
@@ -56,6 +56,16 @@ Returns:
 
 Handle requests to the /models endpoint.
 
+Process GET requests to the /models endpoint, returning a list of available
+models from the Llama Stack service.
+
+Raises:
+    HTTPException: If unable to connect to the Llama Stack server or if
+        model retrieval fails for any reason.
+
+Returns:
+    ModelsResponse: An object containing the list of available models.
+
 
 
 
@@ -73,6 +83,18 @@ Handle request to the /query endpoint.
 
 
 
+Processes a POST request to the /query endpoint, forwarding the
+user's query to a selected Llama Stack LLM or agent and
+returning the generated response.
+
+Validates configuration and authentication, selects the appropriate model
+and provider, retrieves the LLM response, updates metrics, and optionally
+stores a transcript of the interaction. Handles connection errors to the
+Llama Stack service by returning an HTTP 500 error.
+
+Returns:
+    QueryResponse: Contains the conversation ID and the LLM-generated response.
+
 
 
 
@@ -96,6 +118,21 @@ Handle request to the /query endpoint.
 
 
 
+This endpoint receives a query request, authenticates the user,
+selects the appropriate model and provider, and streams
+incremental response events from the Llama Stack backend to the
+client. Events include start, token updates, tool calls, turn
+completions, errors, and end-of-stream metadata. Optionally
+stores the conversation transcript if enabled in configuration.
+
+Returns:
+    StreamingResponse: An HTTP streaming response yielding
+        SSE-formatted events for the query lifecycle.
+
+Raises:
+    HTTPException: Returns HTTP 500 if unable to connect to the
+        Llama Stack server.
+
 
 
 
@@ -138,6 +175,9 @@ Returns:
 
 Handle feedback requests.
 
+Processes a user feedback submission, storing the feedback and
+returning a confirmation response.
+
 Args:
     feedback_request: The request containing feedback information.
     ensure_feedback_enabled: The feedback handler (FastAPI Depends) that
@@ -148,6 +188,9 @@ Args:
 Returns:
     Response indicating the status of the feedback storage request.
 
+Raises:
+    HTTPException: Returns HTTP 500 if feedback storage fails.
+
 
 
 
@@ -420,6 +463,20 @@ Attributes:
 | username | string | User name |
 
 
+## CORSConfiguration
+
+
+CORS configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| allow_origins | array | |
+| allow_credentials | boolean | |
+| allow_methods | array | |
+| allow_headers | array | |
+
+
 ## Configuration
 
 
@@ -962,6 +1019,7 @@ Service configuration.
 | color_log | boolean | |
 | access_log | boolean | |
 | tls_config | | |
+| cors | | |
 
 
 ## StatusResponse
diff --git a/docs/output.md b/docs/output.md
index 0757bbc8..d649c558 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -74,15 +74,27 @@ Returns:
 
 | Status Code | Description | Component |
 |-------------|-------------|-----------|
-| 200 | Successful Response | [ModelsResponse](#modelsresponse)
- |
+| 200 | Successful Response | [ModelsResponse](#modelsresponse) |
 | 503 | Connection to Llama Stack is broken | |
+
 ## POST `/v1/query`
 > **Query Endpoint Handler**
 
 
 Handle request to the /query endpoint.
 
+Processes a POST request to the /query endpoint, forwarding the
+user's query to a selected Llama Stack LLM or agent and
+returning the generated response.
+
+Validates configuration and authentication, selects the appropriate model
+and provider, retrieves the LLM response, updates metrics, and optionally
+stores a transcript of the interaction. Handles connection errors to the
+Llama Stack service by returning an HTTP 500 error.
+
+Returns:
+    QueryResponse: Contains the conversation ID and the LLM-generated response.
+
 
 
 
@@ -95,21 +107,32 @@ Handle request to the /query endpoint.
 
 | Status Code | Description | Component |
 |-------------|-------------|-----------|
-| 200 | Successful Response | [QueryResponse](#queryresponse)
- |
-| 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse)
- |
-| 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse)
- |
+| 200 | Successful Response | [QueryResponse](#queryresponse) |
+| 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse) |
+| 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse) |
 | 503 | Service Unavailable | |
-| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror)
- |
+| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
 
 ## POST `/v1/streaming_query`
 > **Streaming Query Endpoint Handler**
 
 Handle request to the /streaming_query endpoint.
 
+This endpoint receives a query request, authenticates the user,
+selects the appropriate model and provider, and streams
+incremental response events from the Llama Stack backend to the
+client. Events include start, token updates, tool calls, turn
+completions, errors, and end-of-stream metadata. Optionally
+stores the conversation transcript if enabled in configuration.
+
+Returns:
+    StreamingResponse: An HTTP streaming response yielding
+        SSE-formatted events for the query lifecycle.
+
+Raises:
+    HTTPException: Returns HTTP 500 if unable to connect to the
+        Llama Stack server.
+
 
 
 
@@ -123,8 +146,7 @@ Handle request to the /streaming_query endpoint.
 | Status Code | Description | Component |
 |-------------|-------------|-----------|
 | 200 | Successful Response | ... |
-| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror)
- |
+| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
 
 ## GET `/v1/config`
 > **Config Endpoint Handler**
@@ -145,8 +167,7 @@ Returns:
 
 | Status Code | Description | Component |
 |-------------|-------------|-----------|
-| 200 | Successful Response | [Configuration](#configuration)
- |
+| 200 | Successful Response | [Configuration](#configuration) |
 | 503 | Service Unavailable | |
 
 ## POST `/v1/feedback`
@@ -442,6 +463,20 @@ Attributes:
 | username | string | User name |
 
 
+## CORSConfiguration
+
+
+CORS configuration.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| allow_origins | array | |
+| allow_credentials | boolean | |
+| allow_methods | array | |
+| allow_headers | array | |
+
+
 ## Configuration
 
 
@@ -974,6 +1009,7 @@ Service configuration.
 | color_log | boolean | |
 | access_log | boolean | |
 | tls_config | | |
+| cors | | |
 
 
 ## StatusResponse
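
Two reviewer notes with illustrative sketches follow; neither is code from this repository.

First, the new `CORSConfiguration` schema. Below is a minimal sketch of how a Pydantic model matching the schema and defaults added in this diff typically maps onto FastAPI's `CORSMiddleware`. The model fields and defaults mirror the diff; the app construction and middleware wiring are assumptions for illustration only.

```python
# Sketch only: the CORSConfiguration fields and defaults mirror the schema
# added in this diff; the FastAPI/CORSMiddleware wiring is illustrative,
# not the repository's actual startup code.
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel


class CORSConfiguration(BaseModel):
    """CORS settings, matching the schema defaults in this diff."""

    allow_origins: list[str] = ["*"]
    allow_credentials: bool = False
    allow_methods: list[str] = ["*"]
    allow_headers: list[str] = ["*"]


app = FastAPI()
cors = CORSConfiguration()  # defaults: wildcard origins, no credentials

app.add_middleware(
    CORSMiddleware,
    allow_origins=cors.allow_origins,
    allow_credentials=cors.allow_credentials,
    allow_methods=cors.allow_methods,
    allow_headers=cors.allow_headers,
)
```

Note that the defaults pair `allow_origins: ["*"]` with `allow_credentials: false`, which is consistent with the CORS specification: browsers reject credentialed responses that carry a wildcard `Access-Control-Allow-Origin`.

Second, the expanded `/v1/streaming_query` description says the endpoint yields SSE-formatted events. A hypothetical client sketch under those assumptions follows; the `query` field in the payload and the exact `data:` framing are illustrative, so consult the `QueryRequest` schema for the real request body.

```python
# Hypothetical SSE consumer for /v1/streaming_query; the payload field
# and event layout are assumptions, not confirmed by this diff.
import json

import httpx

payload = {"query": "What models are available?"}  # illustrative body

with httpx.stream(
    "POST",
    "http://localhost:8080/v1/streaming_query",  # 8080 is the service's default port
    json=payload,
    timeout=None,
) as response:
    response.raise_for_status()
    for line in response.iter_lines():
        # SSE frames carry their payload on "data:" lines.
        if line.startswith("data: "):
            print(json.loads(line[len("data: "):]))
```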