From ecfab5ab62a05a8b89c5e6b1ac14d9e1ea1b25e4 Mon Sep 17 00:00:00 2001 From: Pavel Tisnovsky Date: Wed, 7 May 2025 11:10:33 +0200 Subject: [PATCH 1/5] Fixed imports and types --- src/app/endpoints/info.py | 3 +-- src/app/endpoints/root.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/app/endpoints/info.py b/src/app/endpoints/info.py index 4605a95b..3197477f 100644 --- a/src/app/endpoints/info.py +++ b/src/app/endpoints/info.py @@ -1,8 +1,7 @@ """Handler for REST API call to provide info.""" -import asyncio import logging -from typing import Any, Optional +from typing import Any from fastapi import APIRouter, Request diff --git a/src/app/endpoints/root.py b/src/app/endpoints/root.py index 742e5968..f3bf6c8d 100644 --- a/src/app/endpoints/root.py +++ b/src/app/endpoints/root.py @@ -1,8 +1,6 @@ """Handler for the / endpoint.""" -import asyncio import logging -from typing import Any, Optional from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse @@ -13,4 +11,4 @@ @router.get("/", response_class=HTMLResponse) def root_endpoint_handler(request: Request) -> HTMLResponse: - return "foo" + return HTMLResponse("foo") From cedd5dc177acc45d15c841472f179e9fdd5f6036 Mon Sep 17 00:00:00 2001 From: Pavel Tisnovsky Date: Wed, 7 May 2025 11:11:00 +0200 Subject: [PATCH 2/5] Query route --- src/app/main.py | 2 +- src/app/routers.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/app/main.py b/src/app/main.py index 607d3413..3527022e 100644 --- a/src/app/main.py +++ b/src/app/main.py @@ -1,4 +1,4 @@ -from fastapi import FastAPI, Request, Response +from fastapi import FastAPI from app import routers import version diff --git a/src/app/routers.py b/src/app/routers.py index 8b651844..9aa1dee2 100644 --- a/src/app/routers.py +++ b/src/app/routers.py @@ -2,7 +2,7 @@ from fastapi import FastAPI -from app.endpoints import info, models, root +from app.endpoints import info, models, root, query def 
include_routers(app: FastAPI) -> None: @@ -14,3 +14,4 @@ def include_routers(app: FastAPI) -> None: app.include_router(root.router) app.include_router(info.router, prefix="/v1") app.include_router(models.router, prefix="/v1") + app.include_router(query.router, prefix="/v1") From 71f99c94ac3838c48fa5e405cb6d3e75a57a4cc9 Mon Sep 17 00:00:00 2001 From: Pavel Tisnovsky Date: Wed, 7 May 2025 11:11:26 +0200 Subject: [PATCH 3/5] Proper model list response and query response --- src/models/responses.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/models/responses.py b/src/models/responses.py index eacdc137..30f921c9 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -1,6 +1,19 @@ from pydantic import BaseModel +class ModelsResponse(BaseModel): + """Model representing a response to models request.""" + + models: list[dict] + + +class QueryResponse(BaseModel): + """Model representing LLM response to a query.""" + + query: str + response: str + + class InfoResponse(BaseModel): """Model representing a response to a info request.
From c33d48e403d0c59bf4959c62f7f62d263ec41cb7 Mon Sep 17 00:00:00 2001 From: Pavel Tisnovsky Date: Wed, 7 May 2025 11:11:57 +0200 Subject: [PATCH 4/5] Model for model list --- src/app/endpoints/models.py | 35 ++++++++++++++++++++++++++++++++--- src/models/config.py | 9 +++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py index 6fc2830f..546f7ad6 100644 --- a/src/app/endpoints/models.py +++ b/src/app/endpoints/models.py @@ -7,12 +7,41 @@ from fastapi import APIRouter, Request from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient +from models.responses import ModelsResponse + logger = logging.getLogger(__name__) router = APIRouter(tags=["models"]) -@router.get("/models") -def models_endpoint_handler(request: Request) -> list[dict]: +models_responses: dict[int | str, dict[str, Any]] = { + 200: { + "models": [ + { + "identifier": "all-MiniLM-L6-v2", + "metadata": {"embedding_dimension": 384}, + "api_model_type": "embedding", + "provider_id": "ollama", + "provider_resource_id": "all-minilm:latest", + "type": "model", + "model_type": "embedding", + }, + { + "identifier": "llama3.2:3b-instruct-fp16", + "metadata": {}, + "api_model_type": "llm", + "provider_id": "ollama", + "provider_resource_id": "llama3.2:3b-instruct-fp16", + "type": "model", + "model_type": "llm", + }, + ] + }, +} + + +@router.get("/models", responses=models_responses) +def models_endpoint_handler(request: Request) -> ModelsResponse: client = LlamaStackClient(base_url="http://localhost:8321") models = client.models.list() - return [dict(m) for m in models] + m = [dict(m) for m in models] + return ModelsResponse(models=m) diff --git a/src/models/config.py b/src/models/config.py index dd60a447..3888569c 100644 --- a/src/models/config.py +++ b/src/models/config.py @@ -1,7 +1,16 @@ +from typing import Optional + from pydantic import BaseModel +class LLamaStackConfiguration(BaseModel): + """Llama 
stack configuration.""" + + url: str + + class Configuration(BaseModel): """Global service configuration.""" name: str + llama_stack: LLamaStackConfiguration From 0b5e683257dc9b2fd31ffc8b47407c3b6b4e82e0 Mon Sep 17 00:00:00 2001 From: Pavel Tisnovsky Date: Wed, 7 May 2025 11:12:18 +0200 Subject: [PATCH 5/5] Query response endpoint --- src/app/endpoints/query.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/app/endpoints/query.py diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py new file mode 100644 index 00000000..a2b4e357 --- /dev/null +++ b/src/app/endpoints/query.py @@ -0,0 +1,25 @@ +"""Handler for REST API call to provide answer to query.""" + +import logging +from typing import Any + +from fastapi import APIRouter, Request + +from version import __version__ +from models.responses import QueryResponse + +logger = logging.getLogger(__name__) +router = APIRouter(tags=["query"]) + + +query_response: dict[int | str, dict[str, Any]] = { + 200: { + "query": "User query", + "response": "LLM answer", + }, +} + + +@router.get("/query", responses=query_response) +def info_endpoint_handler(request: Request) -> QueryResponse: + return QueryResponse(query="foo", response="bar")