diff --git a/src/app/endpoints/info.py b/src/app/endpoints/info.py
index 4605a95b..3197477f 100644
--- a/src/app/endpoints/info.py
+++ b/src/app/endpoints/info.py
@@ -1,8 +1,7 @@
 """Handler for REST API call to provide info."""
 
-import asyncio
 import logging
-from typing import Any, Optional
+from typing import Any
 
 from fastapi import APIRouter, Request
 
diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py
index 6fc2830f..546f7ad6 100644
--- a/src/app/endpoints/models.py
+++ b/src/app/endpoints/models.py
@@ -7,12 +7,43 @@
 from fastapi import APIRouter, Request
 
 from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
 
+from models.responses import ModelsResponse
+
 logger = logging.getLogger(__name__)
 
 router = APIRouter(tags=["models"])
-@router.get("/models")
-def models_endpoint_handler(request: Request) -> list[dict]:
+# Response example shown in the generated OpenAPI schema for this endpoint
+models_responses: dict[int | str, dict[str, Any]] = {
+    200: {
+        "models": [
+            {
+                "identifier": "all-MiniLM-L6-v2",
+                "metadata": {"embedding_dimension": 384},
+                "api_model_type": "embedding",
+                "provider_id": "ollama",
+                "provider_resource_id": "all-minilm:latest",
+                "type": "model",
+                "model_type": "embedding",
+            },
+            {
+                "identifier": "llama3.2:3b-instruct-fp16",
+                "metadata": {},
+                "api_model_type": "llm",
+                "provider_id": "ollama",
+                "provider_resource_id": "llama3.2:3b-instruct-fp16",
+                "type": "model",
+                "model_type": "llm",
+            },
+        ]
+    },
+}
+
+
+@router.get("/models", responses=models_responses)
+def models_endpoint_handler(request: Request) -> ModelsResponse:
+    """Handle requests to the /models endpoint."""
     client = LlamaStackClient(base_url="http://localhost:8321")
     models = client.models.list()
-    return [dict(m) for m in models]
+    model_list = [dict(m) for m in models]
+    return ModelsResponse(models=model_list)
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
new file mode 100644
index 00000000..a2b4e357
--- /dev/null
+++ b/src/app/endpoints/query.py
@@ -0,0 +1,26 @@
+"""Handler for REST API call to provide answer to query."""
+
+import logging
+from typing import Any
+
+from fastapi import APIRouter, Request
+
+from models.responses import QueryResponse
+
+logger = logging.getLogger(__name__)
+router = APIRouter(tags=["query"])
+
+
+# Response example shown in the generated OpenAPI schema for this endpoint
+query_response: dict[int | str, dict[str, Any]] = {
+    200: {
+        "query": "User query",
+        "response": "LLM answer",
+    },
+}
+
+
+@router.get("/query", responses=query_response)
+def query_endpoint_handler(request: Request) -> QueryResponse:
+    """Handle requests to the /query endpoint."""
+    return QueryResponse(query="foo", response="bar")
diff --git a/src/app/endpoints/root.py b/src/app/endpoints/root.py
index 742e5968..f3bf6c8d 100644
--- a/src/app/endpoints/root.py
+++ b/src/app/endpoints/root.py
@@ -1,8 +1,6 @@
 """Handler for the / endpoint."""
 
-import asyncio
 import logging
-from typing import Any, Optional
 
 from fastapi import APIRouter, Request
 from fastapi.responses import HTMLResponse
@@ -13,4 +11,4 @@
 
 @router.get("/", response_class=HTMLResponse)
 def root_endpoint_handler(request: Request) -> HTMLResponse:
-    return "foo"
+    return HTMLResponse("foo")
diff --git a/src/app/main.py b/src/app/main.py
index 607d3413..3527022e 100644
--- a/src/app/main.py
+++ b/src/app/main.py
@@ -1,4 +1,4 @@
-from fastapi import FastAPI, Request, Response
+from fastapi import FastAPI
 
 from app import routers
 import version
diff --git a/src/app/routers.py b/src/app/routers.py
index 8b651844..9aa1dee2 100644
--- a/src/app/routers.py
+++ b/src/app/routers.py
@@ -2,7 +2,7 @@
 
 from fastapi import FastAPI
 
-from app.endpoints import info, models, root
+from app.endpoints import info, models, root, query
 
 
 def include_routers(app: FastAPI) -> None:
@@ -14,3 +14,4 @@ def include_routers(app: FastAPI) -> None:
     app.include_router(root.router)
     app.include_router(info.router, prefix="/v1")
     app.include_router(models.router, prefix="/v1")
+    app.include_router(query.router, prefix="/v1")
diff --git a/src/models/config.py b/src/models/config.py
index dd60a447..3888569c 100644
--- a/src/models/config.py
+++ b/src/models/config.py
@@ -1,7 +1,14 @@
 from pydantic import BaseModel
 
 
+class LLamaStackConfiguration(BaseModel):
+    """Llama stack configuration."""
+
+    url: str
+
+
 class Configuration(BaseModel):
     """Global service configuration."""
 
     name: str
+    llama_stack: LLamaStackConfiguration
diff --git a/src/models/responses.py b/src/models/responses.py
index eacdc137..30f921c9 100644
--- a/src/models/responses.py
+++ b/src/models/responses.py
@@ -1,6 +1,19 @@
 from pydantic import BaseModel
 
 
+class ModelsResponse(BaseModel):
+    """Model representing a response to a models request."""
+
+    models: list[dict]
+
+
+class QueryResponse(BaseModel):
+    """Model representing an LLM response to a query."""
+
+    query: str
+    response: str
+
+
 class InfoResponse(BaseModel):
     """Model representing a response to a info request.