Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/app/endpoints/info.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Handler for REST API call to provide info."""

import asyncio
import logging
from typing import Any, Optional
from typing import Any

from fastapi import APIRouter, Request

Expand Down
35 changes: 32 additions & 3 deletions src/app/endpoints/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,41 @@
from fastapi import APIRouter, Request
from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient

from models.responses import ModelsResponse

logger = logging.getLogger(__name__)
router = APIRouter(tags=["models"])


@router.get("/models")
def models_endpoint_handler(request: Request) -> list[dict]:
# Example payload documenting the 200 response of the /models endpoint in the
# generated OpenAPI schema. The two entries mirror a typical Ollama-backed
# Llama Stack deployment (one embedding model, one LLM).
# NOTE(review): FastAPI's `responses` parameter normally expects per-status
# keys such as "description", "model", or "content"; a bare "models" key may
# not render as intended in the docs — confirm against the FastAPI docs.
models_responses: dict[int | str, dict[str, Any]] = {
    200: {
        "models": [
            {
                "identifier": "all-MiniLM-L6-v2",
                "metadata": {"embedding_dimension": 384},
                "api_model_type": "embedding",
                "provider_id": "ollama",
                "provider_resource_id": "all-minilm:latest",
                "type": "model",
                "model_type": "embedding",
            },
            {
                "identifier": "llama3.2:3b-instruct-fp16",
                "metadata": {},
                "api_model_type": "llm",
                "provider_id": "ollama",
                "provider_resource_id": "llama3.2:3b-instruct-fp16",
                "type": "model",
                "model_type": "llm",
            },
        ]
    },
}


@router.get("/models", responses=models_responses)
def models_endpoint_handler(request: Request) -> ModelsResponse:
    """Return the list of models known to the Llama Stack server.

    Each model object reported by the client is converted to a plain dict
    and the collection is wrapped in a ModelsResponse.
    """
    # NOTE(review): the base URL is hard-coded here; consider reading it from
    # the service configuration (Configuration.llama_stack.url) instead.
    client = LlamaStackClient(base_url="http://localhost:8321")
    models = client.models.list()
    # Distinct names for the loop variable and the result: the original reused
    # "m" for both, which read confusingly even though comprehension scope
    # makes it technically correct.
    model_dicts = [dict(model) for model in models]
    return ModelsResponse(models=model_dicts)
25 changes: 25 additions & 0 deletions src/app/endpoints/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Handler for REST API call to provide answer to query."""

import logging
from typing import Any

from fastapi import APIRouter, Request

from version import __version__
from models.responses import QueryResponse

# Module-level logger for the query endpoint.
logger = logging.getLogger(__name__)
# Endpoints in this module serve /query, so group them under the "query" tag
# in the generated OpenAPI docs; the "models" tag was a copy-paste leftover
# from models.py.
router = APIRouter(tags=["query"])


query_response: dict[int | str, dict[str, Any]] = {
200: {
"query": "User query",
"answer": "LLM ansert",
},
}


@router.get("/query", responses=query_response)
def info_endpoint_handler(request: Request) -> QueryResponse:
    """Handle requests to the /query endpoint.

    NOTE(review): the function is named ``info_endpoint_handler`` but serves
    the /query route — presumably a copy-paste leftover; consider renaming to
    ``query_endpoint_handler``. The returned query/response values are
    hard-coded placeholders, not a real LLM answer.
    """
    return QueryResponse(query="foo", response="bar")
4 changes: 1 addition & 3 deletions src/app/endpoints/root.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Handler for the / endpoint."""

import asyncio
import logging
from typing import Any, Optional

from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
Expand All @@ -13,4 +11,4 @@

@router.get("/", response_class=HTMLResponse)
def root_endpoint_handler(request: Request) -> HTMLResponse:
return "<html>foo</html>"
return HTMLResponse("<html>foo</html>")
2 changes: 1 addition & 1 deletion src/app/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from fastapi import FastAPI, Request, Response
from fastapi import FastAPI
from app import routers
import version

Expand Down
3 changes: 2 additions & 1 deletion src/app/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from fastapi import FastAPI

from app.endpoints import info, models, root
from app.endpoints import info, models, root, query


def include_routers(app: FastAPI) -> None:
Expand All @@ -14,3 +14,4 @@ def include_routers(app: FastAPI) -> None:
app.include_router(root.router)
app.include_router(info.router, prefix="/v1")
app.include_router(models.router, prefix="/v1")
app.include_router(query.router, prefix="/v1")
9 changes: 9 additions & 0 deletions src/models/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
from typing import Optional

from pydantic import BaseModel


class LLamaStackConfiguration(BaseModel):
    """Llama stack configuration.

    NOTE(review): the class name has a doubled capital L ("LLama") — likely a
    typo, but renaming would break importers, so it is only flagged here.
    """

    # Base URL of the Llama Stack server, e.g. "http://localhost:8321".
    url: str


class Configuration(BaseModel):
    """Global service configuration."""

    # Service name.
    name: str
    # Connection settings for the Llama Stack backend.
    llama_stack: LLamaStackConfiguration
13 changes: 13 additions & 0 deletions src/models/responses.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
from pydantic import BaseModel


class ModelsResponse(BaseModel):
    """Model representing a response to models request."""

    # Models available from the backend, each serialized as a plain dict.
    # NOTE(review): consider a typed item model instead of bare dict so the
    # OpenAPI schema documents the per-model fields.
    models: list[dict]


class QueryResponse(BaseModel):
    """Model representing LLM response to a query."""

    # The user's original query string.
    query: str
    # The LLM-generated answer to the query.
    response: str


class InfoResponse(BaseModel):
"""Model representing a response to a info request.

Expand Down
Loading