feat: receive both request and body
Signed-off-by: TomeHirata <tomu.hirata@gmail.com>
TomeHirata committed Jan 22, 2024
1 parent bd3e0ae commit e0287cb
Showing 3 changed files with 9 additions and 9 deletions.
examples/deployments/deployments_server/openai/config.yaml (2 changes: 1 addition & 1 deletion)
@@ -8,7 +8,7 @@ endpoints:
         openai_api_key: $OPENAI_API_KEY
     limit:
       renewal_period: "minute"
-      calls: 1
+      calls: 10
 
   - name: completions
     endpoint_type: llm/v1/completions
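The hunk above raises the first endpoint's rate limit from 1 to 10 calls per minute. A minimal sketch of how a client could observe that limit against a locally running deployments server; the endpoint name "chat", the port, and the exact rejection behavior are assumptions, not taken from this commit:

# Sketch: probe the 10-calls-per-minute limit configured above.
# Assumes the server was started with something like:
#   mlflow deployments start-server --config-path config.yaml --port 5000
from mlflow.deployments import get_deploy_client

client = get_deploy_client("http://localhost:5000")

for i in range(12):
    try:
        client.predict(
            endpoint="chat",
            inputs={"messages": [{"role": "user", "content": "hello"}]},
        )
        print(f"call {i + 1}: ok")
    except Exception as err:  # calls past the limit should be rejected (HTTP 429)
        print(f"call {i + 1}: rejected ({err})")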
mlflow/deployments/server/app.py (11 changes: 5 additions & 6 deletions)
@@ -91,9 +91,8 @@ def _create_chat_endpoint(config: RouteConfig):
 
     # https://slowapi.readthedocs.io/en/latest/#limitations-and-known-issues
     async def _chat(
-        request: Request,
+        request: Request, payload: chat.RequestPayload
     ) -> Union[chat.ResponsePayload, chat.StreamResponsePayload]:
-        payload = await parse_request_schema(request, chat.RequestPayload)
         if payload.stream:
             return await make_streaming_response(prov.chat_stream(payload))
         else:
@@ -106,9 +105,8 @@ def _create_completions_endpoint(config: RouteConfig):
     prov = get_provider(config.model.provider)(config)
 
     async def _completions(
-        request: Request,
+        request: Request, payload: completions.RequestPayload
     ) -> Union[completions.ResponsePayload, completions.StreamResponsePayload]:
-        payload = await parse_request_schema(request, completions.RequestPayload)
         if payload.stream:
             return await make_streaming_response(prov.completions_stream(payload))
         else:
@@ -120,8 +118,9 @@ async def _completions(
 def _create_embeddings_endpoint(config: RouteConfig):
     prov = get_provider(config.model.provider)(config)
 
-    async def _embeddings(request: Request) -> embeddings.ResponsePayload:
-        payload = await parse_request_schema(request, embeddings.RequestPayload)
+    async def _embeddings(
+        request: Request, payload: embeddings.RequestPayload
+    ) -> embeddings.ResponsePayload:
         return await prov.embeddings(payload)
 
     return _embeddings
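The app.py changes above switch each handler from manually calling parse_request_schema to declaring the payload as a typed parameter, so FastAPI validates the body while the handler keeps the raw Request that slowapi's limiter requires (the linked limitations page explains why). A minimal standalone sketch of the same pattern, with illustrative model and route names that are not taken from this commit:

# Sketch of "receive both request and body": slowapi needs the `request`
# parameter, and FastAPI fills `payload` from the parsed JSON body.
from fastapi import FastAPI, Request
from pydantic import BaseModel
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

app = FastAPI()
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


class ChatRequest(BaseModel):  # illustrative stand-in for chat.RequestPayload
    message: str
    stream: bool = False


@app.post("/chat")
@limiter.limit("10/minute")
async def chat(request: Request, payload: ChatRequest):
    # `request` satisfies slowapi; `payload` arrives already validated.
    return {"echo": payload.message, "stream": payload.stream}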
mlflow/gateway/config.py (5 changes: 3 additions & 2 deletions)
@@ -8,7 +8,6 @@
 
 import pydantic
 import yaml
-from limits import parse
 from packaging import version
 from packaging.version import Version
 from pydantic import ConfigDict, Field, ValidationError, root_validator, validator
@@ -401,6 +400,8 @@ def validate_route_type(cls, value):
 
     @validator("limit", pre=True)
     def validate_limit(cls, value):
+        from limits import parse
+
         if value:
             limit = Limit(**value)
             try:
@@ -452,7 +453,7 @@ class Route(ConfigModel):
     route_type: str
     model: RouteModelInfo
     route_url: str
-    limit: Optional[Limit]
+    limit: Optional[Limit] = None
 
     class Config:
         if IS_PYDANTIC_V2:
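The config.py hunks above move the limits import from module level into the validator body, so importing mlflow.gateway.config no longer requires the limits package just to load, and give Route.limit an explicit None default, which pydantic v2 needs for the field to stay optional. A minimal sketch of the pattern, assuming a Limit model with renewal_period and calls fields as in the YAML example; in MLflow the validator and the default live on different classes, and the exact rate-limit string it builds may differ:

# Sketch of the lazy-import validator pattern; the parse string is an assumption.
from typing import Optional

from pydantic import BaseModel, validator


class Limit(BaseModel):
    renewal_period: str
    calls: int


class Route(BaseModel):
    name: str
    limit: Optional[Limit] = None  # explicit default keeps the field optional under pydantic v2

    @validator("limit", pre=True)
    def validate_limit(cls, value):
        # Deferred import: `limits` is only needed when a limit is configured.
        from limits import parse

        if value:
            limit = Limit(**value)
            try:
                parse(f"{limit.calls}/{limit.renewal_period}")
            except ValueError as err:
                raise ValueError(f"Invalid rate limit configuration: {err}")
            return limit
        return value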
