Skip to content

Commit

Permalink
Remove generate endpoints
Browse files Browse the repository at this point in the history
Signed-off-by: Curtis Maddalozzo <cmaddalozzo@bloomberg.net>
  • Loading branch information
cmaddalozzo committed Apr 29, 2024
1 parent 622f32f commit d8bccc2
Show file tree
Hide file tree
Showing 5 changed files with 0 additions and 370 deletions.
6 changes: 0 additions & 6 deletions python/kserve/kserve/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,12 +417,6 @@ async def predict(
else res
)

async def generate(
    self, payload: GenerateRequest, headers: Dict[str, str] = None
) -> Union[GenerateResponse, AsyncIterator[Any]]:
    """Handle a text-generation request.

    Subclasses override this to implement text generation; the base
    class deliberately provides no implementation.

    Args:
        payload: The generate request body.
        headers: Optional request headers.

    Returns:
        The generated response, or an async iterator for streamed output.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("generate is not implemented")

async def explain(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
"""`explain` handler can be overridden to implement the model explanation.
The default implementation makes call to the explainer if ``explainer_host`` is specified.
Expand Down
25 changes: 0 additions & 25 deletions python/kserve/kserve/protocol/dataplane.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from ..model_repository import ModelRepository
from ..utils.utils import create_response_cloudevent, is_structured_cloudevent
from .infer_type import InferRequest, InferResponse
from .rest.v2_datamodels import GenerateRequest, GenerateResponse

JSON_HEADERS = [
"application/json",
Expand Down Expand Up @@ -340,30 +339,6 @@ async def infer(
response = await model(request, headers=headers)
return response, headers

async def generate(
    self,
    model_name: str,
    request: Union[Dict, GenerateRequest],
    headers: Optional[Dict[str, str]] = None,
) -> Tuple[Union[GenerateResponse, AsyncIterator[Any]], Dict[str, str]]:
    """Generate text for the provided prompt with the named model.

    Args:
        model_name (str): Name of the model to generate with.
        request (bytes|GenerateRequest): Generate request body data.
        headers (Optional[Dict[str, str]]): Request headers.

    Returns:
        A pair of (generated output or output stream, headers used to
        construct the HTTP response).

    Raises:
        InvalidInput: An error when the body bytes can't be decoded as JSON.
    """
    target_model = self.get_model(model_name)
    result = await target_model.generate(request, headers=headers)
    return result, headers

async def explain(
self,
model_name: str,
Expand Down
12 changes: 0 additions & 12 deletions python/kserve/kserve/protocol/rest/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,18 +177,6 @@ def create_application(self) -> FastAPI:
response_model=InferenceResponse,
tags=["V2"],
),
FastAPIRoute(
r"/v2/models/{model_name}/generate",
v2_endpoints.generate,
methods=["POST"],
tags=["V2"],
),
FastAPIRoute(
r"/v2/models/{model_name}/generate_stream",
v2_endpoints.generate_stream,
methods=["POST"],
tags=["V2"],
),
FastAPIRoute(
r"/v2/models/{model_name}/versions/{model_version}/infer",
v2_endpoints.infer,
Expand Down
217 changes: 0 additions & 217 deletions python/kserve/kserve/protocol/rest/v2_datamodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,220 +283,3 @@ class InferenceResponse(BaseModel):

class Config:
schema_extra = inference_response_schema_extra


# OpenAPI example payload advertised for the generate request schema.
generate_request_schema_extra = {
    "example": {
        "text_input": "Tell me about the AI",
        "parameters": {"temperature": 0.8, "top_p": 0.9},
    }
}


class GenerateRequest(BaseModel):
    """GenerateRequest Model

    $generate_request =
    {
      "text_input" : $string,
      "parameters" : $string #optional,
    }
    """

    # Prompt text to generate from.
    text_input: str
    # Optional sampling parameters (e.g. temperature, top_p).
    parameters: Optional[Parameters] = None

    # Schema-example wiring differs between pydantic major versions:
    # v2 uses ConfigDict; v1 uses an inner Config class (with orjson
    # for faster request-body parsing).
    if is_pydantic_2:
        model_config = ConfigDict(json_schema_extra=generate_request_schema_extra)
    else:

        class Config:
            json_loads = orjson.loads
            schema_extra = generate_request_schema_extra


# OpenAPI example for a single generated token.
token_schema_extra = {
    "example": {"id": 267, "logprob": -2.0723474, "special": False, "text": " a"}
}


class Token(BaseModel):
    """Token Data Model

    A single generated token with its log-probability.
    """

    # Token id in the model's vocabulary.
    id: int
    # Log-probability assigned to this token.
    logprob: float
    # Whether this is a special token (e.g. BOS/EOS) — TODO confirm
    # against the generator implementation.
    special: bool
    # Decoded token text.
    text: str

    # pydantic v2 configures schema examples via ConfigDict; v1 uses an
    # inner Config class (with orjson for faster parsing).
    if is_pydantic_2:
        model_config = ConfigDict(json_schema_extra=token_schema_extra)
    else:

        class Config:
            json_loads = orjson.loads
            schema_extra = token_schema_extra


# OpenAPI example for non-streaming generation details; "logprobs" is a
# list of tokens here (contrast with the streaming variant below).
details_schema_extra = {
    "example": {
        "finish_reason": "stop",
        "logprobs": [
            {"id": 267, "logprob": -2.0723474, "special": False, "text": " a"}
        ],
    }
}


class Details(BaseModel):
    """Generate response details

    Carries the finish reason and per-token log-probabilities for a
    complete (non-streaming) generation.
    """

    # Why generation stopped (e.g. "stop") — exact value set is produced
    # by the generator; not enumerated here.
    finish_reason: str
    # One Token entry per generated token.
    logprobs: List[Token]

    # pydantic v2 configures schema examples via ConfigDict; v1 uses an
    # inner Config class (with orjson for faster parsing).
    if is_pydantic_2:
        model_config = ConfigDict(
            json_schema_extra=details_schema_extra,
        )
    else:

        class Config:
            json_loads = orjson.loads
            schema_extra = details_schema_extra


# OpenAPI example for streaming generation details; each streamed chunk
# carries a single token, so "logprobs" is one object rather than a list.
streaming_details_schema_extra = {
    "example": {
        "finish_reason": "stop",
        "logprobs": {"id": 267, "logprob": -2.0723474, "special": False, "text": " a"},
    }
}


class StreamingDetails(BaseModel):
    """Generate response details (streaming variant)

    Like Details, but a streamed chunk carries exactly one token, so
    ``logprobs`` is a single Token rather than a list.
    """

    # Why generation stopped (e.g. "stop").
    finish_reason: str
    # The single token carried by this streamed chunk.
    logprobs: Token

    # pydantic v2 configures schema examples via ConfigDict; v1 uses an
    # inner Config class (with orjson for faster parsing).
    if is_pydantic_2:
        model_config = ConfigDict(
            json_schema_extra=streaming_details_schema_extra,
        )
    else:

        class Config:
            json_loads = orjson.loads
            schema_extra = streaming_details_schema_extra


# OpenAPI example payload advertised for the generate response schema.
# The token "id" is an int, matching the Token model (id: int) and the
# other token examples in this module (it was previously the string "267").
generate_response_schema_extra = {
    "example": {
        "text_output": "Tell me about the AI",
        "model_name": "bloom7b1",
        "details": {
            "finish_reason": "stop",
            "logprobs": [
                {
                    "id": 267,
                    "logprob": -2.0723474,
                    "special": False,
                    "text": " a",
                }
            ],
        },
    }
}


class GenerateResponse(BaseModel):
    """GenerateResponse Model

    $generate_response =
    {
      "text_output" : $string,
      "model_name" : $string,
      "model_version" : $string #optional,
      "details": $Details #optional
    }
    """

    # The full generated text.
    text_output: str
    text_output: str if False else text_output  # (no-op removed)
    model_name: str
    model_version: Optional[str] = None
    # Per-token details for the whole generation, when provided.
    details: Optional[Details] = None

    # pydantic v2: protected_namespaces=() silences the warning for the
    # "model_" field-name prefix; v1 uses an inner Config class.
    if is_pydantic_2:
        model_config = ConfigDict(
            protected_namespaces=(),
            json_schema_extra=generate_response_schema_extra,
        )
    else:

        class Config:
            json_loads = orjson.loads
            schema_extra = generate_response_schema_extra


# OpenAPI example payload advertised for the streaming generate response
# schema. The token "id" is an int, matching the Token model (id: int) and
# the other token examples in this module (previously the string "267").
generate_streaming_response_schema_extra = {
    "example": {
        "text_output": "Tell me about the AI",
        "model_name": "bloom7b1",
        "details": {
            "finish_reason": "stop",
            "logprobs": {
                "id": 267,
                "logprob": -2.0723474,
                "special": False,
                "text": " a",
            },
        },
    }
}


class GenerateStreamingResponse(BaseModel):
    """GenerateStreamingResponse Model

    $generate_response =
    {
      "text_output" : $string,
      "model_name" : $string,
      "model_version" : $string #optional,
      "details": $StreamingDetails #optional
    }
    """

    # Text produced so far / in this chunk.
    text_output: str
    model_name: str
    model_version: Optional[str] = None
    # Single-token details for this streamed chunk, when provided.
    details: Optional[StreamingDetails] = None

    # pydantic v2: protected_namespaces=() silences the warning for the
    # "model_" field-name prefix; v1 uses an inner Config class.
    if is_pydantic_2:
        model_config = ConfigDict(
            protected_namespaces=(),
            json_schema_extra=generate_streaming_response_schema_extra,
        )

    else:

        class Config:
            json_loads = orjson.loads
            schema_extra = generate_streaming_response_schema_extra
Loading

0 comments on commit d8bccc2

Please sign in to comment.