Skip to content

Commit

Permalink
align vllm-ray response format to tgi response format (#452)
Browse files Browse the repository at this point in the history
* align vllm-ray response format to tgi response format

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: changwangss <sys_lpot_val@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Aug 11, 2024
1 parent ed776ac commit ac4a777
Showing 1 changed file with 4 additions and 22 deletions.
26 changes: 4 additions & 22 deletions comps/llms/text-generation/vllm-ray/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,6 @@
from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice


@traceable(run_type="tool")
def post_process_text(text: str):
    """Wrap one streamed text chunk in a ``data: ...`` line ending in a blank line.

    Args:
        text: The chunk of generated text to encode.

    Returns:
        ``"data: @#$\\n\\n"`` when the chunk is exactly one space,
        ``"data: <br/>\\n\\n"`` when it is exactly one newline, ``None`` for
        any other whitespace-only chunk, and otherwise the chunk with every
        space replaced by the ``@#$`` placeholder, prefixed with ``data: ``
        and followed by a blank line.
    """
    if text == " ":
        # A lone space is sent as the space placeholder.
        event = "data: @#$\n\n"
    elif text == "\n":
        # A lone newline is sent as an HTML line break.
        event = "data: <br/>\n\n"
    elif text.isspace():
        # Any other whitespace-only chunk produces no event.
        event = None
    else:
        encoded = text.replace(" ", "@#$")
        event = "data: " + encoded + "\n\n"
    return event


@register_microservice(
name="opea_service@llm_vllm_ray",
service_type=ServiceType.LLM,
Expand All @@ -56,19 +44,13 @@ def llm_generate(input: LLMParamsDoc):

if input.streaming:

async def stream_generator():
def stream_generator():
chat_response = ""
async for text in llm.astream(input.query):
for text in llm.stream(input.query):
text = text.content
chat_response += text
processed_text = post_process_text(text)
if text and processed_text:
if "</s>" in text:
res = text.split("</s>")[0]
if res != "":
yield res
break
yield processed_text
chunk_repr = repr(text.encode("utf-8"))
yield f"data: {chunk_repr}\n\n"
print(f"[llm - chat_stream] stream response: {chat_response}")
yield "data: [DONE]\n\n"

Expand Down

0 comments on commit ac4a777

Please sign in to comment.