poor quality output for qwen 72b #37

Open
pseudotensor opened this issue May 25, 2024 · 3 comments

pseudotensor commented May 25, 2024

server:

export CUDA_VISIBLE_DEVICES="3,4,5,6"
python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --tokenizer-path lmms-lab/llavanext-qwen-tokenizer --port=30010 --host="0.0.0.0" --tp-size=4 --random-seed=1234 --context-length=32768 &> 72b.log &

client:

"""
Usage:
# Install the latest llava-next: pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
# Install the latest sglang.

# Endpoint Service CLI:
# python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --tokenizer-path lmms-lab/llavanext-qwen-tokenizer --port=30000 --host="127.0.0.1" --tp-size=4

python3 http_qwen_llava_test.py

Output:
"Two children pose with a large teddy bear, one holding a smaller stuffed bear, in a room with an American flag and potted plants."
"""

import argparse
import asyncio
import copy
import json

import aiohttp
import requests

from llava.conversation import conv_qwen


async def send_request(url, data, delay=0):
    # POST the JSON payload after an optional delay and return the parsed response.
    await asyncio.sleep(delay)
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=data) as resp:
            output = await resp.json()
    return output


async def test_concurrent(args):
    url = f"{args.host}:{args.port}"

    prompt = "<image>\nPlease generate caption towards this image."
    conv_template = copy.deepcopy(conv_qwen)
    conv_template.append_message(role="user", message=prompt)
    prompt_with_template = conv_template.get_prompt()
    response = []
    for i in range(1):  # single request; increase the range to test concurrency
        response.append(
            send_request(
                url + "/generate",
                {
                    "text": prompt_with_template,
                    "image_data": "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg",
                    "sampling_params": {
                        "max_new_tokens": 1024,
                        "temperature": 0,
                        "top_p": 1.0,
                        "presence_penalty": 2,
                        "frequency_penalty": 2,
                        "stop": "<|im_end|>",
                    },
                },
            )
        )

    rets = await asyncio.gather(*response)
    for ret in rets:
        print(ret["text"])


def test_streaming(args):
    url = f"{args.host}:{args.port}"
    prompt = "<image>\nGive detailed information."
    conv_template = copy.deepcopy(conv_qwen)
    conv_template.append_message(role="user", message=prompt)
    prompt_with_template = conv_template.get_prompt()
    pload = {
        "text": prompt_with_template,
        "sampling_params": {
            "max_new_tokens": 1024,
            "temperature": 0,
            "top_p": 1.0,
            "presence_penalty": 2,
            "frequency_penalty": 2,
            "stop": "<|im_end|>",
        },
        #"image_data": "https://farm4.staticflickr.com/3175/2653711032_804ff86d81_z.jpg",
        "image_data": "https://h2o-release.s3.amazonaws.com/h2ogpt/bigben.png",
        "stream": True,
    }
    response = requests.post(
        url + "/generate",
        json=pload,
        stream=True,
    )

    # Each "data:" chunk carries the cumulative text so far; track how much has
    # already been printed and emit only the newly generated suffix.
    prev = 0
    for chunk in response.iter_lines(decode_unicode=False):
        chunk = chunk.decode("utf-8")
        if chunk and chunk.startswith("data:"):
            if chunk == "data: [DONE]":
                break
            data = json.loads(chunk[5:].strip("\n"))
            output = data["text"].strip()
            print(output[prev:], end="", flush=True)
            prev = len(output)
    print("")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="http://0.0.0.0")
    parser.add_argument("--port", type=int, default=80)
    args = parser.parse_args()
    # asyncio.run(test_concurrent(args))
    test_streaming(args)

This just gives:

Big Ben

No matter how I prompt, the output is extremely terse, even when it is accurate.

I changed the image, but otherwise this is the default script from sglang: https://github.com/sgl-project/sglang/blob/main/examples/usage/llava/http_qwen_llava_test.py

If I increase the temperature to 0.5, I get no response at all; the request just fails:

INFO:     172.16.0.42:27134 - "POST /generate HTTP/1.1" 200 OK
ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/responses.py", line 265, in __call__
    await wrap(partial(self.listen_for_disconnect, receive))
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/responses.py", line 261, in wrap
    await func()
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/responses.py", line 238, in listen_for_disconnect
    message = await receive()
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 568, in receive
    await self.message_event.wait()
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/asyncio/locks.py", line 214, in wait
    await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 7dcc7e79ada0

During handling of the above exception, another exception occurred:

  + Exception Group Traceback (most recent call last):
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 411, in run_asgi
  |     result = await app(  # type: ignore[func-returns-value]
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 69, in __call__
  |     return await self.app(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in __call__
  |     await super().__call__(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/applications.py", line 123, in __call__
  |     await self.middleware_stack(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/middleware/errors.py", line 186, in __call__
  |     raise exc
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/middleware/errors.py", line 164, in __call__
  |     await self.app(scope, receive, _send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 65, in __call__
  |     await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
  |     raise exc
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  |     await app(scope, receive, sender)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/routing.py", line 756, in __call__
  |     await self.middleware_stack(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/routing.py", line 776, in app
  |     await route.handle(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/routing.py", line 297, in handle
  |     await self.app(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/routing.py", line 77, in app
  |     await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
  |     raise exc
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  |     await app(scope, receive, sender)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/routing.py", line 75, in app
  |     await response(scope, receive, send)
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/responses.py", line 258, in __call__
  |     async with anyio.create_task_group() as task_group:
  |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 678, in __aexit__
  |     raise BaseExceptionGroup(
  | exceptiongroup.ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
  +-+---------------- 1 ----------------
    | Traceback (most recent call last):
    |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/responses.py", line 261, in wrap
    |     await func()
    |   File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/starlette/responses.py", line 250, in stream_response
    |     async for chunk in self.body_iterator:
    |   File "/home/ubuntu/sglang/python/sglang/srt/server.py", line 89, in stream_results
    |     async for out in tokenizer_manager.generate_request(obj, request):
    |   File "/home/ubuntu/sglang/python/sglang/srt/managers/tokenizer_manager.py", line 143, in generate_request
    |     pixel_values, image_hash, image_size = await self.get_pixel_values(
    | TypeError: cannot unpack non-iterable NoneType object
    +------------------------------------

I don't understand why a TypeError is raised just because temperature=0.5; very odd. Is that a bug in sglang?
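
Looking at the last frame, it seems get_pixel_values returned None for this request, and unpacking None into three names is exactly what raises this error; the temperature value itself may be incidental. A minimal illustration of the failure mode (a hypothetical stand-in, not sglang's actual code):

def get_pixel_values():
    # Stand-in for the sglang helper: assume image loading/preprocessing
    # failed and the function fell through to an implicit "return None".
    return None

# Reproduces the exact message from the traceback:
# TypeError: cannot unpack non-iterable NoneType object
pixel_values, image_hash, image_size = get_pixel_values()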

Luodian (Contributor) commented May 27, 2024

@pseudotensor

I made a fix for the template issue in sglang. You can check the updated code in the new examples folder:

https://github.com/sgl-project/sglang/blob/main/examples/usage/llava/

pseudotensor (Author) commented

Thanks. I'm trying the latest sglang, the latest llava-next, and what I believe your change to the script was, i.e. appending an assistant turn with None as the last message.
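
For reference, a minimal sketch of what that change looks like in the client script above, assuming the fix is simply to close the conversation with an empty assistant turn:

# Assumed form of the fix: append an assistant turn with message=None after
# the user message, so get_prompt() ends with the assistant role header and
# the model continues from there instead of predicting a new user turn.
conv_template = copy.deepcopy(conv_qwen)
conv_template.append_message(role="user", message=prompt)
conv_template.append_message(role="assistant", message=None)
prompt_with_template = conv_template.get_prompt()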

pseudotensor (Author) commented

Yes, that may have fixed the problem. Any idea about the TypeError or the size error?

sgl-project/sglang#474
sgl-project/sglang#473
