From ae03b7a908f20c81b47286a6b8d67cb876390ce9 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 20 Dec 2024 16:38:34 +0800
Subject: [PATCH] fix

---
 examples/deploy/server/demo.sh                | 3 ++-
 examples/eval/vlm/eval.sh                     | 2 +-
 examples/train/tuners/galore/train_qgalore.sh | 1 +
 examples/train/tuners/unsloth/train.sh        | 2 +-
 swift/llm/infer/deploy.py                     | 8 ++++++++
 5 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/examples/deploy/server/demo.sh b/examples/deploy/server/demo.sh
index dc81ec4c17..39c61d22e8 100644
--- a/examples/deploy/server/demo.sh
+++ b/examples/deploy/server/demo.sh
@@ -1,6 +1,7 @@
 CUDA_VISIBLE_DEVICES=0 swift deploy \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --infer_backend vllm
+    --infer_backend vllm \
+    --served_model_name Qwen2.5-7B-Instruct
 
 
 # After the server-side deployment above is successful, use the command below to perform a client call test.
diff --git a/examples/eval/vlm/eval.sh b/examples/eval/vlm/eval.sh
index de23258cd7..e92ad8e960 100644
--- a/examples/eval/vlm/eval.sh
+++ b/examples/eval/vlm/eval.sh
@@ -3,4 +3,4 @@ swift eval \
     --model Qwen/Qwen2-VL-2B-Instruct \
     --infer_backend pt \
     --eval_limit 100 \
-    --eval_dataset MME
+    --eval_dataset realWorldQA
diff --git a/examples/train/tuners/galore/train_qgalore.sh b/examples/train/tuners/galore/train_qgalore.sh
index 366e6da099..a250c3a175 100644
--- a/examples/train/tuners/galore/train_qgalore.sh
+++ b/examples/train/tuners/galore/train_qgalore.sh
@@ -1,4 +1,5 @@
 # 35GiB
+# pip install bitsandbytes==0.40.0
 CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
diff --git a/examples/train/tuners/unsloth/train.sh b/examples/train/tuners/unsloth/train.sh
index 4f742eec4e..87adf7ff12 100644
--- a/examples/train/tuners/unsloth/train.sh
+++ b/examples/train/tuners/unsloth/train.sh
@@ -1,4 +1,4 @@
-# 9GiB
+# 17GiB
 CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
diff --git a/swift/llm/infer/deploy.py b/swift/llm/infer/deploy.py
index 6adfcc5674..1d1956da13 100644
--- a/swift/llm/infer/deploy.py
+++ b/swift/llm/infer/deploy.py
@@ -119,6 +119,13 @@ def _post_process(self, request_info, response, return_cmpl_response: bool = Fal
         self.jsonl_writer.append(data)
         return response
 
+    def _set_request_config(self, request_config) -> None:
+        default_request_config = self.args.get_request_config()
+        for key, val in asdict(request_config).items():
+            default_val = getattr(default_request_config, key)
+            if default_val is not None and (val is None or isinstance(val, (list, tuple)) and len(val) == 0):
+                setattr(request_config, key, default_val)
+
     async def create_chat_completion(self,
                                      request: ChatCompletionRequest,
                                      raw_request: Request,
@@ -135,6 +142,7 @@ async def create_chat_completion(self,
         infer_kwargs['adapter_request'] = adapter_request
 
         infer_request, request_config = request.parse()
+        self._set_request_config(request_config)
         request_info = {'infer_request': infer_request.to_printable()}
 
         def pre_infer_hook(kwargs):