From ae03b7a908f20c81b47286a6b8d67cb876390ce9 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 20 Dec 2024 16:38:34 +0800
Subject: [PATCH] fix

---
 examples/deploy/server/demo.sh                | 3 ++-
 examples/eval/vlm/eval.sh                     | 2 +-
 examples/train/tuners/galore/train_qgalore.sh | 1 +
 examples/train/tuners/unsloth/train.sh        | 2 +-
 swift/llm/infer/deploy.py                     | 8 ++++++++
 5 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/examples/deploy/server/demo.sh b/examples/deploy/server/demo.sh
index dc81ec4c17..39c61d22e8 100644
--- a/examples/deploy/server/demo.sh
+++ b/examples/deploy/server/demo.sh
@@ -1,6 +1,7 @@
 CUDA_VISIBLE_DEVICES=0 swift deploy \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --infer_backend vllm
+    --infer_backend vllm \
+    --served_model_name Qwen2.5-7B-Instruct
 
 
 # After the server-side deployment above is successful, use the command below to perform a client call test.
diff --git a/examples/eval/vlm/eval.sh b/examples/eval/vlm/eval.sh
index de23258cd7..e92ad8e960 100644
--- a/examples/eval/vlm/eval.sh
+++ b/examples/eval/vlm/eval.sh
@@ -3,4 +3,4 @@ swift eval \
     --model Qwen/Qwen2-VL-2B-Instruct \
     --infer_backend pt \
     --eval_limit 100 \
-    --eval_dataset MME
+    --eval_dataset realWorldQA
diff --git a/examples/train/tuners/galore/train_qgalore.sh b/examples/train/tuners/galore/train_qgalore.sh
index 366e6da099..a250c3a175 100644
--- a/examples/train/tuners/galore/train_qgalore.sh
+++ b/examples/train/tuners/galore/train_qgalore.sh
@@ -1,4 +1,5 @@
 # 35GiB
+# pip install bitsandbytes==0.40.0
 CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
diff --git a/examples/train/tuners/unsloth/train.sh b/examples/train/tuners/unsloth/train.sh
index 4f742eec4e..87adf7ff12 100644
--- a/examples/train/tuners/unsloth/train.sh
+++ b/examples/train/tuners/unsloth/train.sh
@@ -1,4 +1,4 @@
-# 9GiB
+# 17GiB
 CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
diff --git a/swift/llm/infer/deploy.py b/swift/llm/infer/deploy.py
index 6adfcc5674..1d1956da13 100644
--- a/swift/llm/infer/deploy.py
+++ b/swift/llm/infer/deploy.py
@@ -119,6 +119,13 @@ def _post_process(self, request_info, response, return_cmpl_response: bool = Fal
         self.jsonl_writer.append(data)
         return response
 
+    def _set_request_config(self, request_config) -> None:
+        default_request_config = self.args.get_request_config()
+        for key, val in asdict(request_config).items():
+            default_val = getattr(default_request_config, key)
+            if default_val is not None and (val is None or isinstance(val, (list, tuple)) and len(val) == 0):
+                setattr(request_config, key, default_val)
+
     async def create_chat_completion(self,
                                      request: ChatCompletionRequest,
                                      raw_request: Request,
@@ -135,6 +142,7 @@ async def create_chat_completion(self,
         infer_kwargs['adapter_request'] = adapter_request
 
         infer_request, request_config = request.parse()
+        self._set_request_config(request_config)
         request_info = {'infer_request': infer_request.to_printable()}
 
         def pre_infer_hook(kwargs):