3 changes: 2 additions & 1 deletion examples/deploy/server/demo.sh

@@ -1,6 +1,7 @@
 CUDA_VISIBLE_DEVICES=0 swift deploy \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --infer_backend vllm
+    --infer_backend vllm \
+    --served_model_name Qwen2.5-7B-Instruct

 # After the server-side deployment above is successful, use the command below to perform a client call test.
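With the model now registered under --served_model_name, a client must request that exact name (it is also the name reported by /v1/models). A minimal sketch of the client call test, assuming swift deploy's OpenAI-compatible endpoint on the default port 8000:

from openai import OpenAI

# Hypothetical local client test; assumes the server started by demo.sh
# is reachable on the default port 8000.
client = OpenAI(api_key='EMPTY', base_url='http://127.0.0.1:8000/v1')
response = client.chat.completions.create(
    model='Qwen2.5-7B-Instruct',  # must match --served_model_name
    messages=[{'role': 'user', 'content': 'Who are you?'}],
    max_tokens=128,
)
print(response.choices[0].message.content)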
2 changes: 1 addition & 1 deletion examples/eval/vlm/eval.sh

@@ -3,4 +3,4 @@ swift eval \
     --model Qwen/Qwen2-VL-2B-Instruct \
     --infer_backend pt \
     --eval_limit 100 \
-    --eval_dataset MME
+    --eval_dataset realWorldQA
1 change: 1 addition & 0 deletions examples/train/tuners/galore/train_qgalore.sh

@@ -1,4 +1,5 @@
 # 35GiB
+# pip install bitsandbytes==0.40.0
 CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
2 changes: 1 addition & 1 deletion examples/train/tuners/unsloth/train.sh

@@ -1,4 +1,4 @@
-# 9GiB
+# 17GiB
 CUDA_VISIBLE_DEVICES=0 \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
8 changes: 8 additions & 0 deletions swift/llm/infer/deploy.py

@@ -119,6 +119,13 @@ def _post_process(self, request_info, response, return_cmpl_response: bool = Fal
             self.jsonl_writer.append(data)
         return response

+    def _set_request_config(self, request_config) -> None:
+        default_request_config = self.args.get_request_config()
+        for key, val in asdict(request_config).items():
+            default_val = getattr(default_request_config, key)
+            if default_val is not None and (val is None or isinstance(val, (list, tuple)) and len(val) == 0):
+                setattr(request_config, key, default_val)
+
     async def create_chat_completion(self,
                                      request: ChatCompletionRequest,
                                      raw_request: Request,
@@ -135,6 +142,7 @@ async def create_chat_completion(self,
             infer_kwargs['adapter_request'] = adapter_request

         infer_request, request_config = request.parse()
+        self._set_request_config(request_config)
         request_info = {'infer_request': infer_request.to_printable()}

         def pre_infer_hook(kwargs):
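The new _set_request_config hook merges the server-side default request config (built from the swift deploy CLI arguments) into each incoming request: any field the client left as None or as an empty list/tuple is filled from the server default, while explicit client values are kept. A self-contained sketch of the same merge rule, using a hypothetical toy RequestConfig in place of swift's:

from dataclasses import asdict, dataclass, field
from typing import List, Optional

@dataclass
class RequestConfig:  # toy stand-in for swift's RequestConfig
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    stop: List[str] = field(default_factory=list)

def set_request_config(request_config, default_request_config) -> None:
    # Fill unset client fields (None or an empty list/tuple) from the
    # server defaults; explicit client values are left untouched.
    for key, val in asdict(request_config).items():
        default_val = getattr(default_request_config, key)
        if default_val is not None and (val is None or isinstance(val, (list, tuple)) and len(val) == 0):
            setattr(request_config, key, default_val)

server_defaults = RequestConfig(max_tokens=512, temperature=0.0, stop=['Observation:'])
client_request = RequestConfig(temperature=0.7)  # client only set temperature
set_request_config(client_request, server_defaults)
print(client_request)
# -> RequestConfig(max_tokens=512, temperature=0.7, stop=['Observation:'])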