-
Notifications
You must be signed in to change notification settings - Fork 909
Description
CUDA_VISIBLE_DEVICES=4,5,6 swift infer \
--model ./models/Qwen/Qwen3-8B \
--adapters ./train/weights/v0-20250929-144753/checkpoint-937 \
--merge_lora true \
--infer_backend pt \
--max_new_tokens 4096 \
--result_path lunwen_res.jsonl \
--val_dataset lunwen.json \
--temperature 0.7 \
--max_length 131072 \
--max_batch_size 1 \
报错 (Error reported):
max_model_len(40960) - num_tokens(58124) < max_tokens(4096). Setting max_tokens: -17164
Traceback (most recent call last):
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/cli/infer.py", line 5, in <module>
infer_main()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 291, in infer_main
return SwiftInfer(args).main()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/base.py", line 49, in main
result = self.run()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 91, in run
result = self.infer_dataset()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 247, in infer_dataset
result_list += self._batch_infer(shard_dataset, request_config)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 278, in _batch_infer
resp_list = self.infer(val_dataset, request_config, template=self.template, use_tqdm=True, **self.infer_kwargs)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/pt_engine.py", line 562, in infer
res += self._infer(
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/pt_engine.py", line 494, in _infer
generation_config = self._prepare_generation_config(request_config)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/pt_engine.py", line 158, in _prepare_generation_config
generation_config = prepare_generation_config(self.generation_config, request_config, self.tokenizer)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/utils.py", line 169, in prepare_generation_config
generation_config = GenerationConfig(**kwargs)
File "x/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/transformers/generation/configuration_utils.py", line 463, in __init__
self.validate()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/transformers/generation/configuration_utils.py", line 562, in validate
raise ValueError(f"`max_new_tokens` must be greater than 0, but is {self.max_new_tokens}.")
ValueError: `max_new_tokens` must be greater than 0, but is -17164.