Skip to content

max_model_len(40960) - num_tokens(58124) < max_tokens(4096) #6021

@mxw20010804

Description

@mxw20010804
CUDA_VISIBLE_DEVICES=4,5,6 swift infer \
    --model ./models/Qwen/Qwen3-8B \
    --adapters ./train/weights/v0-20250929-144753/checkpoint-937 \
    --merge_lora true \
    --infer_backend pt \
    --max_new_tokens 4096 \
    --result_path lunwen_res.jsonl \
    --val_dataset lunwen.json \
    --temperature 0.7 \
    --max_length 131072 \
    --max_batch_size 1

Error reported:
max_model_len(40960) - num_tokens(58124) < max_tokens(4096). Setting max_tokens: -17164
Traceback (most recent call last):
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/cli/infer.py", line 5, in `<module>`
infer_main()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 291, in infer_main
return SwiftInfer(args).main()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/base.py", line 49, in main
result = self.run()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 91, in run
result = self.infer_dataset()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 247, in infer_dataset
result_list += self._batch_infer(shard_dataset, request_config)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer.py", line 278, in _batch_infer
resp_list = self.infer(val_dataset, request_config, template=self.template, use_tqdm=True, **self.infer_kwargs)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/pt_engine.py", line 562, in infer
res += self._infer(
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/pt_engine.py", line 494, in _infer
generation_config = self._prepare_generation_config(request_config)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/pt_engine.py", line 158, in _prepare_generation_config
generation_config = prepare_generation_config(self.generation_config, request_config, self.tokenizer)
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/swift/llm/infer/infer_engine/utils.py", line 169, in prepare_generation_config
generation_config = GenerationConfig(**kwargs)
File "x/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/transformers/generation/configuration_utils.py", line 463, in `__init__`
self.validate()
File "xx/anaconda3/envs/swift_qwen/lib/python3.10/site-packages/transformers/generation/configuration_utils.py", line 562, in validate
raise ValueError(f"max_new_tokens must be greater than 0, but is {self.max_new_tokens}.")
ValueError: max_new_tokens must be greater than 0, but is -17164.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions