Problem with Qwen3-VL LoRA SFT #6247

@huaiyizhao

Description

[rank1]: Traceback (most recent call last):
[rank1]:   File "/app/ms-swift/swift/cli/sft.py", line 10, in <module>
[rank1]:     sft_main()
[rank1]:   File "/app/ms-swift/swift/llm/train/sft.py", line 340, in sft_main
[rank1]:     return SwiftSft(args).main()
[rank1]:            ^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/app/ms-swift/swift/llm/base.py", line 49, in main
[rank1]:     result = self.run()
[rank1]:              ^^^^^^^^^^
[rank1]:   File "/app/ms-swift/swift/llm/train/sft.py", line 178, in run
[rank1]:     self.model = self.prepare_model(self.args, self.model, template=self.template, train_dataset=train_dataset)
[rank1]:                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/app/ms-swift/swift/llm/train/tuner.py", line 350, in prepare_model
[rank1]:     model = prepare_adapter(
[rank1]:             ^^^^^^^^^^^^^^^^
[rank1]:   File "/app/ms-swift/swift/llm/train/tuner.py", line 204, in prepare_adapter
[rank1]:     model = Swift.prepare_model(model, lora_config)
[rank1]:             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/app/ms-swift/swift/tuners/base.py", line 716, in prepare_model
[rank1]:     return get_peft_model(model, config, **kwargs)
[rank1]:            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/mapping_func.py", line 125, in get_peft_model
[rank1]:     return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](
[rank1]:            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/peft_model.py", line 1815, in __init__
[rank1]:     super().__init__(model, peft_config, adapter_name, **kwargs)
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/peft_model.py", line 130, in __init__
[rank1]:     self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
[rank1]:                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/app/ms-swift/swift/tuners/peft.py", line 304, in new_init
[rank1]:     self.init_origin(model, config, adapter_name)
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/tuners/tuners_utils.py", line 209, in __init__
[rank1]:     self.inject_adapter(self.model, adapter_name, low_cpu_mem_usage=low_cpu_mem_usage, state_dict=state_dict)
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/tuners/tuners_utils.py", line 578, in inject_adapter
[rank1]:     self._create_and_replace(
[rank1]:   File "/app/ms-swift/swift/tuners/peft.py", line 103, in _create_and_replace_hook
[rank1]:     return self._create_and_replace_origin(peft_config, adapter_name, target, *args, **kwargs)
[rank1]:            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/tuners/lora/model.py", line 259, in _create_and_replace
[rank1]:     new_module = self._create_new_module(lora_config, adapter_name, target, device_map=device_map, **kwargs)
[rank1]:                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/tuners/lora/model.py", line 365, in _create_new_module
[rank1]:     new_module = dispatcher(target, adapter_name, lora_config=lora_config, **kwargs)
[rank1]:                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]:   File "/usr/local/lib/python3.11/site-packages/peft/tuners/lora/awq.py", line 105, in dispatch_awq
[rank1]:     from awq.modules.linear import WQLinear_GEMM
[rank1]:   File "/usr/local/lib/python3.11/site-packages/awq/__init__.py", line 24, in <module>
[rank1]:     from awq.models.auto import AutoAWQForCausalLM
[rank1]:   File "/usr/local/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in <module>
[rank1]:     from .mpt import MptAWQForCausalLM
[rank1]:   File "/usr/local/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in <module>
[rank1]:     from .base import BaseAWQForCausalLM
[rank1]:   File "/usr/local/lib/python3.11/site-packages/awq/models/base.py", line 49, in <module>
[rank1]:     from awq.quantize.quantizer import AwqQuantizer
[rank1]:   File "/usr/local/lib/python3.11/site-packages/awq/quantize/quantizer.py", line 11, in <module>
[rank1]:     from awq.quantize.scale import apply_scale, apply_clip
[rank1]:   File "/usr/local/lib/python3.11/site-packages/awq/quantize/scale.py", line 12, in <module>
[rank1]:     from transformers.activations import NewGELUActivation, PytorchGELUTanh, GELUActivation
[rank1]: ImportError: cannot import name 'PytorchGELUTanh' from 'transformers.activations' (/usr/local/lib/python3.11/site-packages/transformers/activations.py)
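
What seems to be happening: when autoawq is installed, PEFT's LoRA dispatcher (dispatch_awq) imports it to check whether the target layer is an AWQ quantized linear, and autoawq's scale.py in turn imports PytorchGELUTanh, which newer transformers releases removed (it was deprecated in favor of torch.nn.GELU(approximate="tanh")). So the crash hits even though the run never touches an AWQ model. Below is a minimal sketch (my own, not from ms-swift) to check whether an environment has this clash before launching training; it only assumes autoawq is importable as awq:

# compat_check.py -- minimal sketch to detect the autoawq/transformers clash.
import importlib.util


def awq_import_would_fail() -> bool:
    """True if autoawq is installed but transformers no longer exports
    PytorchGELUTanh, i.e. the exact ImportError in the traceback above."""
    if importlib.util.find_spec("awq") is None:
        return False  # autoawq absent: PEFT's AWQ dispatch is skipped
    try:
        from transformers.activations import PytorchGELUTanh  # noqa: F401
        return False
    except ImportError:
        return True


if __name__ == "__main__":
    if awq_import_would_fail():
        print("Incompatible: uninstall autoawq or adjust versions.")
    else:
        print("OK: no autoawq/transformers activation clash detected.")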

Uninstalling autoawq works for me. Maybe we need more specific environment requirements for training Qwen3-VL, or a Docker image.
Related: hiyouga/LLaMA-Factory#9247
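
If removing autoawq isn't an option (e.g. the same environment also serves AWQ models), a stopgap is to restore the removed class before anything imports awq. This is a hypothetical shim, not an official fix from transformers or ms-swift; the tanh-approximated GELU behavior matches what the removed class used to compute, but verify against your installed versions:

# awq_gelu_shim.py -- hypothetical workaround, not an official fix.
# Restores transformers.activations.PytorchGELUTanh (removed upstream) so
# autoawq's "from transformers.activations import PytorchGELUTanh" succeeds.
import torch
import torch.nn as nn
import transformers.activations as activations

if not hasattr(activations, "PytorchGELUTanh"):

    class PytorchGELUTanh(nn.Module):
        """Tanh-approximated GELU, mirroring the removed class."""

        def forward(self, input: torch.Tensor) -> torch.Tensor:
            return nn.functional.gelu(input, approximate="tanh")

    activations.PytorchGELUTanh = PytorchGELUTanh

The shim has to run before peft/awq are imported, e.g. at the very top of the training entry script; uninstalling autoawq remains the simpler fix when AWQ models aren't needed.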
