-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Closed
Description
Describe the bug
Describe what the bug is and how to reproduce it, preferably with screenshots (描述bug以及复现过程,最好有截图)
- Environment (Docker container launch command)
docker run -it --gpus all --name ms --shm-size=10G -v /home/server/temp/distill:/home modelscope-registry.cn-hangzhou.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-py311-torch2.3.1-1.24.0 bash
- command
CUDA_VISIBLE_DEVICES=4,5,6,7 swift sft --model /home/Qwen2.5-1.5B-Instruct --train_type full --dataset /home/OpenR1-Math-220k --torch_dtype bfloat16 --num_train_epochs 1 --per_device_train_batch_size 4 --learning_rate 1e-5 --gradient_accumulation_steps 1 --save_steps 500 --save_total_limit 1 --logging_steps 5 --max_length 2048 --output_dir output --warmup_ratio 0.05 --dataloader_num_workers 4 --split_dataset_ratio 0 --deepspeed zero3 --use_liger
- error log
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]
run sh: `/usr/local/bin/python /usr/local/lib/python3.11/site-packages/swift/cli/sft.py --model /home/Qwen2.5-1.5B-Instruct --train_type full --dataset /home/OpenR1-Math-220k --torch_dtype bfloat16 --num_train_epochs 1 --per_device_train_batch_size 4 --learning_rate 1e-5 --gradient_accumulation_steps 1 --save_steps 500 --save_total_limit 1 --logging_steps 5 --max_length 2048 --output_dir output --warmup_ratio 0.05 --dataloader_num_workers 4 --split_dataset_ratio 0 --deepspeed zero3 --use_liger`
Traceback (most recent call last):
File "/usr/local/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1817, in _get_module
return importlib.import_module("." + module_name, self.__name__)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen importlib._bootstrap>", line 1204, in _gcd_import
File "<frozen importlib._bootstrap>", line 1176, in _find_and_load
File "<frozen importlib._bootstrap>", line 1147, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 940, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/usr/local/lib/python3.11/site-packages/transformers/trainer.py", line 192, in <module>
from apex import amp
File "/usr/local/lib/python3.11/site-packages/apex/__init__.py", line 13, in <module>
from pyramid.session import UnencryptedCookieSessionFactoryConfig
ImportError: cannot import name 'UnencryptedCookieSessionFactoryConfig' from 'pyramid.session' (unknown location)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.11/site-packages/swift/cli/sft.py", line 2, in <module>
from swift.llm import sft_main
File "<frozen importlib._bootstrap>", line 1229, in _handle_fromlist
File "/usr/local/lib/python3.11/site-packages/swift/utils/import_utils.py", line 90, in __getattr__
module = self._get_module(self._class_to_module[name])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/swift/utils/import_utils.py", line 99, in _get_module
return importlib.import_module('.' + module_name, self.__name__)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/swift/llm/train/__init__.py", line 2, in <module>
from .pt import SwiftPt, pt_main
File "/usr/local/lib/python3.11/site-packages/swift/llm/train/pt.py", line 4, in <module>
from ..argument import TrainArguments
File "/usr/local/lib/python3.11/site-packages/swift/llm/argument/__init__.py", line 2, in <module>
from .app_args import AppArguments
File "/usr/local/lib/python3.11/site-packages/swift/llm/argument/app_args.py", line 6, in <module>
from ..model import get_matched_model_meta
File "/usr/local/lib/python3.11/site-packages/swift/llm/model/__init__.py", line 2, in <module>
from . import model
File "/usr/local/lib/python3.11/site-packages/swift/llm/model/model/__init__.py", line 1, in <module>
from . import (baai, baichuan, bert, codefuse, deepseek, gemma, glm, internlm, llama, llava, llm, mamba, microsoft,
File "/usr/local/lib/python3.11/site-packages/swift/llm/model/model/baai.py", line 12, in <module>
from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model
File "/usr/local/lib/python3.11/site-packages/swift/llm/model/register.py", line 22, in <module>
from .patcher import (patch_automodel, patch_automodel_for_sequence_classification, patch_get_dynamic_module,
File "/usr/local/lib/python3.11/site-packages/swift/llm/model/patcher.py", line 15, in <module>
from transformers import PreTrainedModel, dynamic_module_utils, trainer
File "<frozen importlib._bootstrap>", line 1229, in _handle_fromlist
File "/usr/local/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1808, in __getattr__
value = self._get_module(name)
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/transformers/utils/import_utils.py", line 1819, in _get_module
raise RuntimeError(
RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
cannot import name 'UnencryptedCookieSessionFactoryConfig' from 'pyramid.session' (unknown location)
Metadata
Assignees
Labels
No labels