Merged
1 change: 1 addition & 0 deletions .dev_scripts/ci_container_test.sh
@@ -22,6 +22,7 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then

pip install -r requirements/framework.txt -U -i https://mirrors.aliyun.com/pypi/simple/
pip install diffusers decord einops -U -i https://mirrors.aliyun.com/pypi/simple/
pip install autoawq!=0.2.7.post3 -U --no-deps

# test with install
pip install .
4 changes: 2 additions & 2 deletions docs/source/Instruction/命令行参数.md
@@ -107,9 +107,9 @@
- 🔥gradient_accumulation_steps: Gradient accumulation, default is 1.
- 🔥save_strategy: Strategy for saving the model, options are 'no', 'steps', 'epoch', default is 'steps'.
- 🔥save_steps: Default is 500.
- 🔥save_total_limit: Default is None, saving all checkpoints.
- 🔥eval_strategy: Evaluation strategy, follows `save_strategy`.
- 🔥eval_strategy: Default is None. Evaluation strategy, follows `save_strategy`.
- 🔥eval_steps: Default is None. If an evaluation dataset exists, follows `save_steps`.
- 🔥save_total_limit: Default is None, saving all checkpoints.
- max_steps: Default is -1, maximum number of training steps. Must be set when the dataset is streaming.
- 🔥warmup_ratio: Default is 0.
- save_on_each_node: Default is False. Should be considered in multi-node training.
4 changes: 2 additions & 2 deletions docs/source_en/Instruction/Command-line-parameters.md
@@ -107,9 +107,9 @@ Other important parameters:
- 🔥gradient_accumulation_steps: Gradient accumulation, default is 1.
- 🔥save_strategy: Strategy for saving the model, options are 'no', 'steps', 'epoch', default is 'steps'.
- 🔥save_steps: Default is 500.
- 🔥save_total_limit: Default is None, saving all checkpoints.
- 🔥eval_strategy: Evaluation strategy, follows `save_strategy`.
- 🔥eval_strategy: Default is None. Evaluation strategy, follows `save_strategy`.
- 🔥eval_steps: Default is None. If an evaluation dataset exists, follows `save_steps`.
- 🔥save_total_limit: Default is None, saving all checkpoints.
- max_steps: Default is -1, maximum number of training steps. Must be set when the dataset is streaming.
- 🔥warmup_ratio: Default is 0.
- save_on_each_node: Default is False. Should be considered in multi-node training.
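
For illustration, here is a minimal sketch of how a few of these parameters fit together, modeled on the calls in `tests/train/test_sft.py` in this PR; the model and dataset IDs are just examples, and `swift.llm` is assumed to expose `sft_main` and `TrainArguments` as those tests use them:

```python
from swift.llm import TrainArguments, sft_main

result = sft_main(
    TrainArguments(
        model='qwen/Qwen2-7B-Instruct',                     # example model ID
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100'],  # example dataset spec
        save_strategy='steps',  # checkpoint every `save_steps` optimizer steps
        save_steps=500,
        save_total_limit=2,     # keep only the 2 most recent checkpoints
        # eval_strategy is left at its default (None): it falls back to
        # `save_strategy`, and eval_steps then falls back to `save_steps`.
    ))
print(result['last_model_checkpoint'])
```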
2 changes: 1 addition & 1 deletion requirements/framework.txt
@@ -28,7 +28,7 @@ sentencepiece
tensorboard
tiktoken
tqdm
transformers>=4.33,<4.47
transformers>=4.33,<4.48
transformers_stream_generator
trl>=0.11,<0.12
uvicorn
23 changes: 13 additions & 10 deletions swift/llm/argument/train_args.py
@@ -34,12 +34,24 @@ class Seq2SeqTrainingOverrideArguments(Seq2SeqTrainingArguments):
    report_to: List[str] = field(default_factory=lambda: ['tensorboard'])
    remove_unused_columns: bool = False
    logging_first_step: bool = True
    eval_strategy: Optional[str] = None  # steps, epoch

    def _init_output_dir(self):
        if self.output_dir is not None:
            return
        self.output_dir = f'output/{self.model_suffix}'

    def _init_eval_strategy(self):
        if self.eval_strategy is None:
            self.eval_strategy = self.save_strategy
        if self.eval_strategy == 'no':
            self.eval_steps = None
            self.split_dataset_ratio = 0.
            logger.info(f'Setting args.split_dataset_ratio: {self.split_dataset_ratio}')
        elif self.eval_strategy == 'steps' and self.eval_steps is None:
            self.eval_steps = self.save_steps
        self.evaluation_strategy = self.eval_strategy

    def __post_init__(self):
        self._init_output_dir()
        if self.metric_for_best_model is None:
@@ -56,16 +68,7 @@ def __post_init__(self):
        self.lr_scheduler_kwargs = self.parse_to_dict(self.lr_scheduler_kwargs)
        if getattr(self, 'gradient_checkpointing_kwargs', None):
            self.gradient_checkpointing_kwargs = self.parse_to_dict(self.gradient_checkpointing_kwargs)

        if len(self.val_dataset) == 0 and self.split_dataset_ratio == 0:
            self.evaluation_strategy = IntervalStrategy.NO
            self.eval_strategy = IntervalStrategy.NO
            self.eval_steps = None
        else:
            self.evaluation_strategy = self.save_strategy
            self.eval_strategy = self.save_strategy
            if self.eval_steps is None:
                self.eval_steps = self.save_steps
        self._init_eval_strategy()


@dataclass
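
To make the new defaulting behavior concrete, here is a standalone sketch of the resolution `_init_eval_strategy` performs, written as a pure function for illustration (a paraphrase of the logic above, not the library code):

```python
def resolve_eval(eval_strategy, eval_steps, save_strategy, save_steps,
                 split_dataset_ratio):
    """Illustrative mirror of the `_init_eval_strategy` logic above."""
    if eval_strategy is None:
        eval_strategy = save_strategy  # follow the save strategy by default
    if eval_strategy == 'no':
        eval_steps = None              # evaluation fully disabled
        split_dataset_ratio = 0.       # no validation split needed
    elif eval_strategy == 'steps' and eval_steps is None:
        eval_steps = save_steps        # follow the save interval
    return eval_strategy, eval_steps, split_dataset_ratio

# With the defaults (eval_strategy=None, save_strategy='steps', save_steps=500):
assert resolve_eval(None, None, 'steps', 500, 0.01) == ('steps', 500, 0.01)
# Passing eval_strategy='no' also zeroes the dataset split:
assert resolve_eval('no', None, 'steps', 500, 0.01) == ('no', None, 0.0)
```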
34 changes: 33 additions & 1 deletion tests/train/test_sft.py
@@ -304,6 +304,36 @@ def test_emu3_gen():
    infer_main(args)


def test_eval_strategy():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    result = sft_main(
        TrainArguments(
            model='qwen/Qwen2-7B-Instruct',
            eval_strategy='no',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True))


def test_epoch():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments

    train_kwargs = kwargs.copy()
    train_kwargs['num_train_epochs'] = 3
    # train_kwargs['save_steps'] = 2  # not used: save_steps is ignored when save_strategy='epoch'
    result = sft_main(
        TrainArguments(
            model='qwen/Qwen2-7B-Instruct',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#50', 'AI-ModelScope/alpaca-gpt4-data-en#50'],
            save_strategy='epoch',
            **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True))


if __name__ == '__main__':
    # test_llm_ddp()
    # test_mllm_mp()
@@ -325,5 +355,7 @@ def test_emu3_gen():
    # test_template()
    # test_qwen_vl()
    # test_qwen2_audio()
    test_emu3_gen()
    # test_emu3_gen()
    # test_unsloth()
    # test_eval_strategy()
    test_epoch()