diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 6c3571cef5..b57b894a65 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -46,8 +46,8 @@ - download_mode: 数据集下载模式,包含`reuse_dataset_if_exists`和`force_redownload`,默认为reuse_dataset_if_exists - columns: 用于对数据集进行列映射,使数据集满足AutoPreprocessor可以处理的样式,具体查看[这里](../Customization/自定义数据集.md)。你可以传入json字符串,例如:`'{"text1": "query", "text2": "response"}'`,默认为None。 - strict: 如果为True,则数据集只要某行有问题直接抛错,否则会丢弃出错数据样本。默认False -- 🔥model_name: 仅用于自我认知任务,传入模型中文名和英文名,以空格分隔,例如:`--model_name 小黄 'Xiao Huang'`。默认为None -- 🔥model_author: 仅用于自我认知任务,传入模型作者的中文名和英文名,以空格分隔,例如:`--model_author '魔搭' 'ModelScope'`。默认为None +- 🔥model_name: 仅用于自我认知任务,只对`swift/self-cognition`数据集生效,替换掉数据集中的`{{NAME}}`通配符。传入模型中文名和英文名,以空格分隔,例如:`--model_name 小黄 'Xiao Huang'`。默认为None +- 🔥model_author: 仅用于自我认知任务,只对`swift/self-cognition`数据集生效,替换掉数据集中的`{{AUTHOR}}`通配符。传入模型作者的中文名和英文名,以空格分隔,例如:`--model_author '魔搭' 'ModelScope'`。默认为None - custom_dataset_info: 自定义数据集注册的json文件路径,参考[自定义数据集](../Customization/自定义数据集.md)。默认为`[]` ### 模板参数 @@ -113,6 +113,7 @@ - remove_unused_columns: 是否删除数据集中不被使用的列,默认为False - logging_first_step: 是否记录第一个step的日志,默认为True - logging_steps: 日志打印间隔,默认为5 +- predict_with_generate: 验证时使用生成式的方式,默认为False。 - metric_for_best_model: 默认为None,即当`predict_with_generate`设置为False时,设置为'loss',否则设置为'rouge-l' - greater_is_better: 默认为None,即当`metric_for_best_model`含'loss'时,设置为False,否则设置为True. @@ -330,6 +331,7 @@ RLHF参数继承于[训练参数](#训练参数) - simpo_gamma: SimPO算法中的reward margin项,论文建议设置为0.5-1.5,默认为`1.` - desirable_weight: KTO算法中对desirable response的loss权重 $\lambda_D$,默认为`1.` - undesirable_weight: KTO算法中对undesirable response的loss权重 $\lambda_U$,默认为`1.` +- loss_scale: 覆盖模板参数,默认为'last_round' #### PPO参数 - reward_model: 默认为None diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index 708fff7bb3..59da2bf6b4 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -47,8 +47,8 @@ Hints: - download_mode: Dataset download mode, including `reuse_dataset_if_exists` and `force_redownload`, default is reuse_dataset_if_exists. - columns: Used for column mapping of the dataset to ensure that the dataset conforms to the format that AutoPreprocessor can handle. For more details, see [here](../Customization/Custom-dataset.md). You can pass in a JSON string, for example: `'{"text1": "query", "text2": "response"}'`, with the default being None. - strict: If set to True, any row with an issue in the dataset will throw an error immediately, otherwise, erroneous data samples will be discarded. Default is False. -- 🔥model_name: Used only for self-awareness tasks, pass in the Chinese and English names of the model, separated by a space, e.g., `--model_name Xiao Huang 'Xiao Huang'`. Default is None. -- 🔥model_author: Used only for self-awareness tasks, pass in the Chinese and English names of the model author, separated by a space, e.g., `--model_author '魔搭' 'ModelScope'`. Default is None. +- 🔥model_name: Only applicable to the self-cognition task and effective only on the `swift/self-cognition` dataset. It replaces the `{{NAME}}` placeholder in the dataset. 
diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md
index 708fff7bb3..59da2bf6b4 100644
--- a/docs/source_en/Instruction/Command-line-parameters.md
+++ b/docs/source_en/Instruction/Command-line-parameters.md
@@ -47,8 +47,8 @@ Hints:
 - download_mode: Dataset download mode, including `reuse_dataset_if_exists` and `force_redownload`, default is reuse_dataset_if_exists.
 - columns: Used for column mapping of the dataset to ensure that the dataset conforms to the format that AutoPreprocessor can handle. For more details, see [here](../Customization/Custom-dataset.md). You can pass in a JSON string, for example: `'{"text1": "query", "text2": "response"}'`, with the default being None.
 - strict: If set to True, any row with an issue in the dataset will throw an error immediately, otherwise, erroneous data samples will be discarded. Default is False.
-- 🔥model_name: Used only for self-awareness tasks, pass in the Chinese and English names of the model, separated by a space, e.g., `--model_name Xiao Huang 'Xiao Huang'`. Default is None.
-- 🔥model_author: Used only for self-awareness tasks, pass in the Chinese and English names of the model author, separated by a space, e.g., `--model_author '魔搭' 'ModelScope'`. Default is None.
+- 🔥model_name: Only applicable to the self-cognition task and effective only on the `swift/self-cognition` dataset. It replaces the `{{NAME}}` placeholder in the dataset. Pass the model's Chinese and English names, separated by a space, for example: `--model_name 小黄 'Xiao Huang'`. Default is None.
+- 🔥model_author: Only applicable to the self-cognition task and effective only on the `swift/self-cognition` dataset. It replaces the `{{AUTHOR}}` placeholder in the dataset. Pass the model author's Chinese and English names, separated by a space, for example: `--model_author '魔搭' 'ModelScope'`. Default is None.
 - custom_dataset_info: The path to the JSON file for custom dataset registration. Refer to [Custom Dataset](../Customization/Custom-dataset.md). Default is `[]`.

@@ -117,6 +117,7 @@ This parameter list inherits from transformers `Seq2SeqTrainingArguments`, with
 - remove_unused_columns: Whether to remove unused columns in the dataset, defaults to False.
 - logging_first_step: Whether to log the first step, defaults to True.
 - logging_steps: Interval for logging, defaults to 5.
+- predict_with_generate: Whether to use generation during validation, defaults to False.
 - metric_for_best_model: Defaults to None, which sets it to 'loss' when `predict_with_generate` is False, otherwise sets it to 'rouge-l'.
 - greater_is_better: Defaults to None, which sets it to False when `metric_for_best_model` contains 'loss', otherwise sets to True.
@@ -339,6 +340,7 @@ RLHF arguments inherit from the [training arguments](#training-arguments).
 - simpo_gamma: Reward margin term in the SimPO algorithm, with a paper-suggested setting of 0.5-1.5, default is `1.`.
 - desirable_weight: Loss weight $\lambda_D$ for desirable response in the KTO algorithm, default is `1.`.
 - undesirable_weight: Loss weight $\lambda_U$ for undesirable response in the KTO algorithm, default is `1.`.
+- loss_scale: Overrides the template's loss_scale; defaults to 'last_round', or 'default' for ORPO with a non-multimodal model.

 #### PPO Arguments
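The three evaluation flags documented above interact; below is a minimal sketch of the documented default resolution (an assumed helper for illustration, not the actual swift source):

```python
from typing import Optional, Tuple

def resolve_eval_metric(predict_with_generate: bool,
                        metric_for_best_model: Optional[str] = None,
                        greater_is_better: Optional[bool] = None) -> Tuple[str, bool]:
    if metric_for_best_model is None:
        # Teacher-forced eval selects checkpoints on loss; generative eval on ROUGE-L.
        metric_for_best_model = 'rouge-l' if predict_with_generate else 'loss'
    if greater_is_better is None:
        # Lower is better for any loss-like metric.
        greater_is_better = 'loss' not in metric_for_best_model
    return metric_for_best_model, greater_is_better

assert resolve_eval_metric(False) == ('loss', False)
assert resolve_eval_metric(True) == ('rouge-l', True)
```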
diff --git a/examples/train/multi-node/dlc/train.sh b/examples/train/multi-node/dlc/train.sh
index 182088eb26..accbe6e716 100644
--- a/examples/train/multi-node/dlc/train.sh
+++ b/examples/train/multi-node/dlc/train.sh
@@ -1,19 +1,24 @@
+# https://help.aliyun.com/zh/pai/user-guide/general-environment-variables
 NNODES=$WORLD_SIZE \
 NODE_RANK=$RANK \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
-    --dataset 'swift/self-cognition#1000' \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 4 \
     --eval_steps 100 \
     --save_steps 100 \
     --save_total_limit 2 \
     --logging_steps 5 \
-    --deepspeed zero3 \
-    --model_author swift \
-    --model_name swift-robot
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
diff --git a/examples/train/multi-node/swift/train_node1.sh b/examples/train/multi-node/swift/train_node1.sh
index 976f757ab8..f6e66edbf5 100644
--- a/examples/train/multi-node/swift/train_node1.sh
+++ b/examples/train/multi-node/swift/train_node1.sh
@@ -1,22 +1,30 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-NNODES=2 \
+NNODES=$nnodes \
 NODE_RANK=0 \
 MASTER_ADDR=127.0.0.1 \
-NPROC_PER_NODE=4 \
+MASTER_PORT=29500 \
+NPROC_PER_NODE=$nproc_per_node \
 swift sft \
-    --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
-    --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
-    --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
-    --eval_steps 100 \
-    --save_steps 100 \
-    --save_total_limit 2 \
-    --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
diff --git a/examples/train/multi-node/swift/train_node2.sh b/examples/train/multi-node/swift/train_node2.sh
index 22e4eee12a..e2f5e5e9cf 100644
--- a/examples/train/multi-node/swift/train_node2.sh
+++ b/examples/train/multi-node/swift/train_node2.sh
@@ -1,22 +1,30 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-NNODES=2 \
+NNODES=$nnodes \
 NODE_RANK=1 \
 MASTER_ADDR=xxx.xxx.xxx.xxx \
-NPROC_PER_NODE=4 \
+MASTER_PORT=29500 \
+NPROC_PER_NODE=$nproc_per_node \
 swift sft \
-    --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
-    --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
-    --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
-    --eval_steps 100 \
-    --save_steps 100 \
-    --save_total_limit 2 \
-    --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
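The `$(expr 32 / $nproc_per_node / $nnodes)` in the two node scripts keeps the effective global batch size fixed at 32 (the value this patch chose) as the topology changes; a quick check of the arithmetic:

```shell
# global_batch = per_device_train_batch_size * nproc_per_node * nnodes * grad_accum
#             -> 1 * 4 * 2 * 4 = 32
nnodes=2
nproc_per_node=4
echo $(expr 32 / $nproc_per_node / $nnodes)   # prints 4
```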
diff --git a/examples/train/multi-node/torchrun/train_node1.sh b/examples/train/multi-node/torchrun/train_node1.sh
index 0072ca47cf..28a7b72fc2 100644
--- a/examples/train/multi-node/torchrun/train_node1.sh
+++ b/examples/train/multi-node/torchrun/train_node1.sh
@@ -1,19 +1,31 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=0 --master_addr=127.0.0.1 \
+torchrun \
+    --master_port 29500 \
+    --nproc_per_node=$nproc_per_node \
+    --nnodes=$nnodes \
+    --node_rank=0 \
+    --master_addr=127.0.0.1 \
 swift/cli/sft.py \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
     --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
     --eval_steps 100 \
     --save_steps 100 \
     --save_total_limit 2 \
     --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
diff --git a/examples/train/multi-node/torchrun/train_node2.sh b/examples/train/multi-node/torchrun/train_node2.sh
index 0de8699c3f..fcd66919e4 100644
--- a/examples/train/multi-node/torchrun/train_node2.sh
+++ b/examples/train/multi-node/torchrun/train_node2.sh
@@ -1,19 +1,31 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=1 --master_addr=xxx.xxx.xxx.xxx \
+torchrun \
+    --master_port 29500 \
+    --nproc_per_node=$nproc_per_node \
+    --nnodes=$nnodes \
+    --node_rank=1 \
+    --master_addr=xxx.xxx.xxx.xxx \
 swift/cli/sft.py \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
     --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
     --eval_steps 100 \
     --save_steps 100 \
     --save_total_limit 2 \
     --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
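The torchrun flags above mirror the environment variables consumed by the `swift sft` launcher in the earlier node scripts; the correspondence below is inferred from the two sets of examples, not from launcher documentation:

```shell
# Assumed launcher correspondence:
#   NNODES=...         <->  torchrun --nnodes=...
#   NODE_RANK=...      <->  torchrun --node_rank=...
#   NPROC_PER_NODE=... <->  torchrun --nproc_per_node=...
#   MASTER_ADDR=...    <->  torchrun --master_addr=...
#   MASTER_PORT=...    <->  torchrun --master_port=...
```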
diff --git a/examples/train/predict_with_generate/train.sh b/examples/train/predict_with_generate/train.sh
new file mode 100644
index 0000000000..b989954868
--- /dev/null
+++ b/examples/train/predict_with_generate/train.sh
@@ -0,0 +1,28 @@
+# 20GiB
+CUDA_VISIBLE_DEVICES=0 \
+MAX_PIXELS=1003520 \
+swift sft \
+    --model Qwen/Qwen2.5-VL-7B-Instruct \
+    --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 2 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --predict_with_generate true \
+    --metric_for_best_model rouge-l \
+    --greater_is_better true
diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py
index 68d4a574b7..6d0682ca5b 100644
--- a/swift/llm/argument/rlhf_args.py
+++ b/swift/llm/argument/rlhf_args.py
@@ -68,8 +68,7 @@ class RLHFArguments(PPOArguments, TrainArguments):
     desirable_weight: float = 1.0
     undesirable_weight: float = 1.0

-    # Use last_round by default
-    loss_scale: str = 'last_round'
+    loss_scale: Optional[str] = None

     def __post_init__(self):
         self._init_rm()
@@ -78,6 +77,13 @@ def __post_init__(self):
         super().__post_init__()
         self._init_ppo()

+        if self.loss_scale is None:
+            if self.rlhf_type == 'orpo' and not self.model_meta.is_multimodal:
+                # Avoid padding labels during the model's forward pass in multimodal models.
+                # Some multimodal models do not expand the image pad token.
+                self.loss_scale = 'default'
+            else:
+                self.loss_scale = 'last_round'
         if self.rlhf_type in ['dpo', 'kto', 'ppo'] and self.train_type == 'full':
             self.ref_model = self.ref_model or self.model
             self.ref_model_type = self.ref_model_type or self.model_type
diff --git a/swift/llm/model/utils.py b/swift/llm/model/utils.py
index f9b515cea3..2eceef8c0c 100644
--- a/swift/llm/model/utils.py
+++ b/swift/llm/model/utils.py
@@ -207,6 +207,8 @@ def _get_arch_mapping():
         res = {}
         for model_type, model_meta in MODEL_MAPPING.items():
             architectures = model_meta.architectures
+            if not architectures:
+                architectures.append('null')
             for arch in architectures:
                 if arch not in res:
                     res[arch] = []
@@ -216,7 +218,7 @@
     @staticmethod
     def get_matched_model_types(config: Union[PretrainedConfig, Dict[str, Any]]) -> List[str]:
         """Get possible model_type."""
-        arch = HfConfigFactory.get_config_attr(config, 'architectures')
+        arch = HfConfigFactory.get_config_attr(config, 'architectures') or ['null']
         if arch:
             arch = arch[0]
         arch_mapping = HfConfigFactory._get_arch_mapping()
diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py
index cb88c6c88f..b12cba3e20 100644
--- a/swift/llm/template/base.py
+++ b/swift/llm/template/base.py
@@ -171,6 +171,8 @@ def _preprocess_inputs(
         self,
         inputs: StdTemplateInputs,
     ) -> None:
+        if self.model_meta.is_multimodal:
+            self._replace_image_tags(inputs)
         images = inputs.images
         load_images = self.load_images or self.mode in {'vllm', 'lmdeploy'}
         load_images_origin = load_images
@@ -197,8 +199,6 @@
                 'The template does not support multi-round chat. Only use the last round of the conversation.')
             inputs.messages = inputs.messages[-2:]

-        if self.model_meta.is_multimodal:
-            self._replace_image_tags(inputs)
         if inputs.is_multimodal:
             self._add_default_tags(inputs)
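A self-contained illustration of the `'null'` fallback added to `_get_arch_mapping` above: model types whose meta lists no architectures now land in the mapping under `'null'` instead of being dropped, which is what lets `get_matched_model_types` match configs without an `architectures` field. The names here are stand-ins, not swift's real registry:

```python
# Stand-ins for MODEL_MAPPING entries; only the `architectures` field matters here.
class FakeMeta:
    def __init__(self, architectures):
        self.architectures = architectures

MODEL_MAPPING = {
    'qwen2': FakeMeta(['Qwen2ForCausalLM']),
    'no_arch_model': FakeMeta([]),  # previously vanished from the mapping
}

res = {}
for model_type, model_meta in MODEL_MAPPING.items():
    architectures = model_meta.architectures
    if not architectures:
        architectures.append('null')  # the fallback added by this patch
    for arch in architectures:
        res.setdefault(arch, []).append(model_type)

print(res)  # {'Qwen2ForCausalLM': ['qwen2'], 'null': ['no_arch_model']}
```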
diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py
index ea4d9b4e56..17d66982c2 100644
--- a/swift/llm/train/rlhf.py
+++ b/swift/llm/train/rlhf.py
@@ -63,11 +63,6 @@ def _prepare_template(self) -> None:
         model_mapping = {'kto': 'kto', 'ppo': 'pt'}
         self.template.set_mode(model_mapping.get(args.rlhf_type, 'rlhf'))

-        if args.rlhf_type == 'orpo' and not args.model_meta.is_multimodal:
-            # Avoid padding labels during the model's forward pass in multimodal models.
-            args.loss_scale = 'default'
-        self.template.loss_scale = args.loss_scale
-
         if args.rlhf_type == 'ppo':
             args.training_args.stop_token_id = self.template.template_meta.stop_token_id
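With the `__post_init__` change in rlhf_args.py, the hard-coded branch removed here becomes a user-overridable default: ORPO on a text-only model now resolves `loss_scale` to `'default'` unless the flag is passed explicitly. A hypothetical invocation overriding that default (model and dataset are placeholders):

```shell
# Hypothetical override; any preference-format dataset would do.
swift rlhf \
    --rlhf_type orpo \
    --model Qwen/Qwen2.5-7B-Instruct \
    --dataset '<preference-dataset>' \
    --loss_scale last_round
```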