diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 6c3571cef5..b57b894a65 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -46,8 +46,8 @@ - download_mode: 数据集下载模式,包含`reuse_dataset_if_exists`和`force_redownload`,默认为reuse_dataset_if_exists - columns: 用于对数据集进行列映射,使数据集满足AutoPreprocessor可以处理的样式,具体查看[这里](../Customization/自定义数据集.md)。你可以传入json字符串,例如:`'{"text1": "query", "text2": "response"}'`,默认为None。 - strict: 如果为True,则数据集只要某行有问题直接抛错,否则会丢弃出错数据样本。默认False -- 🔥model_name: 仅用于自我认知任务,传入模型中文名和英文名,以空格分隔,例如:`--model_name 小黄 'Xiao Huang'`。默认为None -- 🔥model_author: 仅用于自我认知任务,传入模型作者的中文名和英文名,以空格分隔,例如:`--model_author '魔搭' 'ModelScope'`。默认为None +- 🔥model_name: 仅用于自我认知任务,只对`swift/self-cognition`数据集生效,替换掉数据集中的`{{NAME}}`通配符。传入模型中文名和英文名,以空格分隔,例如:`--model_name 小黄 'Xiao Huang'`。默认为None +- 🔥model_author: 仅用于自我认知任务,只对`swift/self-cognition`数据集生效,替换掉数据集中的`{{AUTHOR}}`通配符。传入模型作者的中文名和英文名,以空格分隔,例如:`--model_author '魔搭' 'ModelScope'`。默认为None - custom_dataset_info: 自定义数据集注册的json文件路径,参考[自定义数据集](../Customization/自定义数据集.md)。默认为`[]` ### 模板参数 @@ -113,6 +113,7 @@ - remove_unused_columns: 是否删除数据集中不被使用的列,默认为False - logging_first_step: 是否记录第一个step的日志,默认为True - logging_steps: 日志打印间隔,默认为5 +- predict_with_generate: 验证时使用生成式的方式,默认为False。 - metric_for_best_model: 默认为None,即当`predict_with_generate`设置为False时,设置为'loss',否则设置为'rouge-l' - greater_is_better: 默认为None,即当`metric_for_best_model`含'loss'时,设置为False,否则设置为True. @@ -330,6 +331,7 @@ RLHF参数继承于[训练参数](#训练参数) - simpo_gamma: SimPO算法中的reward margin项,论文建议设置为0.5-1.5,默认为`1.` - desirable_weight: KTO算法中对desirable response的loss权重 $\lambda_D$,默认为`1.` - undesirable_weight: KTO算法中对undesirable response的loss权重 $\lambda_U$,默认为`1.` +- loss_scale: 覆盖模板参数,默认为'last_round' #### PPO参数 - reward_model: 默认为None diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index 708fff7bb3..59da2bf6b4 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -47,8 +47,8 @@ Hints: - download_mode: Dataset download mode, including `reuse_dataset_if_exists` and `force_redownload`, default is reuse_dataset_if_exists. - columns: Used for column mapping of the dataset to ensure that the dataset conforms to the format that AutoPreprocessor can handle. For more details, see [here](../Customization/Custom-dataset.md). You can pass in a JSON string, for example: `'{"text1": "query", "text2": "response"}'`, with the default being None. - strict: If set to True, any row with an issue in the dataset will throw an error immediately, otherwise, erroneous data samples will be discarded. Default is False. -- 🔥model_name: Used only for self-awareness tasks, pass in the Chinese and English names of the model, separated by a space, e.g., `--model_name Xiao Huang 'Xiao Huang'`. Default is None. -- 🔥model_author: Used only for self-awareness tasks, pass in the Chinese and English names of the model author, separated by a space, e.g., `--model_author '魔搭' 'ModelScope'`. Default is None. +- 🔥model_name: Only applicable to the self-cognition task and effective only on the `swift/self-cognition` dataset. It replaces the `{{NAME}}` placeholder in the dataset. 
diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md
index 708fff7bb3..59da2bf6b4 100644
--- a/docs/source_en/Instruction/Command-line-parameters.md
+++ b/docs/source_en/Instruction/Command-line-parameters.md
@@ -47,8 +47,8 @@ Hints:
 - download_mode: Dataset download mode, including `reuse_dataset_if_exists` and `force_redownload`, default is reuse_dataset_if_exists.
 - columns: Used for column mapping of the dataset to ensure that the dataset conforms to the format that AutoPreprocessor can handle. For more details, see [here](../Customization/Custom-dataset.md). You can pass in a JSON string, for example: `'{"text1": "query", "text2": "response"}'`, with the default being None.
 - strict: If set to True, any row with an issue in the dataset will throw an error immediately, otherwise, erroneous data samples will be discarded. Default is False.
-- 🔥model_name: Used only for self-awareness tasks, pass in the Chinese and English names of the model, separated by a space, e.g., `--model_name Xiao Huang 'Xiao Huang'`. Default is None.
-- 🔥model_author: Used only for self-awareness tasks, pass in the Chinese and English names of the model author, separated by a space, e.g., `--model_author '魔搭' 'ModelScope'`. Default is None.
+- 🔥model_name: Only applicable to the self-cognition task and effective only on the `swift/self-cognition` dataset. It replaces the `{{NAME}}` placeholder in the dataset. Pass the model's Chinese and English names, separated by a space, for example: `--model_name 小黄 'Xiao Huang'`. Default is None.
+- 🔥model_author: Only applicable to the self-cognition task and effective only on the `swift/self-cognition` dataset. It replaces the `{{AUTHOR}}` placeholder in the dataset. Pass the model author's Chinese and English names, separated by a space, for example: `--model_author '魔搭' 'ModelScope'`. Default is None.
 - custom_dataset_info: The path to the JSON file for custom dataset registration. Refer to [Custom Dataset](../Customization/Custom-dataset.md). Default is `[]`.

@@ -117,6 +117,7 @@ This parameter list inherits from transformers `Seq2SeqTrainingArguments`, with
 - remove_unused_columns: Whether to remove unused columns in the dataset, defaults to False.
 - logging_first_step: Whether to log the first step, defaults to True.
 - logging_steps: Interval for logging, defaults to 5.
+- predict_with_generate: Whether to use generation during validation, defaults to False.
 - metric_for_best_model: Defaults to None, which sets it to 'loss' when `predict_with_generate` is False, otherwise sets it to 'rouge-l'.
 - greater_is_better: Defaults to None, which sets it to False when `metric_for_best_model` contains 'loss', otherwise sets to True.
@@ -339,6 +340,7 @@ RLHF arguments inherit from the [training arguments](#training-arguments).
 - simpo_gamma: Reward margin term in the SimPO algorithm, with a paper-suggested setting of 0.5-1.5, default is `1.`.
 - desirable_weight: Loss weight $\lambda_D$ for desirable response in the KTO algorithm, default is `1.`.
 - undesirable_weight: Loss weight $\lambda_U$ for undesirable response in the KTO algorithm, default is `1.`.
+- loss_scale: Overrides the template's loss_scale; defaults to 'last_round', or 'default' for ORPO with a non-multimodal model.

 #### PPO Arguments
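The three evaluation flags documented above interact; below is a minimal sketch of the documented default resolution (an assumed helper for illustration, not the actual swift source):

```python
from typing import Optional, Tuple

def resolve_eval_metric(predict_with_generate: bool,
                        metric_for_best_model: Optional[str] = None,
                        greater_is_better: Optional[bool] = None) -> Tuple[str, bool]:
    if metric_for_best_model is None:
        # Teacher-forced eval selects checkpoints on loss; generative eval on ROUGE-L.
        metric_for_best_model = 'rouge-l' if predict_with_generate else 'loss'
    if greater_is_better is None:
        # Lower is better for any loss-like metric.
        greater_is_better = 'loss' not in metric_for_best_model
    return metric_for_best_model, greater_is_better

assert resolve_eval_metric(False) == ('loss', False)
assert resolve_eval_metric(True) == ('rouge-l', True)
```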
diff --git a/examples/train/multi-node/dlc/train.sh b/examples/train/multi-node/dlc/train.sh
index 182088eb26..accbe6e716 100644
--- a/examples/train/multi-node/dlc/train.sh
+++ b/examples/train/multi-node/dlc/train.sh
@@ -1,19 +1,24 @@
+# https://help.aliyun.com/zh/pai/user-guide/general-environment-variables
 NNODES=$WORLD_SIZE \
 NODE_RANK=$RANK \
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
-    --dataset 'swift/self-cognition#1000' \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 4 \
     --eval_steps 100 \
     --save_steps 100 \
     --save_total_limit 2 \
     --logging_steps 5 \
-    --deepspeed zero3 \
-    --model_author swift \
-    --model_name swift-robot
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
diff --git a/examples/train/multi-node/swift/train_node1.sh b/examples/train/multi-node/swift/train_node1.sh
index 976f757ab8..f6e66edbf5 100644
--- a/examples/train/multi-node/swift/train_node1.sh
+++ b/examples/train/multi-node/swift/train_node1.sh
@@ -1,22 +1,30 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-NNODES=2 \
+NNODES=$nnodes \
 NODE_RANK=0 \
 MASTER_ADDR=127.0.0.1 \
-NPROC_PER_NODE=4 \
+MASTER_PORT=29500 \
+NPROC_PER_NODE=$nproc_per_node \
 swift sft \
-    --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
-    --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
-    --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
-    --eval_steps 100 \
-    --save_steps 100 \
-    --save_total_limit 2 \
-    --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
diff --git a/examples/train/multi-node/swift/train_node2.sh b/examples/train/multi-node/swift/train_node2.sh
index 22e4eee12a..e2f5e5e9cf 100644
--- a/examples/train/multi-node/swift/train_node2.sh
+++ b/examples/train/multi-node/swift/train_node2.sh
@@ -1,22 +1,30 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-NNODES=2 \
+NNODES=$nnodes \
 NODE_RANK=1 \
 MASTER_ADDR=xxx.xxx.xxx.xxx \
-NPROC_PER_NODE=4 \
+MASTER_PORT=29500 \
+NPROC_PER_NODE=$nproc_per_node \
 swift sft \
-    --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
-    --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
-    --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
-    --eval_steps 100 \
-    --save_steps 100 \
-    --save_total_limit 2 \
-    --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
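The `$(expr 32 / $nproc_per_node / $nnodes)` in the two node scripts keeps the effective global batch size fixed at 32 (the value this patch chose) as the topology changes; a quick check of the arithmetic:

```shell
# global_batch = per_device_train_batch_size * nproc_per_node * nnodes * grad_accum
#             -> 1 * 4 * 2 * 4 = 32
nnodes=2
nproc_per_node=4
echo $(expr 32 / $nproc_per_node / $nnodes)   # prints 4
```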
diff --git a/examples/train/multi-node/torchrun/train_node1.sh b/examples/train/multi-node/torchrun/train_node1.sh
index 0072ca47cf..28a7b72fc2 100644
--- a/examples/train/multi-node/torchrun/train_node1.sh
+++ b/examples/train/multi-node/torchrun/train_node1.sh
@@ -1,19 +1,31 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=0 --master_addr=127.0.0.1 \
+torchrun \
+    --master_port 29500 \
+    --nproc_per_node=$nproc_per_node \
+    --nnodes=$nnodes \
+    --node_rank=0 \
+    --master_addr=127.0.0.1 \
 swift/cli/sft.py \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
     --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
     --eval_steps 100 \
     --save_steps 100 \
     --save_total_limit 2 \
     --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
diff --git a/examples/train/multi-node/torchrun/train_node2.sh b/examples/train/multi-node/torchrun/train_node2.sh
index 0de8699c3f..fcd66919e4 100644
--- a/examples/train/multi-node/torchrun/train_node2.sh
+++ b/examples/train/multi-node/torchrun/train_node2.sh
@@ -1,19 +1,31 @@
+nnodes=2
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-torchrun --master_port 29500 --nproc_per_node=4 --nnodes=2 --node_rank=1 --master_addr=xxx.xxx.xxx.xxx \
+torchrun \
+    --master_port 29500 \
+    --nproc_per_node=$nproc_per_node \
+    --nnodes=$nnodes \
+    --node_rank=1 \
+    --master_addr=xxx.xxx.xxx.xxx \
 swift/cli/sft.py \
     --model Qwen/Qwen2.5-7B-Instruct \
-    --train_type lora \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
     --torch_dtype bfloat16 \
-    --dataset 'swift/self-cognition#1000' \
     --num_train_epochs 1 \
-    --lora_rank 8 \
-    --lora_alpha 32 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
     --eval_steps 100 \
     --save_steps 100 \
     --save_total_limit 2 \
     --logging_steps 5 \
-    --model_author swift \
-    --model_name swift-robot
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2
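The torchrun flags above mirror the environment variables consumed by the `swift sft` launcher in the earlier node scripts; the correspondence below is inferred from the two sets of examples, not from launcher documentation:

```shell
# Assumed launcher correspondence:
#   NNODES=...         <->  torchrun --nnodes=...
#   NODE_RANK=...      <->  torchrun --node_rank=...
#   NPROC_PER_NODE=... <->  torchrun --nproc_per_node=...
#   MASTER_ADDR=...    <->  torchrun --master_addr=...
#   MASTER_PORT=...    <->  torchrun --master_port=...
```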
diff --git a/examples/train/predict_with_generate/train.sh b/examples/train/predict_with_generate/train.sh
new file mode 100644
index 0000000000..b989954868
--- /dev/null
+++ b/examples/train/predict_with_generate/train.sh
@@ -0,0 +1,28 @@
+# 20GiB
+CUDA_VISIBLE_DEVICES=0 \
+MAX_PIXELS=1003520 \
+swift sft \
+    --model Qwen/Qwen2.5-VL-7B-Instruct \
+    --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 2 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 5 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --predict_with_generate true \
+    --metric_for_best_model rouge-l \
+    --greater_is_better true
diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py
index 68d4a574b7..6d0682ca5b 100644
--- a/swift/llm/argument/rlhf_args.py
+++ b/swift/llm/argument/rlhf_args.py
@@ -68,8 +68,7 @@ class RLHFArguments(PPOArguments, TrainArguments):
     desirable_weight: float = 1.0
     undesirable_weight: float = 1.0

-    # Use last_round by default
-    loss_scale: str = 'last_round'
+    loss_scale: Optional[str] = None

     def __post_init__(self):
         self._init_rm()
@@ -78,6 +77,13 @@ def __post_init__(self):
         super().__post_init__()
         self._init_ppo()

+        if self.loss_scale is None:
+            if self.rlhf_type == 'orpo' and not self.model_meta.is_multimodal:
+                # Avoid padding labels during the model's forward pass in multimodal models.
+                # Some multimodal models do not expand the image pad token.
+                self.loss_scale = 'default'
+            else:
+                self.loss_scale = 'last_round'
         if self.rlhf_type in ['dpo', 'kto', 'ppo'] and self.train_type == 'full':
             self.ref_model = self.ref_model or self.model
             self.ref_model_type = self.ref_model_type or self.model_type
diff --git a/swift/llm/model/utils.py b/swift/llm/model/utils.py
index f9b515cea3..2eceef8c0c 100644
--- a/swift/llm/model/utils.py
+++ b/swift/llm/model/utils.py
@@ -207,6 +207,8 @@ def _get_arch_mapping():
         res = {}
         for model_type, model_meta in MODEL_MAPPING.items():
             architectures = model_meta.architectures
+            if not architectures:
+                architectures.append('null')
             for arch in architectures:
                 if arch not in res:
                     res[arch] = []
@@ -216,7 +218,7 @@
     @staticmethod
     def get_matched_model_types(config: Union[PretrainedConfig, Dict[str, Any]]) -> List[str]:
         """Get possible model_type."""
-        arch = HfConfigFactory.get_config_attr(config, 'architectures')
+        arch = HfConfigFactory.get_config_attr(config, 'architectures') or ['null']
         if arch:
             arch = arch[0]
         arch_mapping = HfConfigFactory._get_arch_mapping()
diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py
index cb88c6c88f..b12cba3e20 100644
--- a/swift/llm/template/base.py
+++ b/swift/llm/template/base.py
@@ -171,6 +171,8 @@ def _preprocess_inputs(
         self,
         inputs: StdTemplateInputs,
     ) -> None:
+        if self.model_meta.is_multimodal:
+            self._replace_image_tags(inputs)
         images = inputs.images
         load_images = self.load_images or self.mode in {'vllm', 'lmdeploy'}
         load_images_origin = load_images
@@ -197,8 +199,6 @@
                 'The template does not support multi-round chat. Only use the last round of the conversation.')
             inputs.messages = inputs.messages[-2:]

-        if self.model_meta.is_multimodal:
-            self._replace_image_tags(inputs)
         if inputs.is_multimodal:
             self._add_default_tags(inputs)
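A self-contained illustration of the `'null'` fallback added to `_get_arch_mapping` above: model types whose meta lists no architectures now land in the mapping under `'null'` instead of being dropped, which is what lets `get_matched_model_types` match configs without an `architectures` field. The names here are stand-ins, not swift's real registry:

```python
# Stand-ins for MODEL_MAPPING entries; only the `architectures` field matters here.
class FakeMeta:
    def __init__(self, architectures):
        self.architectures = architectures

MODEL_MAPPING = {
    'qwen2': FakeMeta(['Qwen2ForCausalLM']),
    'no_arch_model': FakeMeta([]),  # previously vanished from the mapping
}

res = {}
for model_type, model_meta in MODEL_MAPPING.items():
    architectures = model_meta.architectures
    if not architectures:
        architectures.append('null')  # the fallback added by this patch
    for arch in architectures:
        res.setdefault(arch, []).append(model_type)

print(res)  # {'Qwen2ForCausalLM': ['qwen2'], 'null': ['no_arch_model']}
```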
diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py
index ea4d9b4e56..17d66982c2 100644
--- a/swift/llm/train/rlhf.py
+++ b/swift/llm/train/rlhf.py
@@ -63,11 +63,6 @@ def _prepare_template(self) -> None:
         model_mapping = {'kto': 'kto', 'ppo': 'pt'}
         self.template.set_mode(model_mapping.get(args.rlhf_type, 'rlhf'))

-        if args.rlhf_type == 'orpo' and not args.model_meta.is_multimodal:
-            # Avoid padding labels during the model's forward pass in multimodal models.
-            args.loss_scale = 'default'
-        self.template.loss_scale = args.loss_scale
-
         if args.rlhf_type == 'ppo':
             args.training_args.stop_token_id = self.template.template_meta.stop_token_id
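With the `__post_init__` change in rlhf_args.py, the hard-coded branch removed here becomes a user-overridable default: ORPO on a text-only model now resolves `loss_scale` to `'default'` unless the flag is passed explicitly. A hypothetical invocation overriding that default (model and dataset are placeholders):

```shell
# Hypothetical override; any preference-format dataset would do.
swift rlhf \
    --rlhf_type orpo \
    --model Qwen/Qwen2.5-7B-Instruct \
    --dataset '<preference-dataset>' \
    --loss_scale last_round
```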