modelscope · Jintao-Huang · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/README.md b/README.md
@@ -145,7 +145,7 @@ Running Environment:
 | modelscope   | >=1.23       |                     |                                           |
 | peft         | >=0.11,<0.20 |                     |                                           |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                                           |
-| trl          | >=0.15,<0.30 | 0.29.1              | RLHF                                      |
+| trl          | >=0.15,<1.0 | 0.29.1              | RLHF                                      |
 | deepspeed    | >=0.14       | 0.18.9              | Training                                  |
 | vllm         | >=0.5.1      | 0.11.0/0.19.1       | Inference/Deployment                      |
 | sglang       | >=0.4.6      |          | Inference/Deployment                      |

diff --git a/README_CN.md b/README_CN.md
@@ -141,7 +141,7 @@ uv pip install -e . --torch-backend=auto
 | modelscope   | >=1.23       |                     |                    |
 | peft         | >=0.11,<0.20 |                     |                    |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                    |
-| trl          | >=0.15,<0.30 | 0.29.1              | RLHF               |
+| trl          | >=0.15,<1.0 | 0.29.1              | RLHF               |
 | deepspeed    | >=0.14       | 0.18.9              | 训练                 |
 | vllm         | >=0.5.1      | 0.11.0/0.19.1        | 推理/部署              |
 | sglang       | >=0.4.6      |          | 推理/部署              |

diff --git a/docs/source/GetStarted/SWIFT-installation.md b/docs/source/GetStarted/SWIFT-installation.md
@@ -151,7 +151,7 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2
 | modelscope   | >=1.23       |                     |                    |
 | peft         | >=0.11,<0.20 |                     |                    |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                    |
-| trl          | >=0.15,<0.30 | 0.29.1              | RLHF               |
+| trl          | >=0.15,<1.0 | 0.29.1              | RLHF               |
 | deepspeed    | >=0.14       | 0.18.9              | 训练                 |
 | vllm         | >=0.5.1      | 0.11.0/0.19.1        | 推理/部署              |
 | sglang       | >=0.4.6      |          | 推理/部署              |

diff --git a/docs/source/Megatron-SWIFT/Custom-Model.md b/docs/source/Megatron-SWIFT/Custom-Model.md
@@ -1,7 +1,7 @@
-# Megatron-SWIFT 自定义模型
+# 自定义Megatron模型
 
 
-这里介绍如何在Mcore-Bridge中注册模型，以支持新模型在Megatron-SWIFT中的训练。我们将以MiniMax-M2.7为例子介绍。
+这里介绍如何在[Mcore-Bridge](https://github.com/modelscope/mcore-bridge)中注册模型，以支持新模型在Megatron-SWIFT中的训练。我们将以MiniMax-M2.7为例子介绍。
 
 ## 下载模型
 
@@ -14,7 +14,7 @@ model_dir = safe_snapshot_download('MiniMax/MiniMax-M2.7', download_model=False)
 print(f'model_dir: {model_dir}')
 ```
 
-由于模型权重很大，为了加速支持模型的效率，我们采用懒下载的方式，并只下载`num_layers`层的权重，构建mini版本的模型，用于做接入测试。以MiniMax-M2.7为例，我们构建了一层的BF16版本的权重。若有些模型出现前3层为Dense，之后为MoE，则你可以构建4层的权重。
+由于模型权重很大，为了提高模型接入的效率，我们采用懒下载的方式，并只下载`num_layers`层的权重，构建mini版本的模型，用于做接入测试。以MiniMax-M2.7为例，我们构建了一层的BF16版本的权重。若有些模型出现前3层为Dense，之后为MoE，则你可以构建4层的权重。若出现Attention交替的情况，例如Qwen3.5采用linear-attention和full-attention交替，你也需要更多的层数。
 
 ```python
 import os

diff --git a/docs/source/Megatron-SWIFT/Quick-start.md b/docs/source/Megatron-SWIFT/Quick-start.md
@@ -78,7 +78,7 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2
 | transformers | >=4.33       | 4.57.6/5.6.2   |                    |
 | modelscope   | >=1.23       |             |                    |
 | peft         | >=0.11,<0.20 |             |      LoRA          |
-| trl          | >=0.15,<0.30 |       |      RLHF        |
+| trl          | >=0.15,<1.0 | 0.29.1 |      RLHF        |
 
 
 ## 快速入门案例

diff --git a/docs/source_en/GetStarted/SWIFT-installation.md b/docs/source_en/GetStarted/SWIFT-installation.md
@@ -150,7 +150,7 @@ More images can be found [here](https://modelscope.cn/docs/intro/environment-set
 | modelscope   | >=1.23       |                     |                                           |
 | peft         | >=0.11,<0.20 |                     |                                           |
 | flash_attn   |              | 2.8.3/3.0.0b1 |                                           |
-| trl          | >=0.15,<0.30 | 0.29.1              | RLHF                                      |
+| trl          | >=0.15,<1.0 | 0.29.1              | RLHF                                      |
 | deepspeed    | >=0.14       | 0.18.9              | Training                                  |
 | vllm         | >=0.5.1      | 0.11.0/0.19.1       | Inference/Deployment                      |
 | sglang       | >=0.4.6      |          | Inference/Deployment                      |

diff --git a/docs/source_en/Megatron-SWIFT/Custom-Model.md b/docs/source_en/Megatron-SWIFT/Custom-Model.md
@@ -1,7 +1,7 @@
-# Megatron-SWIFT Custom Model
+# Custom Megatron Model
 
 
-This guide explains how to register a model in Mcore-Bridge to support training new models in Megatron-SWIFT. We will use MiniMax-M2.7 as an example.
+This guide explains how to register a model in [Mcore-Bridge](https://github.com/modelscope/mcore-bridge) to support training new models in Megatron-SWIFT. We will use MiniMax-M2.7 as an example.
 
 ## Download the Model
 
@@ -14,7 +14,7 @@ model_dir = safe_snapshot_download('MiniMax/MiniMax-M2.7', download_model=False)
 print(f'model_dir: {model_dir}')
 ```
 
-Since model weights are very large, to speed up the model integration process, we use lazy downloading and only download weights for `num_layers` layers, building a mini version of the model for integration testing. Taking MiniMax-M2.7 as an example, we build a one-layer BF16 version of the weights. If some models have the first 3 layers as Dense and the rest as MoE, you can build 4 layers of weights.
+Since model weights are very large, to speed up the model integration process, we use lazy downloading and only download weights for `num_layers` layers, building a mini version of the model for integration testing. Taking MiniMax-M2.7 as an example, we build a one-layer BF16 version of the weights. If some models have the first 3 layers as Dense and the rest as MoE, you can build 4 layers of weights. If a model alternates attention types (for example, Qwen3.5 alternates between linear attention and full attention), you will also need more layers.
 
 ```python
 import os

diff --git a/docs/source_en/Megatron-SWIFT/Quick-start.md b/docs/source_en/Megatron-SWIFT/Quick-start.md
@@ -78,7 +78,7 @@ Recommended Operating Environment:
 | transformers | >=4.33       | 4.57.6/5.6.2    |                    |
 | modelscope   | >=1.23       |             |                    |
 | peft         | >=0.11,<0.20 |             |      LoRA          |
-| trl          | >=0.15,<0.30 |       |      RLHF        |
+| trl          | >=0.15,<1.0 |       |      RLHF        |
-| trl          | >=0.15,<1.0 |       |      RLHF        |
+| trl          | >=0.15,<1.0 | 0.29.1 |      RLHF        |
 
 
 ## Quick Start Example

diff --git a/requirements/framework.txt b/requirements/framework.txt
@@ -32,8 +32,8 @@ sortedcontainers>=1.5.9
 tensorboard
 tiktoken
 tqdm
-transformers>=4.33,<5.7.0
+transformers>=4.33,<5.8.0
 transformers_stream_generator
-trl>=0.15,<0.30
+trl>=0.15,<1.0
 uvicorn
 zstandard
diff --git a/requirements/install_all.sh b/requirements/install_all.sh
@@ -3,7 +3,7 @@
 # pip install sglang -U
 pip install "vllm>=0.5.1" -U
 pip install "lmdeploy>=0.5,<0.10.2" -U --no-deps
-pip install "transformers<5.7" "trl<0.30" peft -U
+pip install "transformers>=4.33,<5.8.0" "trl>=0.15,<1.0" "peft>=0.11,<0.20" -U
 pip install auto_gptq optimum bitsandbytes "gradio<5.33" -U
 pip install git+https://github.com/modelscope/ms-swift.git#egg=ms-swift[all]
 pip install timm "deepspeed<0.19" -U

diff --git a/swift/dataset/preprocessor/core.py b/swift/dataset/preprocessor/core.py
@@ -262,8 +262,8 @@ def _new_init(self, schema=None, features=None, *args, **kwargs):
                 }]
                 features['messages'] = messages_feature_with_loss
                 features['rejected_messages'] = messages_feature_with_loss
-                features['positive_messages'] = [messages_feature]
-                features['negative_messages'] = [messages_feature]
+                features['positive_messages'] = messages_feature
+                features['negative_messages'] = messages_feature
                 features['images'] = [{'bytes': Value(dtype='binary'), 'path': Value(dtype='string')}]
                 features['objects'] = {
                     'ref': Sequence(feature=Value(dtype='string'), length=-1),