From 5455e98f3ae6c57649caeaad2a56fa1b9077dc27 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Jan 2025 21:13:44 +0800
Subject: [PATCH] fix link & bug

---
 ...\222\214\346\225\260\346\215\256\351\233\206.md" |  8 ++++----
 .../Instruction/Supported-models-and-datasets.md    |  8 ++++----
 swift/llm/argument/base_args/base_args.py           | 10 +++++++++-
 swift/llm/model/model/llava.py                      |  6 +++---
 swift/llm/train/rlhf.py                             | 13 +++++++++++--
 tests/llm/test_run.py                               |  5 +++--
 6 files changed, 34 insertions(+), 16 deletions(-)

diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
index f36055b293..c84077a542 100644
--- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
+++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
@@ -413,7 +413,7 @@
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
-|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
@@ -593,9 +593,9 @@
 |[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers)|llava_llama3_hf|llava_llama3_hf|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
 |[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b)|llava1_6_mistral|llava1_6_mistral|transformers>=4.34|vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b)|llava1_6_yi|llava1_6_yi|transformers>=4.34|vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
-|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
-|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|[AI-ModelScope/llava-next-72b](https://modelscope.cn/models/AI-ModelScope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|[AI-ModelScope/llava-next-110b](https://modelscope.cn/models/AI-ModelScope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|[AI-ModelScope/llama3-llava-next-8b](https://modelscope.cn/models/AI-ModelScope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
 |[deepseek-ai/deepseek-vl-1.3b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-1.3b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)|
 |[deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)|
 |[deepseek-ai/deepseek-vl2-tiny](https://modelscope.cn/models/deepseek-ai/deepseek-vl2-tiny)|deepseek_vl2|deepseek_vl2|transformers<4.42|vision|[deepseek-ai/deepseek-vl2-tiny](https://huggingface.co/deepseek-ai/deepseek-vl2-tiny)|
diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
index cc3ad9ed3c..04aebafe5e 100644
--- a/docs/source_en/Instruction/Supported-models-and-datasets.md
+++ b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -413,7 +413,7 @@ The table below introduces the models integrated with ms-swift:
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
-|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
@@ -593,9 +593,9 @@ The table below introduces the models integrated with ms-swift:
 |[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers)|llava_llama3_hf|llava_llama3_hf|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
 |[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b)|llava1_6_mistral|llava1_6_mistral|transformers>=4.34|vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b)|llava1_6_yi|llava1_6_yi|transformers>=4.34|vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
-|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
-|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|[AI-ModelScope/llava-next-72b](https://modelscope.cn/models/AI-ModelScope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|[AI-ModelScope/llava-next-110b](https://modelscope.cn/models/AI-ModelScope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|[AI-ModelScope/llama3-llava-next-8b](https://modelscope.cn/models/AI-ModelScope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
 |[deepseek-ai/deepseek-vl-1.3b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-1.3b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)|
 |[deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)|
 |[deepseek-ai/deepseek-vl2-tiny](https://modelscope.cn/models/deepseek-ai/deepseek-vl2-tiny)|deepseek_vl2|deepseek_vl2|transformers<4.42|vision|[deepseek-ai/deepseek-vl2-tiny](https://huggingface.co/deepseek-ai/deepseek-vl2-tiny)|
diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py
index a39bac4ad3..82f3e82989 100644
--- a/swift/llm/argument/base_args/base_args.py
+++ b/swift/llm/argument/base_args/base_args.py
@@ -241,7 +241,14 @@ def get_template(self, processor: 'Processor') -> 'Template':
         logger.info(f'default_system: {template.template_meta.default_system}')
         return template

-    def get_model_processor(self, *, model=None, model_type=None, model_revision=None, task_type=None, **kwargs):
+    def get_model_processor(self,
+                            *,
+                            model=None,
+                            model_type=None,
+                            model_revision=None,
+                            task_type=None,
+                            num_labels=None,
+                            **kwargs):
         if self.tuner_backend == 'unsloth':
             return load_by_unsloth(self)
         kwargs.update(self.get_model_kwargs())
@@ -250,5 +257,6 @@ def get_model_processor(self, *, model=None, model_type=None, model_revision=Non
         kwargs['model_type'] = model_type or self.model_type
         kwargs['model_revision'] = model_revision or self.model_revision
         kwargs['task_type'] = task_type or self.task_type
+        kwargs['num_labels'] = num_labels or self.num_labels

         return get_model_tokenizer(**kwargs)
diff --git a/swift/llm/model/model/llava.py b/swift/llm/model/model/llava.py
index 0b5c29e51b..811a49b793 100644
--- a/swift/llm/model/model/llava.py
+++ b/swift/llm/model/model/llava.py
@@ -334,7 +334,7 @@ def _new_forward(*args, **kwargs):
         MLLMModelType.llama3_llava_next,
         [
             ModelGroup([
-                Model('AI-Modelscope/llama3-llava-next-8b', 'lmms-lab/llama3-llava-next-8b'),
+                Model('AI-ModelScope/llama3-llava-next-8b', 'lmms-lab/llama3-llava-next-8b'),
             ], ),
         ],
         TemplateType.llama3_llava_next,
@@ -379,8 +379,8 @@ def _new_forward(*args, **kwargs):
     ModelMeta(
         MLLMModelType.llava_next_qwen, [
             ModelGroup([
-                Model('AI-Modelscope/llava-next-72b', 'lmms-lab/llava-next-72b'),
-                Model('AI-Modelscope/llava-next-110b', 'lmms-lab/llava-next-110b'),
+                Model('AI-ModelScope/llava-next-72b', 'lmms-lab/llava-next-72b'),
+                Model('AI-ModelScope/llava-next-110b', 'lmms-lab/llava-next-110b'),
             ], ),
         ],
         TemplateType.llava_next_qwen,
diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py
index aa5ce07091..00ab04a7c1 100644
--- a/swift/llm/train/rlhf.py
+++ b/swift/llm/train/rlhf.py
@@ -30,10 +30,19 @@ def _prepare_model_tokenizer(self):
             model_type = getattr(args, f'{key}_model_type')
             model_revision = getattr(args, f'{key}_model_revision')
             adapters = args.adapters if key == 'ref' else args.reward_adapters
-            task_type = args.task_type if origin_key == 'ref' else 'seq_cls'
+            if origin_key == 'ref':
+                task_type = args.task_type
+                num_labels = None
+            else:
+                task_type = 'seq_cls'
+                num_labels = 1
             # Be aware of the unexpected behavior caused by double monkey patching.
             model = args.get_model_processor(
-                model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type)[0]
+                model=model_id_or_path,
+                model_type=model_type,
+                model_revision=model_revision,
+                task_type=task_type,
+                num_labels=num_labels)[0]
             model = prepare_adapter(args, model, adapters)

             if origin_key in {'ref', 'reward'}:
diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py
index 862710357f..b5ef6330d5 100644
--- a/tests/llm/test_run.py
+++ b/tests/llm/test_run.py
@@ -1,6 +1,6 @@
 if __name__ == '__main__':
     import os
-    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

 import os
@@ -242,13 +242,14 @@ def test_rlhf(self):
             if rlhf_type != 'kto' else 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#100')
         train_kwargs = {}
         if rlhf_type == 'ppo':
-            train_kwargs['reward_model_type'] = 'Qwen/Qwen2-1.5B-Instruct'
+            train_kwargs['reward_model'] = 'Qwen/Qwen2-1.5B-Instruct'
         output = rlhf_main(
             RLHFArguments(
                 rlhf_type=rlhf_type,
                 model='Qwen/Qwen2-1.5B-Instruct',
                 dataset=dataset,
                 eval_steps=5,
+                split_dataset_ratio=0.05,
                 **train_kwargs,
                 **kwargs))
         if rlhf_type == 'ppo':
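
A quick usage sketch for the rlhf.py and base_args.py changes above, not part of the commit itself: a PPO reward model scores each response with a single scalar, so it has to be loaded as a 'seq_cls' model with num_labels=1, while the ref model keeps the task type of the model being trained. The snippet mirrors the call path exercised in tests/llm/test_run.py; the `from swift.llm import ...` path and the dataset id are assumptions, not taken from this patch.

    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # Assumed public entry points; the test above calls rlhf_main/RLHFArguments directly.
    from swift.llm import rlhf_main, RLHFArguments

    output = rlhf_main(
        RLHFArguments(
            rlhf_type='ppo',
            model='Qwen/Qwen2-1.5B-Instruct',
            # With this patch the scoring model is passed via `reward_model`
            # (the test previously used the wrong key `reward_model_type`);
            # swift then loads it with task_type='seq_cls' and num_labels=1.
            reward_model='Qwen/Qwen2-1.5B-Instruct',
            dataset='AI-ModelScope/hh_rlhf_cn#100',  # placeholder dataset id
            eval_steps=5,
            split_dataset_ratio=0.05))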