From 5455e98f3ae6c57649caeaad2a56fa1b9077dc27 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Jan 2025 21:13:44 +0800
Subject: [PATCH] fix link & bug

---
 ...\222\214\346\225\260\346\215\256\351\233\206.md" |  8 ++++----
 .../Instruction/Supported-models-and-datasets.md    |  8 ++++----
 swift/llm/argument/base_args/base_args.py           | 10 +++++++++-
 swift/llm/model/model/llava.py                      |  6 +++---
 swift/llm/train/rlhf.py                             | 13 +++++++++++--
 tests/llm/test_run.py                               |  5 +++--
 6 files changed, 34 insertions(+), 16 deletions(-)

diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
index f36055b293..c84077a542 100644
--- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
+++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md"
@@ -413,7 +413,7 @@
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
-|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
@@ -593,9 +593,9 @@
 |[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers)|llava_llama3_hf|llava_llama3_hf|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
 |[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b)|llava1_6_mistral|llava1_6_mistral|transformers>=4.34|vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b)|llava1_6_yi|llava1_6_yi|transformers>=4.34|vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
-|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
-|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|[AI-ModelScope/llava-next-72b](https://modelscope.cn/models/AI-ModelScope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|[AI-ModelScope/llava-next-110b](https://modelscope.cn/models/AI-ModelScope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|[AI-ModelScope/llama3-llava-next-8b](https://modelscope.cn/models/AI-ModelScope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
 |[deepseek-ai/deepseek-vl-1.3b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-1.3b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)|
 |[deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)|
 |[deepseek-ai/deepseek-vl2-tiny](https://modelscope.cn/models/deepseek-ai/deepseek-vl2-tiny)|deepseek_vl2|deepseek_vl2|transformers<4.42|vision|[deepseek-ai/deepseek-vl2-tiny](https://huggingface.co/deepseek-ai/deepseek-vl2-tiny)|
diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
index cc3ad9ed3c..04aebafe5e 100644
--- a/docs/source_en/Instruction/Supported-models-and-datasets.md
+++ b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -413,7 +413,7 @@ The table below introduces the models integrated with ms-swift:
 |[LLM-Research/Phi-3-medium-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-medium-128k-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3-medium-128k-instruct](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct)|
 |[LLM-Research/Phi-3.5-mini-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-mini-instruct)|phi3|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)|
 |[LLM-Research/Phi-3.5-MoE-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-MoE-instruct)|phi3_moe|phi3|transformers>=4.36|-|[microsoft/Phi-3.5-MoE-instruct](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)|
-|[microsoft/phi-4](https://modelscope.cn/models/microsoft/phi-4)|phi4|phi4|transformers>=4.36|-|[LLM-Research/phi-4](https://huggingface.co/LLM-Research/phi-4)|
+|[LLM-Research/phi-4](https://modelscope.cn/models/LLM-Research/phi-4)|phi4|phi4|transformers>=4.36|-|[microsoft/phi-4](https://huggingface.co/microsoft/phi-4)|
 |[AI-ModelScope/gemma-2b-it](https://modelscope.cn/models/AI-ModelScope/gemma-2b-it)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it)|
 |[AI-ModelScope/gemma-2b](https://modelscope.cn/models/AI-ModelScope/gemma-2b)|gemma|gemma|transformers>=4.38|-|[google/gemma-2b](https://huggingface.co/google/gemma-2b)|
 |[AI-ModelScope/gemma-7b](https://modelscope.cn/models/AI-ModelScope/gemma-7b)|gemma|gemma|transformers>=4.38|-|[google/gemma-7b](https://huggingface.co/google/gemma-7b)|
@@ -593,9 +593,9 @@ The table below introduces the models integrated with ms-swift:
 |[AI-ModelScope/llava-llama-3-8b-v1_1-transformers](https://modelscope.cn/models/AI-ModelScope/llava-llama-3-8b-v1_1-transformers)|llava_llama3_hf|llava_llama3_hf|transformers>=4.36|vision|[xtuner/llava-llama-3-8b-v1_1-transformers](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers)|
 |[AI-ModelScope/llava-v1.6-mistral-7b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-mistral-7b)|llava1_6_mistral|llava1_6_mistral|transformers>=4.34|vision|[liuhaotian/llava-v1.6-mistral-7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b)|
 |[AI-ModelScope/llava-v1.6-34b](https://modelscope.cn/models/AI-ModelScope/llava-v1.6-34b)|llava1_6_yi|llava1_6_yi|transformers>=4.34|vision|[liuhaotian/llava-v1.6-34b](https://huggingface.co/liuhaotian/llava-v1.6-34b)|
-|[AI-Modelscope/llava-next-72b](https://modelscope.cn/models/AI-Modelscope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
-|[AI-Modelscope/llava-next-110b](https://modelscope.cn/models/AI-Modelscope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
-|[AI-Modelscope/llama3-llava-next-8b](https://modelscope.cn/models/AI-Modelscope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
+|[AI-ModelScope/llava-next-72b](https://modelscope.cn/models/AI-ModelScope/llava-next-72b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-72b](https://huggingface.co/lmms-lab/llava-next-72b)|
+|[AI-ModelScope/llava-next-110b](https://modelscope.cn/models/AI-ModelScope/llava-next-110b)|llava_next_qwen|llava_next_qwen|transformers>=4.42, av|vision|[lmms-lab/llava-next-110b](https://huggingface.co/lmms-lab/llava-next-110b)|
+|[AI-ModelScope/llama3-llava-next-8b](https://modelscope.cn/models/AI-ModelScope/llama3-llava-next-8b)|llama3_llava_next|llama3_llava_next|transformers>=4.42, av|vision|[lmms-lab/llama3-llava-next-8b](https://huggingface.co/lmms-lab/llama3-llava-next-8b)|
 |[deepseek-ai/deepseek-vl-1.3b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-1.3b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)|
 |[deepseek-ai/deepseek-vl-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-vl-7b-chat)|deepseek_vl|deepseek_vl|-|vision|[deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)|
 |[deepseek-ai/deepseek-vl2-tiny](https://modelscope.cn/models/deepseek-ai/deepseek-vl2-tiny)|deepseek_vl2|deepseek_vl2|transformers<4.42|vision|[deepseek-ai/deepseek-vl2-tiny](https://huggingface.co/deepseek-ai/deepseek-vl2-tiny)|
diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py
index a39bac4ad3..82f3e82989 100644
--- a/swift/llm/argument/base_args/base_args.py
+++ b/swift/llm/argument/base_args/base_args.py
@@ -241,7 +241,14 @@ def get_template(self, processor: 'Processor') -> 'Template':
         logger.info(f'default_system: {template.template_meta.default_system}')
         return template

-    def get_model_processor(self, *, model=None, model_type=None, model_revision=None, task_type=None, **kwargs):
+    def get_model_processor(self,
+                            *,
+                            model=None,
+                            model_type=None,
+                            model_revision=None,
+                            task_type=None,
+                            num_labels=None,
+                            **kwargs):
         if self.tuner_backend == 'unsloth':
             return load_by_unsloth(self)
         kwargs.update(self.get_model_kwargs())
@@ -250,5 +257,6 @@ def get_model_processor(self, *, model=None, model_type=None, model_revision=Non
         kwargs['model_type'] = model_type or self.model_type
         kwargs['model_revision'] = model_revision or self.model_revision
         kwargs['task_type'] = task_type or self.task_type
+        kwargs['num_labels'] = num_labels or self.num_labels

         return get_model_tokenizer(**kwargs)
diff --git a/swift/llm/model/model/llava.py b/swift/llm/model/model/llava.py
index 0b5c29e51b..811a49b793 100644
--- a/swift/llm/model/model/llava.py
+++ b/swift/llm/model/model/llava.py
@@ -334,7 +334,7 @@ def _new_forward(*args, **kwargs):
         MLLMModelType.llama3_llava_next,
         [
             ModelGroup([
-                Model('AI-Modelscope/llama3-llava-next-8b', 'lmms-lab/llama3-llava-next-8b'),
+                Model('AI-ModelScope/llama3-llava-next-8b', 'lmms-lab/llama3-llava-next-8b'),
             ], ),
         ],
         TemplateType.llama3_llava_next,
@@ -379,8 +379,8 @@ def _new_forward(*args, **kwargs):
     ModelMeta(
         MLLMModelType.llava_next_qwen, [
             ModelGroup([
-                Model('AI-Modelscope/llava-next-72b', 'lmms-lab/llava-next-72b'),
-                Model('AI-Modelscope/llava-next-110b', 'lmms-lab/llava-next-110b'),
+                Model('AI-ModelScope/llava-next-72b', 'lmms-lab/llava-next-72b'),
+                Model('AI-ModelScope/llava-next-110b', 'lmms-lab/llava-next-110b'),
             ], ),
         ],
         TemplateType.llava_next_qwen,
diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py
index aa5ce07091..00ab04a7c1 100644
--- a/swift/llm/train/rlhf.py
+++ b/swift/llm/train/rlhf.py
@@ -30,10 +30,19 @@ def _prepare_model_tokenizer(self):
             model_type = getattr(args, f'{key}_model_type')
             model_revision = getattr(args, f'{key}_model_revision')
             adapters = args.adapters if key == 'ref' else args.reward_adapters
-            task_type = args.task_type if origin_key == 'ref' else 'seq_cls'
+            if origin_key == 'ref':
+                task_type = args.task_type
+                num_labels = None
+            else:
+                task_type = 'seq_cls'
+                num_labels = 1
             # Be aware of the unexpected behavior caused by double monkey patching.
             model = args.get_model_processor(
-                model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type)[0]
+                model=model_id_or_path,
+                model_type=model_type,
+                model_revision=model_revision,
+                task_type=task_type,
+                num_labels=num_labels)[0]
             model = prepare_adapter(args, model, adapters)

             if origin_key in {'ref', 'reward'}:
diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py
index 862710357f..b5ef6330d5 100644
--- a/tests/llm/test_run.py
+++ b/tests/llm/test_run.py
@@ -1,6 +1,6 @@
 if __name__ == '__main__':
     import os
-    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

 import os
@@ -242,13 +242,14 @@ def test_rlhf(self):
             if rlhf_type != 'kto' else 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#100')
         train_kwargs = {}
         if rlhf_type == 'ppo':
-            train_kwargs['reward_model_type'] = 'Qwen/Qwen2-1.5B-Instruct'
+            train_kwargs['reward_model'] = 'Qwen/Qwen2-1.5B-Instruct'
         output = rlhf_main(
             RLHFArguments(
                 rlhf_type=rlhf_type,
                 model='Qwen/Qwen2-1.5B-Instruct',
                 dataset=dataset,
                 eval_steps=5,
+                split_dataset_ratio=0.05,
                 **train_kwargs,
                 **kwargs))
         if rlhf_type == 'ppo':
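
A quick usage sketch for the rlhf.py and base_args.py changes above, not part of the commit itself: a PPO reward model scores each response with a single scalar, so it has to be loaded as a 'seq_cls' model with num_labels=1, while the ref model keeps the task type of the model being trained. The snippet mirrors the call path exercised in tests/llm/test_run.py; the `from swift.llm import ...` path and the dataset id are assumptions, not taken from this patch.

    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # Assumed public entry points; the test above calls rlhf_main/RLHFArguments directly.
    from swift.llm import rlhf_main, RLHFArguments

    output = rlhf_main(
        RLHFArguments(
            rlhf_type='ppo',
            model='Qwen/Qwen2-1.5B-Instruct',
            # With this patch the scoring model is passed via `reward_model`
            # (the test previously used the wrong key `reward_model_type`);
            # swift then loads it with task_type='seq_cls' and num_labels=1.
            reward_model='Qwen/Qwen2-1.5B-Instruct',
            dataset='AI-ModelScope/hh_rlhf_cn#100',  # placeholder dataset id
            eval_steps=5,
            split_dataset_ratio=0.05))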