From ad3f9b1c618793eed93e0910e4ec57e622cf1237 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 27 Dec 2024 10:13:38 +0800 Subject: [PATCH 01/47] support ppo --- swift/llm/argument/rlhf_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index 8d54f9eb78..68dbd9f7d6 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -48,7 +48,7 @@ def __post_init__(self): self._set_default() super().__post_init__() - if self.rlhf_type not in ['cpo', 'orpo', 'rm'] and (self.train_type == 'full' or self.rlhf_type == 'ppo'): + if self.rlhf_type in ['dpo', 'kto'] and self.train_type == 'full' or self.rlhf_type == 'ppo': self.ref_model = self.ref_model or self.model self.ref_model_type = self.ref_model_type or self.model_type self.ref_model_revision = self.ref_model_revision or self.model_revision From 1e0e17ddac7feebea3f187f726e24bdbb0e07e66 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 27 Dec 2024 10:33:35 +0800 Subject: [PATCH 02/47] update --- README.md | 4 ++-- README_CN.md | 4 ++-- ...71\211\346\225\260\346\215\256\351\233\206.md" | 2 +- ...77\253\351\200\237\345\274\200\345\247\213.md" | 2 +- docs/source/Instruction/ReleaseNote3.0.md | 7 +++---- ...73\244\350\241\214\345\217\202\346\225\260.md" | 2 +- docs/source_en/Customization/Custom-dataset.md | 2 +- docs/source_en/GetStarted/Quick-start.md | 2 +- .../Instruction/Command-line-parameters.md | 2 +- docs/source_en/Instruction/ReleaseNote3.0.md | 7 +++---- swift/llm/argument/rlhf_args.py | 15 ++++++++++++++- 11 files changed, 30 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index a572beaed2..a3ed32bb47 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ You can contact us and communicate with us by adding our group: - 🍊 **Lightweight Training**: Supports lightweight fine-tuning methods like LoRA, QLoRA, DoRA, LoRA+, ReFT, RS-LoRA, LLaMAPro, Adapter, GaLore, Q-Galore, LISA, UnSloth, Liger-Kernel. - **Distributed Training**: Supports distributed data parallel (DDP), device_map simple model parallelism, DeepSpeed ZeRO2/ZeRO3, FSDP, and other distributed training techniques. - **Quantization Training**: Supports training quantized models like BNB, AWQ, GPTQ, AQLM, HQQ, EETQ. -- **RLHF Training**: Supports human alignment training methods such as DPO, CPO, SimPO, ORPO, KTO, RM for both pure text and multi-modal large models. +- **RLHF Training**: Supports human alignment training methods such as DPO, CPO, SimPO, ORPO, KTO, RM, PPO for both pure text and multi-modal large models. - 🍓 **Multi-Modal Training**: Supports training on different modalities like images, videos, and audio, for tasks like VQA, captioning, OCR, and grounding. - **Interface Training**: Provides capabilities for training, inference, evaluation, quantization through an interface, completing the whole large model pipeline. - **Plugin and Extension**: Supports custom model and dataset extensions, as well as customization of components like loss, metric, trainer, loss-scale, callback, optimizer. @@ -83,7 +83,7 @@ You can contact us and communicate with us by adding our group: - 🎉 2024.08.12: The SWIFT paper has been published on arXiv, and you can read it [here](https://arxiv.org/abs/2408.05517). - 🔥 2024.08.05: Support for using [evalscope](https://github.com/modelscope/evalscope/) as a backend for evaluating large models and multimodal models. 
- 🔥 2024.07.29: Support for using [vllm](https://github.com/vllm-project/vllm) and [lmdeploy](https://github.com/InternLM/lmdeploy) to accelerate inference for large models and multimodal models. When performing infer/deploy/eval, you can specify `--infer_backend vllm/lmdeploy`. -- 🔥 2024.07.24: Support for human preference alignment training for multimodal large models, including DPO/ORPO/SimPO/CPO/KTO/RM. +- 🔥 2024.07.24: Support for human preference alignment training for multimodal large models, including DPO/ORPO/SimPO/CPO/KTO/RM/PPO. - 🔥 2024.02.01: Support for Agent training! The training algorithm is derived from [this paper](https://arxiv.org/pdf/2309.00986.pdf). diff --git a/README_CN.md b/README_CN.md index c3796e0a5f..fdfe9d5ed0 100644 --- a/README_CN.md +++ b/README_CN.md @@ -64,7 +64,7 @@ - 🍊 **轻量训练**:支持了LoRA、QLoRA、DoRA、LoRA+、ReFT、RS-LoRA、LLaMAPro、Adapter、GaLore、Q-Galore、LISA、UnSloth、Liger-Kernel等轻量微调方式。 - **分布式训练**:支持分布式数据并行(DDP)、device_map简易模型并行、DeepSpeed ZeRO2 ZeRO3、FSDP等分布式训练技术。 - **量化训练**:支持对BNB、AWQ、GPTQ、AQLM、HQQ、EETQ量化模型进行训练。 -- **RLHF训练**:支持纯文本大模型和多模态大模型的DPO、CPO、SimPO、ORPO、KTO、RM等人类对齐训练方法。 +- **RLHF训练**:支持纯文本大模型和多模态大模型的DPO、CPO、SimPO、ORPO、KTO、RM、PPO等人类对齐训练方法。 - 🍓 **多模态训练**:支持对图像、视频和语音不同模态模型进行训练,支持VQA、Caption、OCR、Grounding任务的训练。 - **界面训练**:以界面的方式提供训练、推理、评测、量化的能力,完成大模型的全链路。 - **插件化与拓展**:支持自定义模型和数据集拓展,支持对loss、metric、trainer、loss-scale、callback、optimizer等组件进行自定义。 @@ -78,7 +78,7 @@ - 🎉 2024.08.12: SWIFT论文已经发布到arXiv上,可以点击[这里](https://arxiv.org/abs/2408.05517)阅读。 - 🔥 2024.08.05: 支持使用[evalscope](https://github.com/modelscope/evalscope/)作为后端进行大模型和多模态模型的评测。 - 🔥 2024.07.29: 支持使用[vllm](https://github.com/vllm-project/vllm), [lmdeploy](https://github.com/InternLM/lmdeploy)对大模型和多模态大模型进行推理加速,在infer/deploy/eval时额外指定`--infer_backend vllm/lmdeploy`即可。 -- 🔥 2024.07.24: 支持对多模态大模型进行人类偏好对齐训练,包括DPO/ORPO/SimPO/CPO/KTO/RM。 +- 🔥 2024.07.24: 支持对多模态大模型进行人类偏好对齐训练,包括DPO/ORPO/SimPO/CPO/KTO/RM/PPO。 - 🔥 2024.02.01: 支持Agent训练!训练算法源自这篇[论文](https://arxiv.org/pdf/2309.00986.pdf)。 ## 🛠️ 安装 diff --git "a/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" index 16b54234be..df19d99391 100644 --- "a/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" @@ -53,7 +53,7 @@ query-response格式: ### RLHF -#### DPO/ORPO/CPO/SimPO/RM +#### DPO/ORPO/CPO/SimPO/RM/PPO ```jsonl {"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "告诉我明天的天气"}, {"role": "assistant", "content": "明天天气晴朗"}], "rejected_response": "我不知道"} diff --git "a/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" "b/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" index c69597316e..9715f248c3 100644 --- "a/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" +++ "b/docs/source/GetStarted/\345\277\253\351\200\237\345\274\200\345\247\213.md" @@ -8,7 +8,7 @@ ms-swift是魔搭社区提供的大模型与多模态大模型训练部署框架 - 🍊 轻量训练:支持了LoRA、QLoRA、DoRA、LoRA+、ReFT、RS-LoRA、LLaMAPro、Adapter、GaLore、Q-Galore、LISA、UnSloth、Liger-Kernel等轻量微调方式。 - 分布式训练:支持分布式数据并行(DDP)、device_map简易模型并行、DeepSpeed ZeRO2 ZeRO3、FSDP等分布式训练技术。 - 量化训练:支持对BNB、AWQ、GPTQ、AQLM、HQQ、EETQ量化模型进行训练。 -- RLHF训练:支持纯文本大模型和多模态大模型的DPO、CPO、SimPO、ORPO、KTO、RM等人类对齐训练方法。 +- 
RLHF训练:支持纯文本大模型和多模态大模型的DPO、CPO、SimPO、ORPO、KTO、RM、PPO等人类对齐训练方法。 - 🍓 多模态训练:支持对图像、视频和语音不同模态模型进行训练,支持VQA、Caption、OCR、Grounding任务的训练。 - 界面训练:以界面的方式提供训练、推理、评测、量化的能力,完成大模型的全链路。 - 插件化与拓展:支持自定义模型和数据集拓展,支持对loss、metric、trainer、loss-scale、callback、optimizer等组件进行自定义。 diff --git a/docs/source/Instruction/ReleaseNote3.0.md b/docs/source/Instruction/ReleaseNote3.0.md index feda24658a..85a8f7e8ad 100644 --- a/docs/source/Instruction/ReleaseNote3.0.md +++ b/docs/source/Instruction/ReleaseNote3.0.md @@ -81,7 +81,6 @@ ## 待完成 -1. RM/PPO能力3.0版本尚不支持,请使用2.6.1版本 -2. 自定义数据集评测3.0版本尚不支持,请使用2.6.1版本 -3. Megatron预训练能力3.0版本尚不支持,请使用2.6.1版本 -4. 文档和README,尤其是英文部分暂时未更新完整 +1. 自定义数据集评测3.0版本尚不支持,请使用2.6.1版本 +2. Megatron预训练能力3.0版本尚不支持,请使用2.6.1版本 +3. 文档和README暂时未更新完整 diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index db8dd42017..1c652751ff 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -304,7 +304,7 @@ Vera使用`target_modules`, `target_regex`, `modules_to_save`三个参数. ### RLHF参数 RLHF参数继承于[训练参数](#训练参数) -- 🔥rlhf_type: 对齐算法类型,支持`dpo`, `orpo`, `simpo`, `kto`, `cpo` +- 🔥rlhf_type: 对齐算法类型,支持`dpo`, `orpo`, `simpo`, `kto`, `cpo`, `rm`, `ppo` - ref_model: DPO等算法中的原始对比模型 - ref_model_type: 同model_type - ref_model_revision: 同model_revision diff --git a/docs/source_en/Customization/Custom-dataset.md b/docs/source_en/Customization/Custom-dataset.md index f7b883f46a..3bb38cfe3c 100644 --- a/docs/source_en/Customization/Custom-dataset.md +++ b/docs/source_en/Customization/Custom-dataset.md @@ -52,7 +52,7 @@ The following provides the recommended dataset format for ms-swift, where the sy ### RLHF -#### DPO/ORPO/CPO/SimPO/RM +#### DPO/ORPO/CPO/SimPO/RM/PPO ```jsonl {"messages": [{"role": "system", "content": "You are a useful and harmless assistant"}, {"role": "user", "content": "Tell me tomorrow's weather"}, {"role": "assistant", "content": "Tomorrow's weather will be sunny"}], "rejected_response": "I don't know"} diff --git a/docs/source_en/GetStarted/Quick-start.md b/docs/source_en/GetStarted/Quick-start.md index c410e4484e..38e69b32e9 100644 --- a/docs/source_en/GetStarted/Quick-start.md +++ b/docs/source_en/GetStarted/Quick-start.md @@ -8,7 +8,7 @@ ms-swift is a comprehensive training and deployment framework for large language - 🍊 Lightweight Training: Supports lightweight fine-tuning methods like LoRA, QLoRA, DoRA, LoRA+, ReFT, RS-LoRA, LLaMAPro, Adapter, GaLore, Q-Galore, LISA, UnSloth, Liger-Kernel, and more. - Distributed Training: Supports distributed data parallel (DDP), simple model parallelism via device_map, DeepSpeed ZeRO2 ZeRO3, FSDP, and other distributed training technologies. - Quantization Training: Provides training for quantized models like BNB, AWQ, GPTQ, AQLM, HQQ, EETQ. -- RLHF Training: Supports human alignment training methods like DPO, CPO, SimPO, ORPO, KTO, RM for both text-based and multimodal large models. +- RLHF Training: Supports human alignment training methods like DPO, CPO, SimPO, ORPO, KTO, RM, PPO for both text-based and multimodal large models. - 🍓 Multimodal Training: Capable of training models for different modalities such as images, videos, and audios; supports tasks like VQA (Visual Question Answering), Captioning, OCR (Optical Character Recognition), and Grounding. 
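A rough launch sketch for the newly listed `ppo` option, shown below for reference. The model, reward model and dataset IDs are illustrative choices only (the reward model ID is one of those registered later in this series), and `--kl_coef`/`--cliprange` simply restate their defaults:

```shell
# PPO sketch: the policy and the reward model share the internlm2 tokenizer.
swift rlhf \
    --rlhf_type ppo \
    --model Shanghai_AI_Laboratory/internlm2-chat-1_8b \
    --reward_model Shanghai_AI_Laboratory/internlm2-1_8b-reward \
    --dataset 'AI-ModelScope/alpaca-gpt4-data-en#1000' \
    --kl_coef 0.05 \
    --cliprange 0.2 \
    --num_train_epochs 1
```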
- Interface-driven Training: Offers training, inference, evaluation, and quantization capabilities through an interface, enabling a complete workflow for large models. - Plugins and Extensions: Allows customization and extension of models and datasets, and supports customizations for components like loss, metric, trainer, loss-scale, callback, optimizer, etc. diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index 28be6ae393..d163b10a61 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -308,7 +308,7 @@ Training arguments include the [base arguments](#base-arguments), [Seq2SeqTraine RLHF arguments inherit from the [training arguments](#training-arguments). -- 🔥rlhf_type: Alignment algorithm type, supports `dpo`, `orpo`, `simpo`, `kto`, `cpo`. +- 🔥rlhf_type: Alignment algorithm type, supports `dpo`, `orpo`, `simpo`, `kto`, `cpo`, `rm`, `ppo`. - ref_model: Original comparison model in algorithms like DPO. - ref_model_type: Same as model_type. - ref_model_revision: Same as model_revision. diff --git a/docs/source_en/Instruction/ReleaseNote3.0.md b/docs/source_en/Instruction/ReleaseNote3.0.md index c6c1c9cec8..f46728886c 100644 --- a/docs/source_en/Instruction/ReleaseNote3.0.md +++ b/docs/source_en/Instruction/ReleaseNote3.0.md @@ -94,7 +94,6 @@ The parameters marked as compatible in version 2.0 have been entirely removed. ## Pending Tasks -1. RM/PPO capabilities are not supported in version 3.0. Please use version 2.6.1. -2. Custom dataset evaluation is not supported in version 3.0. Please use version 2.6.1. -3. Megatron pre-training capabilities are not supported in version 3.0. Please use version 2.6.1. -4. Documentation and README, especially the English portions, are temporarily incomplete and will be updated. +1. Custom dataset evaluation is not supported in version 3.0. Please use version 2.6.1. +2. Megatron pre-training capabilities are not supported in version 3.0. Please use version 2.6.1. +3. Documentation and README are temporarily incomplete and will be updated. diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index 68dbd9f7d6..45924c3531 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -25,7 +25,7 @@ class RLHFArguments(TrainArguments): desirable_weight (float): Weight for desirable outcomes in KTO. Default is 1.0. undesirable_weight (float): Weight for undesirable outcomes in KTO. Default is 1.0. 
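        reward_model (Optional[str]): Reward model id or path used by PPO. Default is None.
        reward_model_type (Optional[str]): model_type of the reward model. Default is None.
        reward_model_revision (Optional[str]): Revision of the reward model. Default is None.
        local_rollout_forward_batch_size (int): Per-device forward batch size in the PPO rollout phase. Default is 64.
        kl_coef (float): Coefficient of the KL penalty against the reference model in PPO. Default is 0.05.
        cliprange (float): Clip range of the PPO policy loss. Default is 0.2.
        vf_coef (float): Coefficient of the value-function loss in PPO. Default is 0.1.
        cliprange_value (float): Clip range for value-function updates in PPO. Default is 0.2.
        gamma (float): Discount factor used in advantage estimation for PPO. Default is 1.0.
        lam (float): Lambda for generalized advantage estimation (GAE) in PPO. Default is 0.95.
        num_sample_generations (int): Number of sample generations logged during PPO training. Default is 10.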
""" - rlhf_type: Literal['dpo', 'orpo', 'simpo', 'kto', 'cpo', 'rm'] = 'dpo' + rlhf_type: Literal['dpo', 'orpo', 'simpo', 'kto', 'cpo', 'rm', 'ppo'] = 'dpo' ref_model: Optional[str] = None ref_model_type: Optional[str] = field( default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'}) @@ -42,6 +42,19 @@ class RLHFArguments(TrainArguments): # KTO desirable_weight: float = 1.0 undesirable_weight: float = 1.0 + # PPO + reward_model: Optional[str] = None + reward_model_type: Optional[str] = field( + default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'}) + reward_model_revision: Optional[str] = None + local_rollout_forward_batch_size: int = 64 + kl_coef: float = 0.05 + cliprange: float = 0.2 + cliprange_value: float = 0.2 + vf_coef: float = 0.1 + gamma: float = 1.0 + lam: float = 0.95 + num_sample_generations: int = 10 def __post_init__(self): self._init_simpo() From 29cd2d05dccae3869da5d5165866108344486c4b Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 27 Dec 2024 10:56:18 +0800 Subject: [PATCH 03/47] update --- requirements/framework.txt | 2 +- swift/llm/argument/rlhf_args.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/requirements/framework.txt b/requirements/framework.txt index ee068f99a5..aa45abbf71 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -30,5 +30,5 @@ tiktoken tqdm transformers>=4.33,<4.49 transformers_stream_generator -trl>=0.11,<0.12 +trl>=0.13,<0.14 uvicorn diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index 45924c3531..edd6170843 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -50,14 +50,15 @@ class RLHFArguments(TrainArguments): local_rollout_forward_batch_size: int = 64 kl_coef: float = 0.05 cliprange: float = 0.2 - cliprange_value: float = 0.2 vf_coef: float = 0.1 + cliprange_value: float = 0.2 gamma: float = 1.0 lam: float = 0.95 num_sample_generations: int = 10 def __post_init__(self): self._init_simpo() + self._init_ppo() self._set_default() super().__post_init__() @@ -68,6 +69,10 @@ def __post_init__(self): elif self.ref_model is not None: raise ValueError('CPO/ORPO or LoRA training does not require a ref_model to be passed in.') + def _init_ppo(self): + pass + + def _init_simpo(self): if self.rlhf_type != 'simpo': return From 0492943fe04c49cddca97b68b5330ad5d6e9abfa Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 27 Dec 2024 14:52:40 +0800 Subject: [PATCH 04/47] update --- .../notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb | 2 +- swift/llm/argument/base_args/base_args.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb b/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb index 1ecd96cad3..fee5144b2f 100644 --- a/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb +++ b/examples/notebook/qwen2.5-self-cognition/self-cognition-sft.ipynb @@ -10,7 +10,7 @@ "\n", "Are you ready? 
Let's begin the journey...\n", "\n", - "中文版:https://modelscope.cn/notebook/share/ipynb/4340fdeb/self-cognition-sft.ipynb" + "中文版:https://modelscope.cn/notebook/share/ipynb/313f6116/self-cognition-sft.ipynb" ] }, { diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py index 5a2187dbe9..7e82845303 100644 --- a/swift/llm/argument/base_args/base_args.py +++ b/swift/llm/argument/base_args/base_args.py @@ -109,7 +109,8 @@ def _init_custom_register(self) -> None: folder, fname = os.path.split(path) sys.path.append(folder) __import__(fname.rstrip('.py')) - logger.info(f'Successfully registered `{self.custom_register_path}`') + if self.custom_register_path: + logger.info(f'Successfully registered `{self.custom_register_path}`') def _init_adapters(self): if isinstance(self.adapters, str): From 0214e497dd62ba231a1bd1c8201a3c3fe6e8d05e Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 27 Dec 2024 15:30:32 +0800 Subject: [PATCH 05/47] fix --- requirements/framework.txt | 2 +- swift/llm/argument/rlhf_args.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements/framework.txt b/requirements/framework.txt index aa45abbf71..ee068f99a5 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -30,5 +30,5 @@ tiktoken tqdm transformers>=4.33,<4.49 transformers_stream_generator -trl>=0.13,<0.14 +trl>=0.11,<0.12 uvicorn diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index edd6170843..fdc1862954 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -72,7 +72,6 @@ def __post_init__(self): def _init_ppo(self): pass - def _init_simpo(self): if self.rlhf_type != 'simpo': return From df183a24ea9f4d24ab75a948b02808f597a87ef2 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 27 Dec 2024 16:37:10 +0800 Subject: [PATCH 06/47] update --- swift/llm/argument/rlhf_args.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index fdc1862954..48fba539f4 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -70,7 +70,9 @@ def __post_init__(self): raise ValueError('CPO/ORPO or LoRA training does not require a ref_model to be passed in.') def _init_ppo(self): - pass + self.response_length = self.max_new_tokens + self.num_ppo_epochs = self.num_train_epochs + # TODO: streaming, MLLM def _init_simpo(self): if self.rlhf_type != 'simpo': From 0a54bb8fa20941c2b6cc036fd79011c86b277ee1 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sat, 28 Dec 2024 10:14:17 +0800 Subject: [PATCH 07/47] update --- swift/llm/model/__init__.py | 2 +- swift/llm/model/constant.py | 4 +- swift/llm/model/model/__init__.py | 2 +- swift/llm/model/model/reward_model.py | 33 ++++++++++++++++ swift/llm/model/register.py | 56 --------------------------- swift/llm/train/rlhf.py | 11 ------ 6 files changed, 38 insertions(+), 70 deletions(-) create mode 100644 swift/llm/model/model/reward_model.py diff --git a/swift/llm/model/__init__.py b/swift/llm/model/__init__.py index 754d715207..939db750a3 100644 --- a/swift/llm/model/__init__.py +++ b/swift/llm/model/__init__.py @@ -4,6 +4,6 @@ from .model_arch import MODEL_ARCH_MAPPING, ModelArch, ModelKeys, MultiModelKeys, get_model_arch, register_model_arch from .register import (MODEL_MAPPING, Model, ModelGroup, ModelMeta, fix_do_sample_warning, get_default_device_map, get_default_torch_dtype, get_model_info_meta, get_model_name, get_model_tokenizer, 
- get_model_tokenizer_multimodal, get_model_tokenizer_with_flash_attn, get_model_with_value_head, + get_model_tokenizer_multimodal, get_model_tokenizer_with_flash_attn, load_by_unsloth, register_model) from .utils import HfConfigFactory, ModelInfo, git_clone_github, safe_snapshot_download diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py index a87f901c76..82a89bd036 100644 --- a/swift/llm/model/constant.py +++ b/swift/llm/model/constant.py @@ -93,9 +93,11 @@ class LLMModelType: mamba = 'mamba' polylm = 'polylm' aya = 'aya' - + # bert modern_bert = 'modern_bert' bert = 'bert' + # reward model + reward_model = 'reward_model' class MLLMModelType: diff --git a/swift/llm/model/model/__init__.py b/swift/llm/model/model/__init__.py index a972ec64ef..82ebf432f9 100644 --- a/swift/llm/model/model/__init__.py +++ b/swift/llm/model/model/__init__.py @@ -1,2 +1,2 @@ from . import (baai, baichuan, bert, codefuse, deepseek, gemma, glm, internlm, llama, llava, llm, mamba, microsoft, - minicpm, mistral, mllm, mplug, openbuddy, qwen, telechat, yi) + minicpm, mistral, mllm, mplug, openbuddy, qwen, telechat, yi, reward_model) diff --git a/swift/llm/model/model/reward_model.py b/swift/llm/model/model/reward_model.py new file mode 100644 index 0000000000..63b0bb0c90 --- /dev/null +++ b/swift/llm/model/model/reward_model.py @@ -0,0 +1,33 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from transformers import AutoConfig +from transformers import AutoModel +from swift.utils import get_logger +from ..constant import LLMModelType +from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model + +logger = get_logger() + + +def get_model_tokenizer_reward_model(model_dir, *args, **kwargs): + model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) + if 'AutoModel' in (getattr(model_config, 'auto_map', None) or {}): + kwargs['automodel_class'] = AutoModel + return get_model_tokenizer_from_local(model_dir, *args, **kwargs) + + +register_model( + ModelMeta( + LLMModelType.reward_model, [ + ModelGroup([ + Model('Qwen/Qwen2.5-Math-RM-72B', 'Qwen/Qwen2.5-Math-RM-72B'), + Model('Qwen/Qwen2-Math-RM-72B', 'Qwen/Qwen2-Math-RM-72B'), + ]), + ModelGroup([ + Model('Shanghai_AI_Laboratory/internlm2-1_8b-reward', 'internlm/internlm2-1_8b-reward'), + Model('Shanghai_AI_Laboratory/internlm2-7b-reward', 'internlm/internlm2-7b-reward'), + Model('Shanghai_AI_Laboratory/internlm2-20b-reward', 'internlm/internlm2-20b-reward'), + ]), + ], + None, + get_model_tokenizer_reward_model, + tags=['reward_model'])) diff --git a/swift/llm/model/register.py b/swift/llm/model/register.py index a98406eb80..8cf83a5547 100644 --- a/swift/llm/model/register.py +++ b/swift/llm/model/register.py @@ -196,62 +196,6 @@ def get_model_tokenizer_from_local(model_dir: str, return model, tokenizer -def get_model_with_value_head(model) -> 'AutoModelForCausalLMWithValueHead': - from trl import AutoModelForCausalLMWithValueHead - lm_head_namings = ['lm_head', 'embed_out'] - if not any(hasattr(model, attribute) for attribute in lm_head_namings): - setattr(model, 'lm_head', None) # avoid ValueError - - model = AutoModelForCausalLMWithValueHead.from_pretrained(model) - - def patch_valuehead_model(model): - attr_list = [ - 'get_input_embeddings', 'vis_processor', 'extract_feature', 'get_rope_index', 'model', 'vision_tower', - 'img2emb', '_encode_image', '_merge_input_ids_with_image_features', 'prepare_inputs_embeds', - 'build_conversation_input_ids', 'config', 
'get_slice_image_placeholder', 'transform', 'get_vllm_embedding', - 'forward_image', 'dtype', 'base_model_prefix', 'device', 'visual' - ] - for attr in attr_list: - if hasattr(model.pretrained_model, attr) and not hasattr(model, attr): - setattr(model, attr, getattr(model.pretrained_model, attr)) - - # PPO compatible - if not hasattr(model, 'score'): - setattr(model, 'score', model.v_head) - if model.base_model_prefix == '' and hasattr(model.pretrained_model, 'language_model'): - model.base_model_prefix = model.pretrained_model.language_model.base_model_prefix - - base_model_prefix = model.pretrained_model.base_model_prefix - if hasattr(model.pretrained_model, base_model_prefix): - setattr(model, base_model_prefix, getattr(model.pretrained_model, base_model_prefix)) - - patch_valuehead_model(model) - - # try to load local vhead weights - vhead_params = None - try: - from safetensors import safe_open - vhead_file = os.path.join(model.pretrained_model.model_dir, 'value_head.safetensors') - with safe_open(vhead_file, framework='pt', device='cpu') as f: - vhead_params = {key: f.get_tensor(key) for key in f.keys()} - except Exception: - pass - - try: - vhead_file = os.path.join(model.pretrained_model.model_dir, 'value_head.bin') - vhead_params = torch.load(vhead_file, map_location='cpu') - except Exception: - pass - - if vhead_params is not None: - model.load_state_dict(vhead_params, strict=False) - logger.info(f'Loading value head weights from {vhead_file}') - else: - logger.info('The local value head weight file was not detected.' - 'Ignore it if this is during the reward modeling phase,') - return model - - def get_model_tokenizer_with_flash_attn(model_dir: str, model_info: ModelInfo, model_kwargs: Dict[str, Any], diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 906a3e1166..3ec858b1b3 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -32,17 +32,6 @@ def _prepare_template(self) -> None: # Avoid padding labels during the model's forward pass in multimodal models. 
self.template.loss_scale = 'last_round' - @classmethod - def prepare_model(cls, args, model, *_args, **kwargs): - model = super().prepare_model(args, model, *_args, **kwargs) - if args.rlhf_type == 'rm': - from trl import AutoModelForCausalLMWithValueHead - lm_head_namings = ['lm_head', 'embed_out'] - if not any(hasattr(model, attribute) for attribute in lm_head_namings): - model.lm_head = None # avoid error - model = AutoModelForCausalLMWithValueHead.from_pretrained(model) - patch_getattr(AutoModelForCausalLMWithValueHead, 'pretrained_model') - return model def _get_dataset(self): args = self.args From 8e90c4ea03d8d3c32611d65f285a67fc74eefebb Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 30 Dec 2024 10:52:14 +0800 Subject: [PATCH 08/47] update --- swift/llm/__init__.py | 4 ++-- swift/ui/base.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/swift/llm/__init__.py b/swift/llm/__init__.py index d098e849a8..8397076413 100644 --- a/swift/llm/__init__.py +++ b/swift/llm/__init__.py @@ -20,7 +20,7 @@ HfConfigFactory, ModelInfo, ModelMeta, ModelKeys, register_model_arch, MultiModelKeys, ModelArch, get_model_arch, MODEL_ARCH_MAPPING, get_model_info_meta, get_model_name, ModelGroup, Model, get_model_tokenizer_with_flash_attn, get_model_tokenizer_multimodal, load_by_unsloth, - git_clone_github) + git_clone_github, get_matched_model_meta) from .dataset import (AlpacaPreprocessor, ResponsePreprocessor, MessagesPreprocessor, AutoPreprocessor, DATASET_MAPPING, MediaResource, register_dataset, register_dataset_info, EncodePreprocessor, LazyLLMDataset, ConstantLengthDataset, standard_keys, load_dataset, DATASET_TYPE, @@ -54,7 +54,7 @@ 'ModelInfo', 'ModelMeta', 'ModelKeys', 'register_model_arch', 'MultiModelKeys', 'ModelArch', 'MODEL_ARCH_MAPPING', 'get_model_arch', 'get_model_info_meta', 'get_model_name', 'register_model', 'ModelGroup', 'Model', 'get_model_tokenizer_with_flash_attn', 'get_model_tokenizer_multimodal', - 'load_by_unsloth', 'git_clone_github' + 'load_by_unsloth', 'git_clone_github', 'get_matched_model_meta' ], 'dataset': [ 'AlpacaPreprocessor', 'ClsPreprocessor', 'ComposePreprocessor', 'MessagesPreprocessor', 'DATASET_MAPPING', diff --git a/swift/ui/base.py b/swift/ui/base.py index 508b61f4b4..6e1c847137 100644 --- a/swift/ui/base.py +++ b/swift/ui/base.py @@ -15,8 +15,7 @@ from gradio import Accordion, Audio, Button, Checkbox, Dropdown, File, Image, Slider, Tab, TabItem, Textbox, Video from modelscope.hub.utils.utils import get_cache_dir -from swift.llm import TEMPLATE_MAPPING, BaseArguments -from swift.llm.model.register import get_matched_model_meta +from swift.llm import TEMPLATE_MAPPING, BaseArguments, get_matched_model_meta all_langs = ['zh', 'en'] builder: Type['BaseUI'] = None From 1bdec8ab77f9bbd0cc03ba91fd3aad37d77056a6 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 30 Dec 2024 14:43:55 +0800 Subject: [PATCH 09/47] update --- .gitignore | 1 + docs/source/Instruction/ReleaseNote3.0.md | 2 +- ...44\350\241\214\345\217\202\346\225\260.md" | 2 +- .../Instruction/Command-line-parameters.md | 2 +- docs/source_en/Instruction/ReleaseNote3.0.md | 2 +- swift/llm/argument/base_args/base_args.py | 10 +++---- swift/llm/argument/base_args/data_args.py | 6 ++--- swift/llm/argument/train_args.py | 2 +- swift/llm/dataset/loader.py | 26 +++++++++---------- swift/llm/dataset/preprocessor/core.py | 15 ++++++----- swift/llm/infer/infer.py | 6 ++--- swift/llm/model/__init__.py | 2 +- swift/llm/model/model/glm.py | 1 + swift/llm/model/register.py | 2 +- 
swift/llm/template/template/glm.py | 1 + swift/llm/train/sft.py | 6 ++--- swift/utils/torchacc_utils.py | 4 +-- 17 files changed, 46 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index ab9f4f2efa..7d7f5ab085 100644 --- a/.gitignore +++ b/.gitignore @@ -139,6 +139,7 @@ my_model/ /data result/ images +/custom # Pytorch *.pth diff --git a/docs/source/Instruction/ReleaseNote3.0.md b/docs/source/Instruction/ReleaseNote3.0.md index 2730e45c34..4afd54d6bf 100644 --- a/docs/source/Instruction/ReleaseNote3.0.md +++ b/docs/source/Instruction/ReleaseNote3.0.md @@ -6,7 +6,7 @@ 1. 数据集模块重构。数据集加载速度提升2-20倍,encode速度提升2-4倍,支持streaming模式 - 移除了dataset_name机制,采用dataset_id、dataset_dir、dataset_path方式指定数据集 - - 使用`--dataset_num_proc`支持多进程加速处理、使用`--load_from_cache_file true`支持使用数据前处理缓存 + - 使用`--dataset_num_proc`支持多进程加速处理、使用`--enable_cache true`支持使用数据前处理缓存 - 使用`--streaming`支持流式加载hub端和本地数据集 - 支持`--packing`命令以获得更稳定的训练效率 - 指定`--dataset `支持本地加载开源数据集 diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 91c2b6d567..4470c545cb 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -34,7 +34,7 @@ - data_seed: 数据集随机种子,默认为42 - 🔥dataset_num_proc: 数据集预处理的进程数,默认为1 - 🔥streaming: 流式读取并处理数据集,默认False -- load_from_cache_file: 数据集预处理使用cache,默认False +- enable_cache: 数据集预处理使用cache,默认False - 注意: 如果改为True,在数据集有更改时可能无法生效,如果修改本参数发现训练不正常请考虑设置为False - download_mode: 数据集下载模式,包含`reuse_dataset_if_exists`和`force_redownload`,默认为reuse_dataset_if_exists - strict: 如果为True,则数据集只要某行有问题直接抛错,否则会丢弃出错行。默认False diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index 7975cb7eed..af33a1d6c1 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -34,7 +34,7 @@ The introduction to command line parameters will cover base arguments, atomic ar - data_seed: Random seed for the dataset, default is 42. - 🔥dataset_num_proc: Number of processes for dataset preprocessing, default is 1. - 🔥streaming: Stream read and process the dataset, default is False. -- load_from_cache_file: Use cache for dataset preprocessing, default is False. +- enable_cache: Use cache for dataset preprocessing, default is False. - Note: If set to True, it may not take effect if the dataset changes. If modifying this parameter leads to issues during training, consider setting it to False. - download_mode: Dataset download mode, including `reuse_dataset_if_exists` and `force_redownload`, default is reuse_dataset_if_exists. - strict: If True, the dataset will throw an error if any row has a problem; otherwise, it will discard the erroneous row. Default is False. diff --git a/docs/source_en/Instruction/ReleaseNote3.0.md b/docs/source_en/Instruction/ReleaseNote3.0.md index 0211a97eb3..037137af34 100644 --- a/docs/source_en/Instruction/ReleaseNote3.0.md +++ b/docs/source_en/Instruction/ReleaseNote3.0.md @@ -6,7 +6,7 @@ 1. Dataset module refactoring. The dataset loading speed has improved by 2-20 times, and encoding speed has improved by 2-4 times, with support for streaming mode. - Removed the dataset_name mechanism; now use dataset_id, dataset_dir, or dataset_path to specify the dataset. 
- - Use `--dataset_num_proc` to support multi-process acceleration and `--load_from_cache_file true` to support cache processing before using the data. + - Use `--dataset_num_proc` to support multi-process acceleration and `--enable_cache true` to support cache processing before using the data. - Use `--streaming` to support streaming loading of hub and local datasets. - Support `--packing` command for more stable training efficiency. - Use `--dataset ` to support local loading of open-source datasets. diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py index a2f88deaa9..00e5cb908d 100644 --- a/swift/llm/argument/base_args/base_args.py +++ b/swift/llm/argument/base_args/base_args.py @@ -130,14 +130,10 @@ def __post_init__(self): self._init_ckpt_dir() self._init_custom_register() self._init_model_kwargs() - self.rank, self.local_rank, world_size, self.local_world_size = get_dist_setting() - # The Seq2SeqTrainingArguments has a property called world_size, which cannot be assigned a value. - try: - self.world_size = world_size - except AttributeError: - pass + self.rank, self.local_rank, self.global_world_size, self.local_world_size = get_dist_setting() + # The Seq2SeqTrainingArguments has a property called world_size logger.info(f'rank: {self.rank}, local_rank: {self.local_rank}, ' - f'world_size: {world_size}, local_world_size: {self.local_world_size}') + f'world_size: {self.global_world_size}, local_world_size: {self.local_world_size}') assert len(self.adapters) <= 1, f'args.adapters: {self.adapters}' ModelArguments.__post_init__(self) QuantizeArguments.__post_init__(self) diff --git a/swift/llm/argument/base_args/data_args.py b/swift/llm/argument/base_args/data_args.py index 3afb3683d5..0ab70a8f33 100644 --- a/swift/llm/argument/base_args/data_args.py +++ b/swift/llm/argument/base_args/data_args.py @@ -20,7 +20,7 @@ class DataArguments: data_seed (Optional[int]): Seed for dataset shuffling. Default is None. dataset_num_proc (int): Number of processes to use for data loading and preprocessing. Default is 1. streaming (bool): Flag to enable streaming of datasets. Default is False. - load_from_cache_file (bool): Flag to load dataset from cache file. Default is False. + enable_cache (bool): Flag to load dataset from cache file. Default is False. download_mode (Literal): Mode for downloading datasets. Default is 'reuse_dataset_if_exists'. model_name (List[str]): List containing Chinese and English names of the model. Default is [None, None]. model_author (List[str]): List containing Chinese and English names of the model author. 
@@ -38,7 +38,7 @@ class DataArguments: dataset_num_proc: int = 1 streaming: bool = False - load_from_cache_file: bool = False + enable_cache: bool = False download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists' strict: bool = False # Chinese name and English name @@ -74,7 +74,7 @@ def get_dataset_kwargs(self): 'streaming': self.streaming, 'use_hf': self.use_hf, 'hub_token': self.hub_token, - 'load_from_cache_file': self.load_from_cache_file, + 'enable_cache': self.enable_cache, 'download_mode': self.download_mode, 'strict': self.strict, 'model_name': self.model_name, diff --git a/swift/llm/argument/train_args.py b/swift/llm/argument/train_args.py index f99fbe0621..5d3d52b4a7 100644 --- a/swift/llm/argument/train_args.py +++ b/swift/llm/argument/train_args.py @@ -57,7 +57,7 @@ def _init_eval_strategy(self): def __post_init__(self): self._init_output_dir() if self.average_tokens_across_devices is None: - self.average_tokens_across_devices = self.world_size > 1 + self.average_tokens_across_devices = self.global_world_size > 1 if self.metric_for_best_model is None: self.metric_for_best_model = 'rouge-l' if self.predict_with_generate else 'loss' if self.greater_is_better is None: diff --git a/swift/llm/dataset/loader.py b/swift/llm/dataset/loader.py index 7aaa9f9e0b..6e0de90348 100644 --- a/swift/llm/dataset/loader.py +++ b/swift/llm/dataset/loader.py @@ -165,7 +165,7 @@ def _load_dataset_path(dataset_meta: DatasetMeta, *, num_proc: int = 1, strict: bool = False, - load_from_cache_file: bool = False, + enable_cache: bool = False, streaming: bool = False) -> HfDataset: dataset_path = dataset_meta.dataset_path @@ -177,7 +177,7 @@ def _load_dataset_path(dataset_meta: DatasetMeta, dataset = hf_load_dataset(file_type, data_files=dataset_path, **kwargs) dataset = dataset_meta.preprocess_func( - dataset, num_proc=num_proc, strict=strict, load_from_cache_file=load_from_cache_file) + dataset, num_proc=num_proc, strict=strict, enable_cache=enable_cache) dataset = DatasetLoader._remove_useless_columns(dataset) return dataset @@ -191,7 +191,7 @@ def _load_repo_dataset( use_hf: Optional[bool] = None, hub_token: Optional[str] = None, strict: bool = False, - load_from_cache_file: bool = False, + enable_cache: bool = False, revision: Optional[str] = None, download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists', ) -> HfDataset: @@ -245,7 +245,7 @@ def _load_repo_dataset( if streaming and isinstance(dataset, HfDataset): dataset = dataset.to_iterable_dataset() dataset = subset.preprocess_func( - dataset, num_proc=num_proc, strict=strict, load_from_cache_file=load_from_cache_file) + dataset, num_proc=num_proc, strict=strict, enable_cache=enable_cache) dataset = DatasetLoader._remove_useless_columns(dataset) datasets.append(dataset) return DatasetLoader._concat_datasets(datasets, streaming) @@ -278,7 +278,7 @@ def post_process( split_dataset_ratio: float = 0., streaming: bool = False, random_state: Optional[np.random.RandomState] = None, - load_from_cache_file: bool = False, + enable_cache: bool = False, ) -> Tuple[DATASET_TYPE, Optional[DATASET_TYPE]]: """Split into train/val datasets and perform dataset sampling.""" assert dataset_sample is None or dataset_sample > 0 @@ -319,7 +319,7 @@ def post_process( assert train_sample > 0 train_dataset, val_dataset = train_dataset.train_test_split( test_size=val_sample, seed=get_seed(random_state), - load_from_cache_file=load_from_cache_file).values() + enable_cache=enable_cache).values() 
train_dataset = sample_dataset(train_dataset, train_sample, random_state) return train_dataset, val_dataset @@ -342,7 +342,7 @@ def load( use_hf: Optional[bool] = None, hub_token: Optional[str] = None, strict: bool = False, - load_from_cache_file: bool = False, + enable_cache: bool = False, download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists', ) -> HfDataset: @@ -351,7 +351,7 @@ def load( dataset_meta=dataset_meta, num_proc=num_proc, strict=strict, - load_from_cache_file=load_from_cache_file, + enable_cache=enable_cache, streaming=streaming, ) else: @@ -373,7 +373,7 @@ def load( hub_token=hub_token, num_proc=num_proc, strict=strict, - load_from_cache_file=load_from_cache_file, + enable_cache=enable_cache, revision=revision, streaming=streaming, download_mode=download_mode) @@ -407,7 +407,7 @@ def load_dataset( use_hf: Optional[bool] = None, hub_token: Optional[str] = None, strict: bool = False, - load_from_cache_file: bool = False, + enable_cache: bool = False, download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists', # self-cognition model_name: Union[Tuple[str, str], List[str], None] = None, # zh, en @@ -417,7 +417,7 @@ def load_dataset( Args: download_mode: Download mode, default is `reuse_dataset_if_exists`. - load_from_cache_file: Use cache file or not, Default False. + enable_cache: Use cache file or not, Default False. strict: Raise if any row is not correct. hub_token: The token of the hub. use_hf: Use hf dataset or ms dataset. @@ -444,7 +444,7 @@ def load_dataset( 'num_proc': num_proc, 'use_hf': use_hf, 'strict': strict, - 'load_from_cache_file': load_from_cache_file, + 'enable_cache': enable_cache, 'download_mode': download_mode, 'streaming': streaming, 'hub_token': hub_token @@ -461,7 +461,7 @@ def load_dataset( split_dataset_ratio=split_dataset_ratio, random_state=seed, streaming=streaming, - load_from_cache_file=load_from_cache_file) + enable_cache=enable_cache) if train_dataset is not None: train_datasets.append(train_dataset) if val_dataset is not None: diff --git a/swift/llm/dataset/preprocessor/core.py b/swift/llm/dataset/preprocessor/core.py index c3d692f4c3..9b54df91ee 100644 --- a/swift/llm/dataset/preprocessor/core.py +++ b/swift/llm/dataset/preprocessor/core.py @@ -6,9 +6,8 @@ import numpy as np from datasets import Dataset as HfDataset -from datasets import Image +from datasets import Image, Value, enable_caching, disable_caching from datasets import IterableDataset as HfIterableDataset -from datasets import Value from swift.llm import history_to_messages from swift.utils import get_logger @@ -246,19 +245,23 @@ def __call__( *, num_proc: int = 1, strict: bool = False, - load_from_cache_file: bool = False, + enable_cache: bool = False, batch_size: int = 1000, ) -> DATASET_TYPE: from ..utils import sample_dataset if self.dataset_sample is not None: dataset = sample_dataset(dataset, self.dataset_sample, self.random_state) + if enable_cache: + enable_caching() + else: + disable_caching() dataset = self._rename_columns(dataset) dataset = self.prepare_dataset(dataset) dataset = self._cast_pil_image(dataset) map_kwargs = {} if isinstance(dataset, HfDataset): - map_kwargs.update({'num_proc': num_proc, 'load_from_cache_file': load_from_cache_file}) + map_kwargs.update({'num_proc': num_proc}) with self._patch_arrow_writer(): try: dataset_mapped = dataset.map( @@ -462,9 +465,9 @@ def __call__( *, num_proc: int = 1, strict: bool = False, - load_from_cache_file: bool = False, + enable_cache: 
bool = False, ) -> DATASET_TYPE: dataset = get_features_dataset(dataset) dataset = dataset.rename_columns(self.columns_mapping) preprocessor = self._get_preprocessor(dataset) - return preprocessor(dataset, num_proc=num_proc, load_from_cache_file=load_from_cache_file, strict=strict) + return preprocessor(dataset, num_proc=num_proc, enable_cache=enable_cache, strict=strict) diff --git a/swift/llm/infer/infer.py b/swift/llm/infer/infer.py index 1802ec402d..9741c042bf 100644 --- a/swift/llm/infer/infer.py +++ b/swift/llm/infer/infer.py @@ -182,9 +182,9 @@ def infer_dataset(self) -> List[Dict[str, Any]]: if self.jsonl_writer: self.jsonl_writer.append(data) else: - is_dist = args.world_size > 1 and dist.is_initialized() + is_dist = args.global_world_size > 1 and dist.is_initialized() if is_dist: - val_dataset = val_dataset.shard(args.world_size, args.rank, contiguous=True) + val_dataset = val_dataset.shard(args.global_world_size, args.rank, contiguous=True) val_dataset = list(val_dataset) labels_list = [InferRequest.remove_response(data['messages']) for data in val_dataset] @@ -197,7 +197,7 @@ def infer_dataset(self) -> List[Dict[str, Any]]: data = {'response': response, 'logprobs': resp.choices[0].logprobs, **data} result_list.append(data) if is_dist: - total_result_list = [None for _ in range(args.world_size)] if args.rank == 0 else None + total_result_list = [None for _ in range(args.global_world_size)] if args.rank == 0 else None dist.gather_object(result_list, total_result_list) result_list = total_result_list and list(chain.from_iterable(total_result_list)) diff --git a/swift/llm/model/__init__.py b/swift/llm/model/__init__.py index 58ba7500c7..5e6a49cbbc 100644 --- a/swift/llm/model/__init__.py +++ b/swift/llm/model/__init__.py @@ -5,5 +5,5 @@ from .register import (MODEL_MAPPING, Model, ModelGroup, ModelMeta, fix_do_sample_warning, get_default_device_map, get_default_torch_dtype, get_matched_model_meta, get_model_info_meta, get_model_name, get_model_tokenizer, get_model_tokenizer_multimodal, get_model_tokenizer_with_flash_attn, - get_model_with_value_head, load_by_unsloth, register_model) + load_by_unsloth, register_model) from .utils import HfConfigFactory, ModelInfo, git_clone_github, safe_snapshot_download diff --git a/swift/llm/model/model/glm.py b/swift/llm/model/model/glm.py index bb22dfc793..3b9a699769 100644 --- a/swift/llm/model/model/glm.py +++ b/swift/llm/model/model/glm.py @@ -180,6 +180,7 @@ def get_model_tokenizer_glm4v(model_dir: str, MLLMModelType.glm4v, [ModelGroup([ Model('ZhipuAI/glm-4v-9b', 'THUDM/glm-4v-9b'), + Model('ZhipuAI/cogagent-9b-20241220', 'THUDM/cogagent-9b-20241220'), ])], TemplateType.glm4v, get_model_tokenizer_glm4v, diff --git a/swift/llm/model/register.py b/swift/llm/model/register.py index 8cf83a5547..8e3428cc6a 100644 --- a/swift/llm/model/register.py +++ b/swift/llm/model/register.py @@ -374,7 +374,7 @@ def get_model_info_meta( if model_meta is None and model_type is not None: model_meta = MODEL_MAPPING[model_type] if model_meta is None: - model_meta = ModelMeta('', [], 'dummy', get_model_tokenizer_from_local, model_arch=None) + model_meta = ModelMeta(None, [], 'dummy', get_model_tokenizer_from_local, model_arch=None) logger.info(f'Temporarily create model_meta: {model_meta}') if torch_dtype is None: diff --git a/swift/llm/template/template/glm.py b/swift/llm/template/template/glm.py index 46b73043dc..e9ee3d008a 100644 --- a/swift/llm/template/template/glm.py +++ b/swift/llm/template/template/glm.py @@ -64,6 +64,7 @@ def _encode(self, inputs: 
StdTemplateInputs) -> Dict[str, Any]: encoded['images'] = inputs2['images'] encoded['input_ids'] = input_ids encoded['labels'] = labels + encoded['position_ids'] = list(range(len(input_ids))) return encoded def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: Optional[int] = None) -> Dict[str, Any]: diff --git a/swift/llm/train/sft.py b/swift/llm/train/sft.py index 56ce3d87cb..c8ffefa33d 100644 --- a/swift/llm/train/sft.py +++ b/swift/llm/train/sft.py @@ -225,7 +225,7 @@ def _prepare_callbacks(self): def _stat_dataset(self, dataset: HfDataset): args = self.args dataset = GetLengthPreprocessor()( - dataset, num_proc=args.dataset_num_proc, load_from_cache_file=args.load_from_cache_file) + dataset, num_proc=args.dataset_num_proc, enable_cache=args.enable_cache) _, stat_str = stat_array(dataset['length']) logger.info(f'Dataset Token Length: {stat_str}') return stat_str @@ -247,13 +247,13 @@ def _encode_dataset(self, train_dataset, val_dataset): train_dataset, num_proc=args.dataset_num_proc, strict=args.strict, - load_from_cache_file=args.load_from_cache_file) + enable_cache=args.enable_cache) if val_dataset is not None and not args.predict_with_generate: val_dataset = preprocessor( val_dataset, num_proc=args.dataset_num_proc, strict=args.strict, - load_from_cache_file=args.load_from_cache_file) + enable_cache=args.enable_cache) inputs = train_dataset[0] if hasattr(train_dataset, '__len__') else next(iter(train_dataset)) template.print_inputs(inputs, tokenizer_kwargs=inputs.pop('tokenizer_kwargs', None) or {}) diff --git a/swift/utils/torchacc_utils.py b/swift/utils/torchacc_utils.py index d009ce65da..cd21f4b7bc 100644 --- a/swift/utils/torchacc_utils.py +++ b/swift/utils/torchacc_utils.py @@ -254,9 +254,9 @@ def save_ta_fsdp_checkpoint(self_model, tokenizer, args, output_dir): 'shard_metadata': self_model._get_underlay_model().get_shard_metadata(), } if isinstance(model, PeftModel): - ckpt_path = os.path.join(output_dir, f'rank{args.process_index}-of-{args.world_size}-adapter_model.bin') + ckpt_path = os.path.join(output_dir, f'rank{args.process_index}-of-{args.global_world_size}-adapter_model.bin') else: - ckpt_path = os.path.join(output_dir, f'rank{args.process_index}-of-{args.world_size}-pytorch_model.bin') + ckpt_path = os.path.join(output_dir, f'rank{args.process_index}-of-{args.global_world_size}-pytorch_model.bin') xm.save(ckpt, ckpt_path, master_only=False) # Make sure all ranks have saved checkpoints xm.rendezvous('save_full_checkpoints') From e19bffce3e22e5c617291caaafc643eea7f43af3 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 31 Dec 2024 17:50:58 +0800 Subject: [PATCH 10/47] update --- swift/llm/dataset/preprocessor/core.py | 7 ++----- swift/llm/model/model/__init__.py | 2 +- swift/llm/model/model/reward_model.py | 4 ++-- swift/llm/train/rlhf.py | 1 - 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/swift/llm/dataset/preprocessor/core.py b/swift/llm/dataset/preprocessor/core.py index d4c01416ab..1f42f5f4a1 100644 --- a/swift/llm/dataset/preprocessor/core.py +++ b/swift/llm/dataset/preprocessor/core.py @@ -6,8 +6,9 @@ import numpy as np from datasets import Dataset as HfDataset -from datasets import Image, Value, enable_caching, disable_caching +from datasets import Image from datasets import IterableDataset as HfIterableDataset +from datasets import Value from swift.llm import history_to_messages from swift.utils import get_logger @@ -251,10 +252,6 @@ def __call__( if self.dataset_sample is not None: dataset = sample_dataset(dataset, 
self.dataset_sample, self.random_state) - if enable_cache: - enable_caching() - else: - disable_caching() dataset = self._rename_columns(dataset) dataset = self.prepare_dataset(dataset) dataset = self._cast_pil_image(dataset) diff --git a/swift/llm/model/model/__init__.py b/swift/llm/model/model/__init__.py index 82ebf432f9..3f190926e4 100644 --- a/swift/llm/model/model/__init__.py +++ b/swift/llm/model/model/__init__.py @@ -1,2 +1,2 @@ from . import (baai, baichuan, bert, codefuse, deepseek, gemma, glm, internlm, llama, llava, llm, mamba, microsoft, - minicpm, mistral, mllm, mplug, openbuddy, qwen, telechat, yi, reward_model) + minicpm, mistral, mllm, mplug, openbuddy, qwen, reward_model, telechat, yi) diff --git a/swift/llm/model/model/reward_model.py b/swift/llm/model/model/reward_model.py index 63b0bb0c90..522b6a9ac3 100644 --- a/swift/llm/model/model/reward_model.py +++ b/swift/llm/model/model/reward_model.py @@ -1,6 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from transformers import AutoConfig -from transformers import AutoModel +from transformers import AutoConfig, AutoModel + from swift.utils import get_logger from ..constant import LLMModelType from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 3ec858b1b3..2e5bff8910 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -32,7 +32,6 @@ def _prepare_template(self) -> None: # Avoid padding labels during the model's forward pass in multimodal models. self.template.loss_scale = 'last_round' - def _get_dataset(self): args = self.args train_dataset, val_dataset = super()._get_dataset() From b7c28aa8ced422688c795f640cd23588dc37a1d9 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 13:33:52 +0800 Subject: [PATCH 11/47] update --- ...\275\344\273\244\350\241\214\345\217\202\346\225\260.md" | 6 +++--- docs/source_en/Instruction/Command-line-parameters.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 1f8804dfd5..f26a169257 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -45,7 +45,7 @@ ### 模板参数 - 🔥template: 对话模板类型,默认使用model对应的template类型。`swift pt`会将对话模版转为生成模板使用 - 🔥system: 自定义system字段,默认为None,使用template的默认system -- 🔥max_length: 单样本的tokens最大长度,默认为None,不做限制 +- 🔥max_length: 单样本的tokens最大长度。默认为None,设置为模型支持的tokens最大长度(max_model_len) - truncation_strategy: 如果超长如何处理,支持`delete`, `left`和`right`,代表删除、左侧裁剪和右侧裁剪,默认为'delete' - 🔥max_pixels: 多模态模型图片前处理的最大像素数(H\*W),默认不缩放。 - tools_prompt: 智能体训练时的工具列表转为system的格式,请参考[智能体训练](./智能体的支持.md),默认为'react_en' @@ -96,7 +96,7 @@ - lr_scheduler_type: lr_scheduler类型,默认为cosine - lr_scheduler_kwargs: lr_scheduler其他参数 - 🔥gradient_checkpointing_kwargs: 传入`torch.utils.checkpoint`中的参数. 例如设置为`--gradient_checkpointing_kwargs '{"use_reentrant": false}'` -- report_to: 默认值为`tensorboard` +- report_to: 默认值为`tensorboard`。你也可以指定`--report_to tensorboard wandb` - remove_unused_columns: 默认值False - logging_first_step: 是否记录第一个step的打印,默认值True - logging_steps: 日志打印间隔,默认值5 @@ -139,7 +139,7 @@ #### 全参 - freeze_parameters: 被冻结参数的前缀, 默认为`[]` - freeze_parameters_ratio: 从下往上冻结的参数比例, 默认为0. 
可设置为1将所有参数冻结, 结合`trainable_parameters`设置可训练参数. -- trainable_parameters: 可训练参数的前缀, 默认为`[]` +- trainable_parameters: 可训练参数的前缀, 默认为`[]`. `trainable_parameters`的优先级高于`freeze_parameters`和`freeze_parameters_ratio` #### LoRA - 🔥lora_rank: 默认为`8` diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index f6f572230f..2325e4aeae 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -45,7 +45,7 @@ The introduction to command line parameters will cover base arguments, atomic ar ### Template Arguments - 🔥template: Type of dialogue template, which defaults to the template type corresponding to the model. `swift pt` will convert the dialogue template into a generation template for use. - 🔥system: Custom system field, default is None, uses the default system of the template. -- 🔥max_length: Maximum length of tokens for a single sample, default is None (no limit). +- 🔥max_length: The maximum length of tokens for a single sample. Defaults to None, set to the maximum length of tokens supported by the model (max_model_len). - truncation_strategy: How to handle overly long tokens, supports `delete`, `left`, `right`, representing deletion, left trimming, and right trimming, default is 'delete'. - 🔥max_pixels: Maximum pixel count for pre-processing images in multimodal models (H*W), default is no scaling. - tools_prompt: The list of tools for agent training converted to system format, refer to [Agent Training](./Agent-support.md), default is 'react_en'. @@ -97,7 +97,7 @@ This parameter list inherits from transformers `Seq2SeqTrainingArguments`, with - lr_scheduler_type: LR scheduler type, default is cosine. - lr_scheduler_kwargs: Other parameters for the LR scheduler. - 🔥gradient_checkpointing_kwargs: Parameters passed to `torch.utils.checkpoint`. For example, set to `--gradient_checkpointing_kwargs '{"use_reentrant": false}'`. -- report_to: Default is `tensorboard`. +- report_to: Default is `tensorboard`. You can also specify `--report_to tensorboard wandb`. - remove_unused_columns: Default is False. - logging_first_step: Whether to log the first step print, default is True. - logging_steps: Interval for logging prints, default is 5. @@ -141,7 +141,7 @@ Other important parameters: - freeze_parameters: Prefix of parameters to be frozen, default is `[]`. - freeze_parameters_ratio: Ratio of parameters to freeze from the bottom up, default is 0. Setting it to 1 will freeze all parameters. Combine with `trainable_parameters` to set trainable parameters. -- trainable_parameters: Prefix of trainable parameters, default is `[]`. +- trainable_parameters: Prefix of trainable parameters, default is `[]`. The priority of `trainable_parameters` is higher than that of `freeze_parameters` and `freeze_parameters_ratio`. 
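A minimal full-parameter sketch combining the two options (the model and dataset below are illustrative only):

```shell
# Freeze the lower 50% of parameters from the bottom up, then explicitly keep lm_head trainable
# (trainable_parameters takes priority over freeze_parameters_ratio).
swift sft \
    --model Qwen/Qwen2-7B-Instruct \
    --dataset 'AI-ModelScope/alpaca-gpt4-data-en#500' \
    --train_type full \
    --freeze_parameters_ratio 0.5 \
    --trainable_parameters lm_head
```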
#### LoRA From d8b210532f236dfcabfe4ad6a8fc214d5a3d1cce Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 14:04:34 +0800 Subject: [PATCH 12/47] fix --- examples/train/multimodal/caption.sh | 2 +- examples/train/multimodal/infer.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/train/multimodal/caption.sh b/examples/train/multimodal/caption.sh index e75ce7f78a..c9d496fc0f 100644 --- a/examples/train/multimodal/caption.sh +++ b/examples/train/multimodal/caption.sh @@ -5,7 +5,7 @@ CUDA_VISIBLE_DEVICES=0 \ MAX_PIXELS=1003520 \ swift sft \ --model Qwen/Qwen2-VL-7B-Instruct \ - --dataset 'modelscope/coco_2014_caption#20000' \ + --dataset 'modelscope/coco_2014_caption:validation#20000' \ --train_type lora \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ diff --git a/examples/train/multimodal/infer.sh b/examples/train/multimodal/infer.sh index 2e8627319f..699ede32d7 100644 --- a/examples/train/multimodal/infer.sh +++ b/examples/train/multimodal/infer.sh @@ -1,5 +1,6 @@ # Perform inference using the validation set from the training phase. CUDA_VISIBLE_DEVICES=0 \ +MAX_PIXELS=1003520 \ swift infer \ --adapters output/vx-xxx/checkpoint-xxx \ --stream true \ From f726d0adcc35b50cc991c8d8078515d683dc20a5 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 15:16:44 +0800 Subject: [PATCH 13/47] update --- ...44\350\241\214\345\217\202\346\225\260.md" | 2 +- .../Instruction/Command-line-parameters.md | 2 +- swift/llm/infer/infer_engine/pt_engine.py | 20 ++++++++++++----- swift/llm/model/constant.py | 10 ++++----- swift/llm/model/model/__init__.py | 2 +- swift/llm/model/model/bert.py | 6 ++--- swift/llm/model/model/internlm.py | 22 +++++++++++++++++-- swift/llm/model/model/reward_model.py | 15 ++++++++----- swift/llm/model/register.py | 7 +++++- swift/llm/template/constant.py | 6 ++++- swift/llm/template/template/internlm.py | 3 ++- 11 files changed, 68 insertions(+), 27 deletions(-) diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index f26a169257..e7fa6af9d5 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -19,7 +19,7 @@ - 🔥model: 模型id或模型本地路径。如果是自定义模型请配合`model_type`和`template`使用,具体可以参考[自定义模型](../Customization/自定义模型.md) - model_type: 模型类型。相同的模型架构、template、模型加载过程被定义为一个model_type - model_revision: 模型版本 -- task_type: 默认为'causal_lm'. 可选为'causal_lm', 'seq_cls'. 例子可以查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). +- task_type: 默认为'causal_lm'. 可选为'causal_lm', 'seq_cls'. seq_cls的例子可以查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). - 🔥torch_dtype: 模型权重的数据类型,支持`float16`,`bfloat16`,`float32`,默认从config文件中读取 - attn_impl: attention类型,支持`flash_attn`, `sdpa`, `eager`,默认使用sdpa - num_labels: 分类模型需要指定。代表标签数量,默认为None diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index 2325e4aeae..d3a3cc1599 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -20,7 +20,7 @@ The introduction to command line parameters will cover base arguments, atomic ar - model_type: Model type. The same model architecture, template, and loading process define a model_type. 
- model_revision: Model version. - 🔥torch_dtype: Data type for model weights, supports `float16`, `bfloat16`, `float32`, default is read from the config file. -- task_type: Defaults to 'causal_lm'. Options include 'causal_lm' and 'seq_cls'. You can view examples [here](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). +- task_type: Defaults to 'causal_lm'. Options include 'causal_lm' and 'seq_cls'. You can view examples of seq_cls [here](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). - attn_impl: Attention type, supports `flash_attn`, `sdpa`, `eager`, default is sdpa. - num_labels: To be specified for classification models, representing the number of labels, default is None. - rope_scaling: Rope type, supports `linear` and `dynamic`, to be used with `max_length`. diff --git a/swift/llm/infer/infer_engine/pt_engine.py b/swift/llm/infer/infer_engine/pt_engine.py index f68f32da86..63317a8d20 100644 --- a/swift/llm/infer/infer_engine/pt_engine.py +++ b/swift/llm/infer/infer_engine/pt_engine.py @@ -159,11 +159,13 @@ def _infer_stream(self, raise ValueError(error_msg) streamer = TokensIteratorStreamer() generate_kwargs = { - 'adapter_names': self._get_adapter_names(adapter_request), 'generation_config': generation_config, 'streamer': streamer, **inputs, } + adapter_names = self._get_adapter_names(adapter_request) + if adapter_names is not None: + generate_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) logits_streamer = None @@ -272,12 +274,18 @@ def _infer_seq_cls(self, inputs: Dict[str, Any], adapter_request: Optional[AdapterRequest] = None, **kwargs): - call_kwargs = {'adapter_names': self._get_adapter_names(adapter_request)} + call_kwargs = {} + adapter_names = self._get_adapter_names(adapter_request) + if adapter_names is not None: + call_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) inputs.pop('labels') logits = self.model(**inputs, **call_kwargs).logits - logprobs = torch.log_softmax(logits, -1) - preds = torch.argmax(logits, dim=-1).tolist() + if logits.shape[-1] > 1: + logprobs = torch.log_softmax(logits, -1) + preds = torch.argmax(logits, dim=-1).tolist() + else: + preds = logits.squeeze(dim=-1).tolist() res = [] for i, pred in enumerate(preds): usage_info = self._get_usage_info(num_prompt_tokens, 1) @@ -300,10 +308,12 @@ def _infer_full(self, template_inputs=None) -> Union[List[ChatCompletionResponse]]: # bos_token TODO: encoder-decoder generate_kwargs = { - 'adapter_names': self._get_adapter_names(adapter_request), 'generation_config': generation_config, **inputs } + adapter_names = self._get_adapter_names(adapter_request) + if adapter_names is not None: + generate_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) generate_kwargs = template.prepare_generate_kwargs(generate_kwargs, model=self.model) diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py index 82a89bd036..63c0a82f90 100644 --- a/swift/llm/model/constant.py +++ b/swift/llm/model/constant.py @@ -2,7 +2,6 @@ # Classification criteria for model_type: same model architecture, tokenizer (get function), template. 
from typing import List - class LLMModelType: qwen = 'qwen' qwen2 = 'qwen2' @@ -93,12 +92,13 @@ class LLMModelType: mamba = 'mamba' polylm = 'polylm' aya = 'aya' - # bert + +class BertModelType: modern_bert = 'modern_bert' bert = 'bert' - # reward model - reward_model = 'reward_model' +class RMModelType: + internlm2_reward = 'internlm2_reward' class MLLMModelType: qwen_vl = 'qwen_vl' @@ -176,7 +176,7 @@ class MLLMModelType: megrez_omni = 'megrez_omni' -class ModelType(LLMModelType, MLLMModelType): +class ModelType(LLMModelType, MLLMModelType, BertModelType, RMModelType): @classmethod def get_model_name_list(cls) -> List[str]: diff --git a/swift/llm/model/model/__init__.py b/swift/llm/model/model/__init__.py index 3f190926e4..a972ec64ef 100644 --- a/swift/llm/model/model/__init__.py +++ b/swift/llm/model/model/__init__.py @@ -1,2 +1,2 @@ from . import (baai, baichuan, bert, codefuse, deepseek, gemma, glm, internlm, llama, llava, llm, mamba, microsoft, - minicpm, mistral, mllm, mplug, openbuddy, qwen, reward_model, telechat, yi) + minicpm, mistral, mllm, mplug, openbuddy, qwen, telechat, yi) diff --git a/swift/llm/model/model/bert.py b/swift/llm/model/model/bert.py index f83aef3536..785a3fa137 100644 --- a/swift/llm/model/model/bert.py +++ b/swift/llm/model/model/bert.py @@ -2,7 +2,7 @@ from transformers import AutoConfig from swift.utils import get_logger -from ..constant import LLMModelType +from ..constant import BertModelType from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model logger = get_logger() @@ -17,7 +17,7 @@ def get_model_tokenizer_modern_bert(model_dir, *args, **kwargs): register_model( ModelMeta( - LLMModelType.modern_bert, [ + BertModelType.modern_bert, [ ModelGroup([ Model('answerdotai/ModernBERT-base', 'answerdotai/ModernBERT-base'), Model('answerdotai/ModernBERT-large', 'answerdotai/ModernBERT-large'), @@ -30,7 +30,7 @@ def get_model_tokenizer_modern_bert(model_dir, *args, **kwargs): register_model( ModelMeta( - LLMModelType.bert, [ModelGroup([ + BertModelType.bert, [ModelGroup([ Model('iic/nlp_structbert_backbone_base_std'), ])], None, diff --git a/swift/llm/model/model/internlm.py b/swift/llm/model/model/internlm.py index 590744455f..45b65f03a9 100644 --- a/swift/llm/model/model/internlm.py +++ b/swift/llm/model/model/internlm.py @@ -5,10 +5,12 @@ from transformers.dynamic_module_utils import get_class_from_dynamic_module from swift.llm import TemplateType -from ..constant import LLMModelType, MLLMModelType +from ..constant import LLMModelType, MLLMModelType, RMModelType from ..model_arch import ModelArch from ..patcher import patch_output_clone, patch_output_to_input_device -from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model +from ..register import ( + Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model, get_model_tokenizer_reward_model +) from ..utils import ModelInfo, safe_snapshot_download, use_submodel_func register_model( @@ -332,3 +334,19 @@ def get_model_tokenizer_xcomposer_ol(model_dir, *args, **kwargs): model_arch=ModelArch.qwen2_audio, tags=['audio'], )) + + +register_model( + ModelMeta( + RMModelType.internlm2_reward, [ + ModelGroup([ + Model('Shanghai_AI_Laboratory/internlm2-1_8b-reward', 'internlm/internlm2-1_8b-reward'), + Model('Shanghai_AI_Laboratory/internlm2-7b-reward', 'internlm/internlm2-7b-reward'), + Model('Shanghai_AI_Laboratory/internlm2-20b-reward', 'internlm/internlm2-20b-reward'), + ]), + ], + 
TemplateType.internlm2_reward, + get_model_tokenizer_reward_model, + requires=['transformers>=4.38'], + architectures=['InternLM2ForRewardModel'], + tags=['reward_model'])) diff --git a/swift/llm/model/model/reward_model.py b/swift/llm/model/model/reward_model.py index 522b6a9ac3..85435d3ea1 100644 --- a/swift/llm/model/model/reward_model.py +++ b/swift/llm/model/model/reward_model.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from transformers import AutoConfig, AutoModel +from swift.llm import TemplateType from swift.utils import get_logger from ..constant import LLMModelType from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model @@ -17,17 +18,19 @@ def get_model_tokenizer_reward_model(model_dir, *args, **kwargs): register_model( ModelMeta( - LLMModelType.reward_model, [ - ModelGroup([ - Model('Qwen/Qwen2.5-Math-RM-72B', 'Qwen/Qwen2.5-Math-RM-72B'), - Model('Qwen/Qwen2-Math-RM-72B', 'Qwen/Qwen2-Math-RM-72B'), - ]), + LLMModelType.internlm2_reward, [ ModelGroup([ Model('Shanghai_AI_Laboratory/internlm2-1_8b-reward', 'internlm/internlm2-1_8b-reward'), Model('Shanghai_AI_Laboratory/internlm2-7b-reward', 'internlm/internlm2-7b-reward'), Model('Shanghai_AI_Laboratory/internlm2-20b-reward', 'internlm/internlm2-20b-reward'), ]), ], - None, + TemplateType.internlm2_reward, get_model_tokenizer_reward_model, tags=['reward_model'])) + + + # ModelGroup([ + # Model('Qwen/Qwen2.5-Math-RM-72B', 'Qwen/Qwen2.5-Math-RM-72B'), + # Model('Qwen/Qwen2-Math-RM-72B', 'Qwen/Qwen2-Math-RM-72B'), + # ]), \ No newline at end of file diff --git a/swift/llm/model/register.py b/swift/llm/model/register.py index 8e3428cc6a..6fed4f7073 100644 --- a/swift/llm/model/register.py +++ b/swift/llm/model/register.py @@ -9,7 +9,7 @@ import torch from peft import PeftModel from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, PretrainedConfig, - PreTrainedModel, PreTrainedTokenizerBase) + PreTrainedModel, PreTrainedTokenizerBase, AutoModel) from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils import is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_npu_available, strtobool from transformers.utils.versions import require_version @@ -216,6 +216,11 @@ def get_model_tokenizer_multimodal(model_dir: str, *args, **kwargs): model, _ = get_model_tokenizer_with_flash_attn(model_dir, *args, **kwargs) return model, processor +def get_model_tokenizer_reward_model(model_dir, *args, **kwargs): + model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) + if 'AutoModel' in (getattr(model_config, 'auto_map', None) or {}): + kwargs['automodel_class'] = AutoModel + return get_model_tokenizer_with_flash_attn(model_dir, *args, **kwargs) def fix_do_sample_warning(generation_config: GenerationConfig) -> None: # Use the default values of temperature/top_p/top_k in generation_config. 
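With the `internlm2_reward` model type, its template and the `get_model_tokenizer_reward_model` loader registered above, the reward model can be exercised through the CLI. A rough sketch, not verified against this commit (the model id is the one listed in the `register_model` entry; a reward model returns a scalar score rather than generated text):

```shell
# Sketch only: load the newly registered internlm2 reward model via the PT backend and score inputs.
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --model Shanghai_AI_Laboratory/internlm2-1_8b-reward \
    --infer_backend pt
```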
diff --git a/swift/llm/template/constant.py b/swift/llm/template/constant.py index 963ad75de3..6b106fb081 100644 --- a/swift/llm/template/constant.py +++ b/swift/llm/template/constant.py @@ -70,6 +70,10 @@ class LLMTemplateType: dbrx = 'dbrx' +class RMTemplateType: + internlm2_reward = 'internlm2_reward' + + class MLLMTemplateType: qwen_vl = 'qwen_vl' qwen_audio = 'qwen_audio' @@ -144,7 +148,7 @@ class MLLMTemplateType: megrez_omni = 'megrez_omni' -class TemplateType(LLMTemplateType, MLLMTemplateType): +class TemplateType(LLMTemplateType, MLLMTemplateType, RMTemplateType): @classmethod def get_template_name_list(cls) -> List[str]: diff --git a/swift/llm/template/template/internlm.py b/swift/llm/template/template/internlm.py index 9d07844089..ba917f766c 100644 --- a/swift/llm/template/template/internlm.py +++ b/swift/llm/template/template/internlm.py @@ -8,7 +8,7 @@ from swift.utils import get_env_args from ..base import Template -from ..constant import LLMTemplateType, MLLMTemplateType +from ..constant import LLMTemplateType, MLLMTemplateType, RMTemplateType from ..register import TemplateMeta, register_template from ..template_inputs import StdTemplateInputs from ..utils import Context, Prompt, Word @@ -34,6 +34,7 @@ register_template(ChatmlTemplateMeta(LLMTemplateType.internlm2, default_system=INTERNLM_SYSTEM)) +register_template(ChatmlTemplateMeta(RMTemplateType.internlm2_reward, default_system=INTERNLM_SYSTEM)) class InternLMXComposer2Template(Template): image_placeholder = [''] From d5dfcabc671e86c593c7ac33a5dd2025a6082346 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 15:16:59 +0800 Subject: [PATCH 14/47] update --- swift/llm/infer/infer_engine/pt_engine.py | 6 ++++-- swift/llm/model/model/reward_model.py | 20 +------------------- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/swift/llm/infer/infer_engine/pt_engine.py b/swift/llm/infer/infer_engine/pt_engine.py index 63317a8d20..b355abccd7 100644 --- a/swift/llm/infer/infer_engine/pt_engine.py +++ b/swift/llm/infer/infer_engine/pt_engine.py @@ -282,10 +282,12 @@ def _infer_seq_cls(self, inputs.pop('labels') logits = self.model(**inputs, **call_kwargs).logits if logits.shape[-1] > 1: - logprobs = torch.log_softmax(logits, -1) preds = torch.argmax(logits, dim=-1).tolist() + logprobs = torch.log_softmax(logits, -1) + logprobs = [self._get_seq_cls_logprobs(logprobs[i]) for i in range(preds)] else: preds = logits.squeeze(dim=-1).tolist() + logprobs = [None] * len(preds) res = [] for i, pred in enumerate(preds): usage_info = self._get_usage_info(num_prompt_tokens, 1) @@ -294,7 +296,7 @@ def _infer_seq_cls(self, index=0, message=ChatMessage(role='assistant', content=str(pred), tool_calls=None), finish_reason='stop', - logprobs=self._get_seq_cls_logprobs(logprobs[i])) + logprobs=logprobs[i]) ] res.append(ChatCompletionResponse(model=self.model_name, choices=choices, usage=usage_info)) return res diff --git a/swift/llm/model/model/reward_model.py b/swift/llm/model/model/reward_model.py index 85435d3ea1..b5a72c12db 100644 --- a/swift/llm/model/model/reward_model.py +++ b/swift/llm/model/model/reward_model.py @@ -3,32 +3,14 @@ from swift.llm import TemplateType from swift.utils import get_logger -from ..constant import LLMModelType +from ..constant import RMModelType from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model logger = get_logger() -def get_model_tokenizer_reward_model(model_dir, *args, **kwargs): - model_config = AutoConfig.from_pretrained(model_dir, 
trust_remote_code=True) - if 'AutoModel' in (getattr(model_config, 'auto_map', None) or {}): - kwargs['automodel_class'] = AutoModel - return get_model_tokenizer_from_local(model_dir, *args, **kwargs) -register_model( - ModelMeta( - LLMModelType.internlm2_reward, [ - ModelGroup([ - Model('Shanghai_AI_Laboratory/internlm2-1_8b-reward', 'internlm/internlm2-1_8b-reward'), - Model('Shanghai_AI_Laboratory/internlm2-7b-reward', 'internlm/internlm2-7b-reward'), - Model('Shanghai_AI_Laboratory/internlm2-20b-reward', 'internlm/internlm2-20b-reward'), - ]), - ], - TemplateType.internlm2_reward, - get_model_tokenizer_reward_model, - tags=['reward_model'])) - # ModelGroup([ # Model('Qwen/Qwen2.5-Math-RM-72B', 'Qwen/Qwen2.5-Math-RM-72B'), From d6097529eb7db87a2f24e70459a327124e74a710 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 15:17:44 +0800 Subject: [PATCH 15/47] update --- ...5\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" | 1 + docs/source_en/Instruction/Command-line-parameters.md | 1 + 2 files changed, 2 insertions(+) diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 5197429f21..4ed3931f77 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -419,6 +419,7 @@ App参数继承于[部署参数](#部署参数), [Web-UI参数](#Web-UI参数) - INPUT_SIZE: 默认为448 ### internvl2, internvl2_phi3, internvl2_5 +参数含义可以查看[这里](https://modelscope.cn/models/OpenGVLab/InternVL2_5-2B) - MAX_NUM: 默认为12 - INPUT_SIZE: 默认为448 - VIDEO_MAX_NUM: 默认为1。视频的MAX_NUM diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index a0eab37fc4..c4345b6405 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -419,6 +419,7 @@ For the meaning of the arguments, please refer to [here](https://modelscope.cn/m - INPUT_SIZE: Default is 448 ### internvl2, internvl2_phi3, internvl2_5 +For the meaning of the arguments, please refer to [here](https://modelscope.cn/models/OpenGVLab/InternVL2_5-2B) - MAX_NUM: Default is 12 - INPUT_SIZE: Default is 448 - VIDEO_MAX_NUM: Default is 1, which is the MAX_NUM for videos From 80157a61516d0c3f900a8621861c01a517b98776 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 15:22:29 +0800 Subject: [PATCH 16/47] fix bugs --- ...44\350\241\214\345\217\202\346\225\260.md" | 10 +++---- .../Instruction/Command-line-parameters.md | 10 +++---- examples/train/multimodal/caption.sh | 2 +- examples/train/multimodal/infer.sh | 1 + swift/llm/infer/infer_engine/pt_engine.py | 29 ++++++++++++------- swift/llm/model/constant.py | 6 ++-- swift/llm/model/model/bert.py | 6 ++-- 7 files changed, 38 insertions(+), 26 deletions(-) diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 4ed3931f77..4af7091fee 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -19,7 +19,7 @@ - 🔥model: 模型id或模型本地路径。如果是自定义模型请配合`model_type`和`template`使用,具体可以参考[自定义模型](../Customization/自定义模型.md) - model_type: 
模型类型。相同的模型架构、template、模型加载过程被定义为一个model_type - model_revision: 模型版本 -- task_type: 默认为'causal_lm'. 可选为'causal_lm', 'seq_cls'. 例子可以查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). +- task_type: 默认为'causal_lm'. 可选为'causal_lm', 'seq_cls'. seq_cls的例子可以查看[这里](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). - 🔥torch_dtype: 模型权重的数据类型,支持`float16`,`bfloat16`,`float32`,默认从config文件中读取 - attn_impl: attention类型,支持`flash_attn`, `sdpa`, `eager`,默认使用sdpa - num_labels: 分类模型需要指定。代表标签数量,默认为None @@ -45,7 +45,7 @@ ### 模板参数 - 🔥template: 对话模板类型,默认使用model对应的template类型。`swift pt`会将对话模版转为生成模板使用 - 🔥system: 自定义system字段,默认为None,使用template的默认system -- 🔥max_length: 单样本的tokens最大长度,默认为None,不做限制 +- 🔥max_length: 单样本的tokens最大长度。默认为None,设置为模型支持的tokens最大长度(max_model_len) - truncation_strategy: 如果超长如何处理,支持`delete`, `left`和`right`,代表删除、左侧裁剪和右侧裁剪,默认为'delete' - 🔥max_pixels: 多模态模型图片前处理的最大像素数(H\*W),默认不缩放。 - tools_prompt: 智能体训练时的工具列表转为system的格式,请参考[智能体训练](./智能体的支持.md),默认为'react_en' @@ -96,7 +96,7 @@ - lr_scheduler_type: lr_scheduler类型,默认为cosine - lr_scheduler_kwargs: lr_scheduler其他参数 - 🔥gradient_checkpointing_kwargs: 传入`torch.utils.checkpoint`中的参数. 例如设置为`--gradient_checkpointing_kwargs '{"use_reentrant": false}'` -- report_to: 默认值为`tensorboard` +- report_to: 默认值为`tensorboard`。你也可以指定`--report_to tensorboard wandb`, `--report_to all` - remove_unused_columns: 默认值False - logging_first_step: 是否记录第一个step的打印,默认值True - logging_steps: 日志打印间隔,默认值5 @@ -139,7 +139,7 @@ #### 全参 - freeze_parameters: 被冻结参数的前缀, 默认为`[]` - freeze_parameters_ratio: 从下往上冻结的参数比例, 默认为0. 可设置为1将所有参数冻结, 结合`trainable_parameters`设置可训练参数. -- trainable_parameters: 可训练参数的前缀, 默认为`[]` +- trainable_parameters: 可训练参数的前缀, 默认为`[]`. `trainable_parameters`的优先级高于`freeze_parameters`和`freeze_parameters_ratio` #### LoRA - 🔥lora_rank: 默认为`8` @@ -306,7 +306,7 @@ Vera使用`target_modules`, `target_regex`, `modules_to_save`三个参数. ### RLHF参数 RLHF参数继承于[训练参数](#训练参数) -- 🔥rlhf_type: 对齐算法类型,支持`dpo`, `orpo`, `simpo`, `kto`, `cpo` +- 🔥rlhf_type: 对齐算法类型,支持`dpo`, `orpo`, `simpo`, `kto`, `cpo`, `rm` - ref_model: DPO等算法中的原始对比模型 - ref_model_type: 同model_type - ref_model_revision: 同model_revision diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index c4345b6405..5fba09ac7b 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -20,7 +20,7 @@ The introduction to command line parameters will cover base arguments, atomic ar - model_type: Model type. The same model architecture, template, and loading process define a model_type. - model_revision: Model version. - 🔥torch_dtype: Data type for model weights, supports `float16`, `bfloat16`, `float32`, default is read from the config file. -- task_type: Defaults to 'causal_lm'. Options include 'causal_lm' and 'seq_cls'. You can view examples [here](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). +- task_type: Defaults to 'causal_lm'. Options include 'causal_lm' and 'seq_cls'. You can view examples of seq_cls [here](https://github.com/modelscope/ms-swift/tree/main/examples/train/seq_cls). - attn_impl: Attention type, supports `flash_attn`, `sdpa`, `eager`, default is sdpa. - num_labels: To be specified for classification models, representing the number of labels, default is None. - rope_scaling: Rope type, supports `linear` and `dynamic`, to be used with `max_length`. 
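The `task_type` / `num_labels` pair documented above corresponds to the sequence-classification example touched later in this patch series (`examples/train/seq_cls`). A minimal sketch, assuming, as that example's comment states, that passing `--num_labels` is what switches the run to classification (model and dataset are taken from that example):

```shell
# Sketch only: LoRA fine-tuning of a binary sentiment classifier (seq_cls).
CUDA_VISIBLE_DEVICES=0 \
swift sft \
    --model Qwen/Qwen2.5-0.5B \
    --dataset 'DAMO_NLP/jd:cls#2000' \
    --train_type lora \
    --torch_dtype bfloat16 \
    --num_labels 2
```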
@@ -45,7 +45,7 @@ The introduction to command line parameters will cover base arguments, atomic ar ### Template Arguments - 🔥template: Type of dialogue template, which defaults to the template type corresponding to the model. `swift pt` will convert the dialogue template into a generation template for use. - 🔥system: Custom system field, default is None, uses the default system of the template. -- 🔥max_length: Maximum length of tokens for a single sample, default is None (no limit). +- 🔥max_length: The maximum length of tokens for a single sample. Defaults to None, set to the maximum length of tokens supported by the model (max_model_len). - truncation_strategy: How to handle overly long tokens, supports `delete`, `left`, `right`, representing deletion, left trimming, and right trimming, default is 'delete'. - 🔥max_pixels: Maximum pixel count for pre-processing images in multimodal models (H*W), default is no scaling. - tools_prompt: The list of tools for agent training converted to system format, refer to [Agent Training](./Agent-support.md), default is 'react_en'. @@ -97,7 +97,7 @@ This parameter list inherits from transformers `Seq2SeqTrainingArguments`, with - lr_scheduler_type: LR scheduler type, default is cosine. - lr_scheduler_kwargs: Other parameters for the LR scheduler. - 🔥gradient_checkpointing_kwargs: Parameters passed to `torch.utils.checkpoint`. For example, set to `--gradient_checkpointing_kwargs '{"use_reentrant": false}'`. -- report_to: Default is `tensorboard`. +- report_to: Default is `tensorboard`. You can also specify `--report_to tensorboard wandb`, `--report_to all`. - remove_unused_columns: Default is False. - logging_first_step: Whether to log the first step print, default is True. - logging_steps: Interval for logging prints, default is 5. @@ -141,7 +141,7 @@ Other important parameters: - freeze_parameters: Prefix of parameters to be frozen, default is `[]`. - freeze_parameters_ratio: Ratio of parameters to freeze from the bottom up, default is 0. Setting it to 1 will freeze all parameters. Combine with `trainable_parameters` to set trainable parameters. -- trainable_parameters: Prefix of trainable parameters, default is `[]`. +- trainable_parameters: Prefix of trainable parameters, default is `[]`. The priority of `trainable_parameters` is higher than that of `freeze_parameters` and `freeze_parameters_ratio`. #### LoRA @@ -310,7 +310,7 @@ Training arguments include the [base arguments](#base-arguments), [Seq2SeqTraine RLHF arguments inherit from the [training arguments](#training-arguments). -- 🔥rlhf_type: Alignment algorithm type, supports `dpo`, `orpo`, `simpo`, `kto`, `cpo`. +- 🔥rlhf_type: Alignment algorithm type, supports `dpo`, `orpo`, `simpo`, `kto`, `cpo`, `rm`. - ref_model: Original comparison model in algorithms like DPO. - ref_model_type: Same as model_type. - ref_model_revision: Same as model_revision. 
diff --git a/examples/train/multimodal/caption.sh b/examples/train/multimodal/caption.sh index e75ce7f78a..c9d496fc0f 100644 --- a/examples/train/multimodal/caption.sh +++ b/examples/train/multimodal/caption.sh @@ -5,7 +5,7 @@ CUDA_VISIBLE_DEVICES=0 \ MAX_PIXELS=1003520 \ swift sft \ --model Qwen/Qwen2-VL-7B-Instruct \ - --dataset 'modelscope/coco_2014_caption#20000' \ + --dataset 'modelscope/coco_2014_caption:validation#20000' \ --train_type lora \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ diff --git a/examples/train/multimodal/infer.sh b/examples/train/multimodal/infer.sh index 2e8627319f..699ede32d7 100644 --- a/examples/train/multimodal/infer.sh +++ b/examples/train/multimodal/infer.sh @@ -1,5 +1,6 @@ # Perform inference using the validation set from the training phase. CUDA_VISIBLE_DEVICES=0 \ +MAX_PIXELS=1003520 \ swift infer \ --adapters output/vx-xxx/checkpoint-xxx \ --stream true \ diff --git a/swift/llm/infer/infer_engine/pt_engine.py b/swift/llm/infer/infer_engine/pt_engine.py index f68f32da86..fdf2063a52 100644 --- a/swift/llm/infer/infer_engine/pt_engine.py +++ b/swift/llm/infer/infer_engine/pt_engine.py @@ -159,11 +159,13 @@ def _infer_stream(self, raise ValueError(error_msg) streamer = TokensIteratorStreamer() generate_kwargs = { - 'adapter_names': self._get_adapter_names(adapter_request), 'generation_config': generation_config, 'streamer': streamer, **inputs, } + adapter_names = self._get_adapter_names(adapter_request) + if adapter_names is not None: + generate_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) logits_streamer = None @@ -272,12 +274,20 @@ def _infer_seq_cls(self, inputs: Dict[str, Any], adapter_request: Optional[AdapterRequest] = None, **kwargs): - call_kwargs = {'adapter_names': self._get_adapter_names(adapter_request)} + call_kwargs = {} + adapter_names = self._get_adapter_names(adapter_request) + if adapter_names is not None: + call_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) inputs.pop('labels') logits = self.model(**inputs, **call_kwargs).logits - logprobs = torch.log_softmax(logits, -1) - preds = torch.argmax(logits, dim=-1).tolist() + if logits.shape[-1] > 1: + preds = torch.argmax(logits, dim=-1).tolist() + logprobs = torch.log_softmax(logits, -1) + logprobs = [self._get_seq_cls_logprobs(logprobs[i]) for i in range(preds)] + else: + preds = logits.squeeze(dim=-1).tolist() + logprobs = [None] * len(preds) res = [] for i, pred in enumerate(preds): usage_info = self._get_usage_info(num_prompt_tokens, 1) @@ -286,7 +296,7 @@ def _infer_seq_cls(self, index=0, message=ChatMessage(role='assistant', content=str(pred), tool_calls=None), finish_reason='stop', - logprobs=self._get_seq_cls_logprobs(logprobs[i])) + logprobs=logprobs[i]) ] res.append(ChatCompletionResponse(model=self.model_name, choices=choices, usage=usage_info)) return res @@ -299,11 +309,10 @@ def _infer_full(self, adapter_request: Optional[AdapterRequest] = None, template_inputs=None) -> Union[List[ChatCompletionResponse]]: # bos_token TODO: encoder-decoder - generate_kwargs = { - 'adapter_names': self._get_adapter_names(adapter_request), - 'generation_config': generation_config, - **inputs - } + generate_kwargs = {'generation_config': generation_config, **inputs} + adapter_names = self._get_adapter_names(adapter_request) + if adapter_names is not None: + generate_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) generate_kwargs = 
template.prepare_generate_kwargs(generate_kwargs, model=self.model) diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py index c7b6dad4c1..ed07cf875b 100644 --- a/swift/llm/model/constant.py +++ b/swift/llm/model/constant.py @@ -93,7 +93,9 @@ class LLMModelType: mamba = 'mamba' polylm = 'polylm' aya = 'aya' - # bert + + +class BertModelType: modern_bert = 'modern_bert' bert = 'bert' @@ -174,7 +176,7 @@ class MLLMModelType: megrez_omni = 'megrez_omni' -class ModelType(LLMModelType, MLLMModelType): +class ModelType(LLMModelType, MLLMModelType, BertModelType): @classmethod def get_model_name_list(cls) -> List[str]: diff --git a/swift/llm/model/model/bert.py b/swift/llm/model/model/bert.py index f83aef3536..785a3fa137 100644 --- a/swift/llm/model/model/bert.py +++ b/swift/llm/model/model/bert.py @@ -2,7 +2,7 @@ from transformers import AutoConfig from swift.utils import get_logger -from ..constant import LLMModelType +from ..constant import BertModelType from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model logger = get_logger() @@ -17,7 +17,7 @@ def get_model_tokenizer_modern_bert(model_dir, *args, **kwargs): register_model( ModelMeta( - LLMModelType.modern_bert, [ + BertModelType.modern_bert, [ ModelGroup([ Model('answerdotai/ModernBERT-base', 'answerdotai/ModernBERT-base'), Model('answerdotai/ModernBERT-large', 'answerdotai/ModernBERT-large'), @@ -30,7 +30,7 @@ def get_model_tokenizer_modern_bert(model_dir, *args, **kwargs): register_model( ModelMeta( - LLMModelType.bert, [ModelGroup([ + BertModelType.bert, [ModelGroup([ Model('iic/nlp_structbert_backbone_base_std'), ])], None, From 106f588a12acdaa8ff0ff6b78b1b444c5047cf99 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 15:26:47 +0800 Subject: [PATCH 17/47] fix --- swift/llm/infer/infer_engine/pt_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/llm/infer/infer_engine/pt_engine.py b/swift/llm/infer/infer_engine/pt_engine.py index fdf2063a52..227fd13ca9 100644 --- a/swift/llm/infer/infer_engine/pt_engine.py +++ b/swift/llm/infer/infer_engine/pt_engine.py @@ -284,7 +284,7 @@ def _infer_seq_cls(self, if logits.shape[-1] > 1: preds = torch.argmax(logits, dim=-1).tolist() logprobs = torch.log_softmax(logits, -1) - logprobs = [self._get_seq_cls_logprobs(logprobs[i]) for i in range(preds)] + logprobs = [self._get_seq_cls_logprobs(logprobs[i]) for i in range(len(preds))] else: preds = logits.squeeze(dim=-1).tolist() logprobs = [None] * len(preds) From 774b115bd23a89292e7769cbc40e923fa7da6fd4 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 15:51:02 +0800 Subject: [PATCH 18/47] update --- ...\275\344\273\244\350\241\214\345\217\202\346\225\260.md" | 6 +++--- docs/source_en/Instruction/Command-line-parameters.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 4af7091fee..ed0cc39fc1 100644 --- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -403,15 +403,15 @@ App参数继承于[部署参数](#部署参数), [Web-UI参数](#Web-UI参数) - IMAGE_FACTOR: 默认为28 - MIN_PIXELS: 默认为`4 * 28 * 28` -- MAX_PIXELS: 默认为`16384 * 28 * 
28`,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/ocr.sh#L3) +- 🔥MAX_PIXELS: 默认为`16384 * 28 * 28`,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/ocr.sh#L3) - MAX_RATIO: 默认为200 - VIDEO_MIN_PIXELS: 默认为`128 * 28 * 28` -- VIDEO_MAX_PIXELS: 默认为`768 * 28 * 28`,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L7) +- 🔥VIDEO_MAX_PIXELS: 默认为`768 * 28 * 28`,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L7) - VIDEO_TOTAL_PIXELS: 默认为`24576 * 28 * 28` - FRAME_FACTOR: 默认为2 - FPS: 默认为2.0 - FPS_MIN_FRAMES: 默认为4 -- FPS_MAX_FRAMES: 默认为768,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L8) +- 🔥FPS_MAX_FRAMES: 默认为768,参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L8) ### internvl, internvl_phi3 参数含义可以查看[这里](https://modelscope.cn/models/OpenGVLab/Mini-InternVL-Chat-2B-V1-5) diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index 5fba09ac7b..310f9b0406 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -403,15 +403,15 @@ For the meaning of the arguments, please refer to [here](https://github.com/Qwen - IMAGE_FACTOR: Default is 28 - MIN_PIXELS: Default is `4 * 28 * 28` -- MAX_PIXELS: Default is `16384 * 28 * 28`, refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/ocr.sh#L3) +- 🔥MAX_PIXELS: Default is `16384 * 28 * 28`, refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/ocr.sh#L3) - MAX_RATIO: Default is 200 - VIDEO_MIN_PIXELS: Default is `128 * 28 * 28` -- VIDEO_MAX_PIXELS: Default is `768 * 28 * 28`, refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L7) +- 🔥VIDEO_MAX_PIXELS: Default is `768 * 28 * 28`, refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L7) - VIDEO_TOTAL_PIXELS: Default is `24576 * 28 * 28` - FRAME_FACTOR: Default is 2 - FPS: Default is 2.0 - FPS_MIN_FRAMES: Default is 4 -- FPS_MAX_FRAMES: Default is 768, refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L8) +- 🔥FPS_MAX_FRAMES: Default is 768, refer to [here](https://github.com/modelscope/ms-swift/blob/main/examples/train/multimodal/video.sh#L8) ### internvl, internvl_phi3 For the meaning of the arguments, please refer to [here](https://modelscope.cn/models/OpenGVLab/Mini-InternVL-Chat-2B-V1-5) From 8e00e42536f22e15d7777ec1c5ee8bb9113a332d Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 16:35:42 +0800 Subject: [PATCH 19/47] update --- swift/llm/model/constant.py | 4 ++++ swift/llm/model/model/internlm.py | 6 ++---- swift/llm/model/register.py | 6 ++++-- swift/llm/template/base.py | 3 ++- swift/llm/template/template/internlm.py | 5 ++++- tests/test_align/test_template/test_llm.py | 25 ++++++++++++++++++++-- 6 files changed, 39 insertions(+), 10 deletions(-) diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py index 63c0a82f90..83d438ec71 100644 --- a/swift/llm/model/constant.py +++ b/swift/llm/model/constant.py @@ -2,6 +2,7 @@ # Classification criteria for model_type: same model architecture, tokenizer (get function), template. 
from typing import List + class LLMModelType: qwen = 'qwen' qwen2 = 'qwen2' @@ -93,13 +94,16 @@ class LLMModelType: polylm = 'polylm' aya = 'aya' + class BertModelType: modern_bert = 'modern_bert' bert = 'bert' + class RMModelType: internlm2_reward = 'internlm2_reward' + class MLLMModelType: qwen_vl = 'qwen_vl' qwen_audio = 'qwen_audio' diff --git a/swift/llm/model/model/internlm.py b/swift/llm/model/model/internlm.py index 45b65f03a9..868fa25cb8 100644 --- a/swift/llm/model/model/internlm.py +++ b/swift/llm/model/model/internlm.py @@ -8,9 +8,8 @@ from ..constant import LLMModelType, MLLMModelType, RMModelType from ..model_arch import ModelArch from ..patcher import patch_output_clone, patch_output_to_input_device -from ..register import ( - Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model, get_model_tokenizer_reward_model -) +from ..register import (Model, ModelGroup, ModelMeta, get_model_tokenizer_reward_model, + get_model_tokenizer_with_flash_attn, register_model) from ..utils import ModelInfo, safe_snapshot_download, use_submodel_func register_model( @@ -335,7 +334,6 @@ def get_model_tokenizer_xcomposer_ol(model_dir, *args, **kwargs): tags=['audio'], )) - register_model( ModelMeta( RMModelType.internlm2_reward, [ diff --git a/swift/llm/model/register.py b/swift/llm/model/register.py index 6fed4f7073..46da0015d4 100644 --- a/swift/llm/model/register.py +++ b/swift/llm/model/register.py @@ -8,8 +8,8 @@ import torch from peft import PeftModel -from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, PretrainedConfig, - PreTrainedModel, PreTrainedTokenizerBase, AutoModel) +from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, + PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase) from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils import is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_npu_available, strtobool from transformers.utils.versions import require_version @@ -216,12 +216,14 @@ def get_model_tokenizer_multimodal(model_dir: str, *args, **kwargs): model, _ = get_model_tokenizer_with_flash_attn(model_dir, *args, **kwargs) return model, processor + def get_model_tokenizer_reward_model(model_dir, *args, **kwargs): model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) if 'AutoModel' in (getattr(model_config, 'auto_map', None) or {}): kwargs['automodel_class'] = AutoModel return get_model_tokenizer_with_flash_attn(model_dir, *args, **kwargs) + def fix_do_sample_warning(generation_config: GenerationConfig) -> None: # Use the default values of temperature/top_p/top_k in generation_config. 
if generation_config.temperature == 0: diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py index d0ceec287d..ba41907066 100644 --- a/swift/llm/template/base.py +++ b/swift/llm/template/base.py @@ -541,7 +541,8 @@ def _jinja_encode(self, inputs: StdTemplateInputs): messages.insert(0, {'role': 'system', 'content': inputs.system}) if messages[-1]['content'] is None: messages.pop() - text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + add_generation_prompt = messages[-1]['role'] != 'assistant' + text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=add_generation_prompt) answer_len = 1 if self.is_training else 0 return [text], [1.], answer_len diff --git a/swift/llm/template/template/internlm.py b/swift/llm/template/template/internlm.py index ba917f766c..53095b565f 100644 --- a/swift/llm/template/template/internlm.py +++ b/swift/llm/template/template/internlm.py @@ -34,7 +34,10 @@ register_template(ChatmlTemplateMeta(LLMTemplateType.internlm2, default_system=INTERNLM_SYSTEM)) -register_template(ChatmlTemplateMeta(RMTemplateType.internlm2_reward, default_system=INTERNLM_SYSTEM)) +register_template( + ChatmlTemplateMeta( + RMTemplateType.internlm2_reward, default_system=INTERNLM_SYSTEM, suffix=['<|im_end|>\n<|reward|>'])) + class InternLMXComposer2Template(Template): image_placeholder = [''] diff --git a/tests/test_align/test_template/test_llm.py b/tests/test_align/test_template/test_llm.py index b997ec4fb6..3d98b2d20a 100644 --- a/tests/test_align/test_template/test_llm.py +++ b/tests/test_align/test_template/test_llm.py @@ -2,7 +2,7 @@ import torch -os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3' +os.environ['CUDA_VISIBLE_DEVICES'] = '0' os.environ['SWIFT_DEBUG'] = '1' @@ -17,6 +17,8 @@ def _infer_model(pt_engine, system=None, messages=None): resp = pt_engine.infer([{'messages': messages}], request_config=request_config) response = resp[0].choices[0].message.content messages += [{'role': 'assistant', 'content': response}, {'role': 'user', 'content': '这是什么'}] + else: + messages = messages.copy() resp = pt_engine.infer([{ 'messages': messages, }], request_config=request_config) @@ -61,6 +63,7 @@ def test_internlm(): def test_internlm2(): + # pt_engine = PtEngine('Shanghai_AI_Laboratory/internlm2-1_8b') pt_engine = PtEngine('Shanghai_AI_Laboratory/internlm2_5-1_8b-chat') _infer_model(pt_engine) pt_engine.default_template.template_backend = 'jinja' @@ -160,6 +163,23 @@ def test_skywork_o1(): '8 + 1 = 9\n \\]\n4. **Apples Split Equally') +def test_internlm2_reward(): + pt_engine = PtEngine('Shanghai_AI_Laboratory/internlm2-1_8b-reward') + messages = [{ + 'role': 'user', + 'content': "Hello! What's your name?" + }, { + 'role': 'assistant', + 'content': 'My name is InternLM2! A helpful AI assistant. What can I do for you?' 
+ }] + pt_engine.task_type = 'seq_cls' + res = _infer_model(pt_engine, messages=messages) + pt_engine.default_template.template_backend = 'jinja' + res2 = _infer_model(pt_engine, messages=messages) + assert res2 == '0.48681640625' + print() + + if __name__ == '__main__': from swift.llm import PtEngine, RequestConfig, get_template, get_model_tokenizer, VllmEngine from swift.utils import get_logger, seed_everything @@ -179,4 +199,5 @@ def test_skywork_o1(): # test_llama() # test_openbuddy() # test_megrez() - test_skywork_o1() + # test_skywork_o1() + test_internlm2_reward() From 1fd06c6b2c2509f328106860ee3de4b1b94f838f Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 16:37:27 +0800 Subject: [PATCH 20/47] update --- swift/llm/infer/infer_engine/pt_engine.py | 3 ++- swift/llm/model/model/reward_model.py | 12 ++++-------- swift/llm/template/base.py | 21 +++++++-------------- swift/llm/template/template/internlm.py | 3 +-- 4 files changed, 14 insertions(+), 25 deletions(-) diff --git a/swift/llm/infer/infer_engine/pt_engine.py b/swift/llm/infer/infer_engine/pt_engine.py index 227fd13ca9..070d8c85b1 100644 --- a/swift/llm/infer/infer_engine/pt_engine.py +++ b/swift/llm/infer/infer_engine/pt_engine.py @@ -314,12 +314,13 @@ def _infer_full(self, if adapter_names is not None: generate_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) - + template.debug_logger(inputs) # debug generate_kwargs = template.prepare_generate_kwargs(generate_kwargs, model=self.model) output = dict(template.generate(self.model, **generate_kwargs)) output.pop('past_key_values', None) batched_generate_ids = output['sequences'] batched_generate_ids = template.get_generate_ids(batched_generate_ids, num_prompt_tokens) + template.debug_logger({'generate_ids': batched_generate_ids}) # debug batched_logprobs = self.preprocess_logits( output.get('logits'), batched_generate_ids, generation_config.top_logprobs) diff --git a/swift/llm/model/model/reward_model.py b/swift/llm/model/model/reward_model.py index b5a72c12db..30a9d5660d 100644 --- a/swift/llm/model/model/reward_model.py +++ b/swift/llm/model/model/reward_model.py @@ -8,11 +8,7 @@ logger = get_logger() - - - - - # ModelGroup([ - # Model('Qwen/Qwen2.5-Math-RM-72B', 'Qwen/Qwen2.5-Math-RM-72B'), - # Model('Qwen/Qwen2-Math-RM-72B', 'Qwen/Qwen2-Math-RM-72B'), - # ]), \ No newline at end of file +# ModelGroup([ +# Model('Qwen/Qwen2.5-Math-RM-72B', 'Qwen/Qwen2.5-Math-RM-72B'), +# Model('Qwen/Qwen2-Math-RM-72B', 'Qwen/Qwen2-Math-RM-72B'), +# ]), diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py index ba41907066..940f3df642 100644 --- a/swift/llm/template/base.py +++ b/swift/llm/template/base.py @@ -674,23 +674,16 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: encoded[k] = None return encoded - def _debug_logger(self, generate_ids): - if isinstance(generate_ids, list) or isinstance(generate_ids, torch.Tensor) and generate_ids.ndim == 1: - generate_ids = [generate_ids] - for tokens in generate_ids: - if isinstance(tokens, torch.Tensor): - tokens = tokens.tolist() - logger.info(f'[GENERATE_IDS] {tokens}') - logger.info(f'[GENERATE] {self.safe_decode(tokens)}\n' + '-' * 50) + def debug_logger(self, inputs): + if not strtobool(os.getenv('SWIFT_DEBUG', 'false')): + return + self.print_inputs(inputs) def get_generate_ids(self, generate_ids: Union[torch.Tensor, List[int]], num_prompt_tokens: int) -> Union[torch.Tensor, List[int]]: - if strtobool(os.getenv('SWIFT_DEBUG', 'false')): - 
self._debug_logger(generate_ids) if self.skip_prompt: - return generate_ids[..., num_prompt_tokens:] - else: - return generate_ids + generate_ids = generate_ids[..., num_prompt_tokens:] + return generate_ids def post_process_generate_response(self, response: str, inputs: StdTemplateInputs) -> str: return response @@ -944,7 +937,7 @@ def _torchacc_xtuner_data_collator(self, res, padding_to, tokenizer, padding_sid def print_inputs(self, inputs: Dict[str, Any], tokenizer_kwargs: Optional[Dict[str, Any]] = None) -> None: if tokenizer_kwargs is None: tokenizer_kwargs = {} - for key in ['input', 'labels', 'chosen_input', 'chosen_labels', 'rejected_input', 'rejected_labels']: + for key in ['input', 'labels', 'generate_ids', 'chosen_input', 'chosen_labels', 'rejected_input', 'rejected_labels']: val = inputs.get(key) # fix val is a tensor if val is None: val = inputs.get(f'{key}_ids') diff --git a/swift/llm/template/template/internlm.py b/swift/llm/template/template/internlm.py index 53095b565f..4d596d8a14 100644 --- a/swift/llm/template/template/internlm.py +++ b/swift/llm/template/template/internlm.py @@ -35,8 +35,7 @@ register_template(ChatmlTemplateMeta(LLMTemplateType.internlm2, default_system=INTERNLM_SYSTEM)) register_template( - ChatmlTemplateMeta( - RMTemplateType.internlm2_reward, default_system=INTERNLM_SYSTEM, suffix=['<|im_end|>\n<|reward|>'])) + ChatmlTemplateMeta(RMTemplateType.internlm2_reward, suffix=['<|im_end|>\n<|reward|>'])) class InternLMXComposer2Template(Template): From 3735cbdad803e60fd509195d2c3874de4ac86e50 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 16:37:42 +0800 Subject: [PATCH 21/47] update --- tests/test_align/test_template/test_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_align/test_template/test_llm.py b/tests/test_align/test_template/test_llm.py index 3d98b2d20a..af6ba95ad2 100644 --- a/tests/test_align/test_template/test_llm.py +++ b/tests/test_align/test_template/test_llm.py @@ -176,7 +176,7 @@ def test_internlm2_reward(): res = _infer_model(pt_engine, messages=messages) pt_engine.default_template.template_backend = 'jinja' res2 = _infer_model(pt_engine, messages=messages) - assert res2 == '0.48681640625' + assert res == res2 == '0.48681640625' print() From c5b7022eb83a44e40f529ddfc2912e52f3fe8c02 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 20:16:34 +0800 Subject: [PATCH 22/47] update --- .../export/{merge_lora => }/merge_lora.sh | 0 examples/export/push_to_hub.sh | 5 ++ examples/infer/demo_rm.py | 33 ++++++++++ examples/train/seq_cls/bert/deploy.sh | 5 +- examples/train/seq_cls/bert/infer.sh | 3 +- examples/train/seq_cls/qwen2_5/deploy.sh | 2 +- examples/train/seq_cls/qwen2_5/sft.sh | 3 +- swift/llm/__init__.py | 7 +-- swift/llm/argument/base_args/base_args.py | 4 +- swift/llm/argument/base_args/model_args.py | 3 +- swift/llm/dataset/dataset/llm.py | 31 +++------- swift/llm/dataset/preprocessor/__init__.py | 6 +- swift/llm/dataset/preprocessor/core.py | 8 +++ swift/llm/dataset/preprocessor/extra.py | 2 +- swift/llm/infer/infer.py | 14 +++-- swift/llm/infer/infer_engine/pt_engine.py | 8 +-- swift/llm/model/model/internlm.py | 2 +- swift/llm/model/register.py | 15 +++-- swift/llm/template/base.py | 17 +++++- swift/llm/template/template/internlm.py | 3 +- swift/llm/template/template_inputs.py | 1 - tests/test_align/test_cls.py | 60 +++++++++++++++++++ tests/test_align/test_template/test_llm.py | 1 - 23 files changed, 173 insertions(+), 60 deletions(-) rename 
examples/export/{merge_lora => }/merge_lora.sh (100%) create mode 100644 examples/export/push_to_hub.sh create mode 100644 examples/infer/demo_rm.py create mode 100644 tests/test_align/test_cls.py diff --git a/examples/export/merge_lora/merge_lora.sh b/examples/export/merge_lora.sh similarity index 100% rename from examples/export/merge_lora/merge_lora.sh rename to examples/export/merge_lora.sh diff --git a/examples/export/push_to_hub.sh b/examples/export/push_to_hub.sh new file mode 100644 index 0000000000..c4bcef7421 --- /dev/null +++ b/examples/export/push_to_hub.sh @@ -0,0 +1,5 @@ +CUDA_VISIBLE_DEVICES=0 swift export \ + --adapters output/vx-xxx/checkpoint-xxx \ + --push_to_hub true \ + --hub_model_id '' \ + --hub_token '' diff --git a/examples/infer/demo_rm.py b/examples/infer/demo_rm.py new file mode 100644 index 0000000000..7e32932a7c --- /dev/null +++ b/examples/infer/demo_rm.py @@ -0,0 +1,33 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +from typing import List + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']): + request_config = RequestConfig(max_tokens=512, temperature=0) + resp_list = engine.infer(infer_requests, request_config) + query0 = infer_requests[0].messages[0]['content'] + print(f'query0: {query0}') + print(f'response0: {resp_list[0].choices[0].message.content}') + + +if __name__ == '__main__': + from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig, load_dataset + model = 'Shanghai_AI_Laboratory/internlm2-1_8b-reward' + engine = PtEngine(model, max_batch_size=64) + # Here, `load_dataset` is used for convenience; `infer_batch` does not require creating a dataset. + dataset = load_dataset(['AI-ModelScope/alpaca-gpt4-data-zh#1000'], seed=42)[0] + print(f'dataset: {dataset}') + infer_requests = [InferRequest(**data) for data in dataset] + infer_batch(engine, infer_requests) + + messages = [{ + 'role': 'user', + 'content': "Hello! What's your name?" + }, { + 'role': 'assistant', + 'content': 'My name is InternLM2! A helpful AI assistant. What can I do for you?' 
+ }] + infer_batch(engine, [InferRequest(messages=messages)]) diff --git a/examples/train/seq_cls/bert/deploy.sh b/examples/train/seq_cls/bert/deploy.sh index 13825d3491..c2102b6932 100644 --- a/examples/train/seq_cls/bert/deploy.sh +++ b/examples/train/seq_cls/bert/deploy.sh @@ -1,9 +1,10 @@ CUDA_VISIBLE_DEVICES=0 \ swift deploy \ --model output/vx-xxx/checkpoint-xxx \ - --served_model_name bert-base-chinese + --served_model_name bert-base-chinese \ + --truncation_strategy right # curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{ # "model": "bert-base-chinese", -# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}] +# "messages": [{"role": "user", "content": "包装差,容易被调包。"}] # }' diff --git a/examples/train/seq_cls/bert/infer.sh b/examples/train/seq_cls/bert/infer.sh index abd8f1f02a..13bc1a1fd3 100644 --- a/examples/train/seq_cls/bert/infer.sh +++ b/examples/train/seq_cls/bert/infer.sh @@ -2,4 +2,5 @@ CUDA_VISIBLE_DEVICES=0 \ swift infer \ --model output/vx-xxx/checkpoint-xxx \ --load_data_args true \ - --max_batch_size 16 + --max_batch_size 16 \ + --truncation_strategy right diff --git a/examples/train/seq_cls/qwen2_5/deploy.sh b/examples/train/seq_cls/qwen2_5/deploy.sh index 5476dae499..3bc08c297c 100644 --- a/examples/train/seq_cls/qwen2_5/deploy.sh +++ b/examples/train/seq_cls/qwen2_5/deploy.sh @@ -4,5 +4,5 @@ swift deploy \ # curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{ # "model": "Qwen2.5-7B", -# "messages": [{"role": "user", "content": "Task: Sentiment Classification\nSentence: 包装差,容易被调包。\nCategory: negative, positive\nOutput:"}] +# "messages": [{"role": "user", "content": "包装差,容易被调包。"}] # }' diff --git a/examples/train/seq_cls/qwen2_5/sft.sh b/examples/train/seq_cls/qwen2_5/sft.sh index 067c6664eb..fe33ee3729 100644 --- a/examples/train/seq_cls/qwen2_5/sft.sh +++ b/examples/train/seq_cls/qwen2_5/sft.sh @@ -1,8 +1,9 @@ # If `num_labels` is provided, it will be considered a classification task, # and AutoModelForSequenceClassification will be used to load the model. +# You can also specify `--model Qwen/Qwen2.5-0.5B-Instruct --use_chat_template true`. 
CUDA_VISIBLE_DEVICES=0 \ swift sft \ - --model Qwen/Qwen2.5-7B \ + --model Qwen/Qwen2.5-0.5B \ --train_type lora \ --dataset 'DAMO_NLP/jd:cls#2000' \ --torch_dtype bfloat16 \ diff --git a/swift/llm/__init__.py b/swift/llm/__init__.py index 8397076413..8279dca957 100644 --- a/swift/llm/__init__.py +++ b/swift/llm/__init__.py @@ -57,10 +57,9 @@ 'load_by_unsloth', 'git_clone_github', 'get_matched_model_meta' ], 'dataset': [ - 'AlpacaPreprocessor', 'ClsPreprocessor', 'ComposePreprocessor', 'MessagesPreprocessor', 'DATASET_MAPPING', - 'MediaResource', 'register_dataset', 'register_dataset_info', 'EncodePreprocessor', 'LazyLLMDataset', - 'ConstantLengthDataset', 'standard_keys', 'load_dataset', 'DATASET_TYPE', 'sample_dataset', - 'RowPreprocessor', 'ResponsePreprocessor', 'DatasetMeta' + 'AlpacaPreprocessor', 'MessagesPreprocessor', 'DATASET_MAPPING', 'MediaResource', 'register_dataset', + 'register_dataset_info', 'EncodePreprocessor', 'LazyLLMDataset', 'ConstantLengthDataset', 'standard_keys', + 'load_dataset', 'DATASET_TYPE', 'sample_dataset', 'RowPreprocessor', 'ResponsePreprocessor', 'DatasetMeta' ], 'utils': [ 'deep_getattr', 'to_device', 'History', 'Messages', 'history_to_messages', 'messages_to_history', diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py index 4ca469ec03..086f6349e6 100644 --- a/swift/llm/argument/base_args/base_args.py +++ b/swift/llm/argument/base_args/base_args.py @@ -252,6 +252,4 @@ def get_model_processor(self, *, model=None, model_type=None, model_revision=Non kwargs['model_type'] = model_type or self.model_type kwargs['model_revision'] = model_revision or self.model_revision - model, processor = get_model_tokenizer(**kwargs) - model.model_info.task_type = self.task_type - return model, processor + return get_model_tokenizer(**kwargs) diff --git a/swift/llm/argument/base_args/model_args.py b/swift/llm/argument/base_args/model_args.py index 2f60e86b2d..5cc0bbbbf3 100644 --- a/swift/llm/argument/base_args/model_args.py +++ b/swift/llm/argument/base_args/model_args.py @@ -154,7 +154,6 @@ def get_model_kwargs(self): 'rope_scaling': self.rope_scaling, } if self.task_type == 'seq_cls': - from transformers import AutoModelForSequenceClassification - kwargs['automodel_class'] = AutoModelForSequenceClassification + kwargs['task_type'] = self.task_type kwargs['model_kwargs'] = {'num_labels': self.num_labels} return kwargs diff --git a/swift/llm/dataset/dataset/llm.py b/swift/llm/dataset/dataset/llm.py index 56af8a47e6..6b035ca7d9 100644 --- a/swift/llm/dataset/dataset/llm.py +++ b/swift/llm/dataset/dataset/llm.py @@ -4,8 +4,8 @@ from functools import partial from typing import Any, Dict, List, Optional, Tuple, Union -from ..preprocessor import (AlpacaPreprocessor, ClsPreprocessor, MessagesPreprocessor, ResponsePreprocessor, - RowPreprocessor, TextGenerationPreprocessor) +from ..preprocessor import (AlpacaPreprocessor, ClsGenerationPreprocessor, ClsPreprocessor, MessagesPreprocessor, + ResponsePreprocessor, RowPreprocessor, TextGenerationPreprocessor) from ..register import DatasetMeta, SubsetDataset, register_dataset @@ -165,24 +165,13 @@ def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: ms_dataset_id='modelscope/clue', hf_dataset_id='clue', subsets=['cmnli'], - preprocess_func=ClsPreprocessor(['neutral', 'entailment', 'contradiction'], - task='Natural Language Inference', - is_pair_seq=True), + preprocess_func=ClsGenerationPreprocessor(['neutral', 'entailment', 'contradiction'], + task='Natural Language 
Inference', + is_pair_seq=True), tags=['text-generation', 'classification'], split=['train', 'validation'], )) - -class JdClsPreprocessor(ClsPreprocessor): - - def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: - label = int(row['label']) - res = super().preprocess(row) - res['messages'].pop() - res['label'] = label - return res - - register_dataset( DatasetMeta( ms_dataset_id='DAMO_NLP/jd', @@ -190,15 +179,13 @@ def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: SubsetDataset( 'default', 'default', - preprocess_func=ClsPreprocessor(['negative', 'positive'], - task='Sentiment Classification', - is_pair_seq=False)), + preprocess_func=ClsGenerationPreprocessor(['negative', 'positive'], + task='Sentiment Classification', + is_pair_seq=False)), SubsetDataset( 'cls', 'default', - preprocess_func=JdClsPreprocessor(['negative', 'positive'], - task='Sentiment Classification', - is_pair_seq=False), + preprocess_func=ClsPreprocessor(columns_mapping={'sentence': 'query'}), ), ], tags=['text-generation', 'classification', '🔥'], diff --git a/swift/llm/dataset/preprocessor/__init__.py b/swift/llm/dataset/preprocessor/__init__.py index 61b2f8cbcc..f9c5587bfa 100644 --- a/swift/llm/dataset/preprocessor/__init__.py +++ b/swift/llm/dataset/preprocessor/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from .core import (DATASET_TYPE, AlpacaPreprocessor, AutoPreprocessor, MessagesPreprocessor, ResponsePreprocessor, - RowPreprocessor, get_features_dataset, standard_keys) -from .extra import ClsPreprocessor, GroundingMixin, TextGenerationPreprocessor +from .core import (DATASET_TYPE, AlpacaPreprocessor, AutoPreprocessor, ClsPreprocessor, MessagesPreprocessor, + ResponsePreprocessor, RowPreprocessor, get_features_dataset, standard_keys) +from .extra import ClsGenerationPreprocessor, GroundingMixin, TextGenerationPreprocessor diff --git a/swift/llm/dataset/preprocessor/core.py b/swift/llm/dataset/preprocessor/core.py index 1f42f5f4a1..1a47f44a88 100644 --- a/swift/llm/dataset/preprocessor/core.py +++ b/swift/llm/dataset/preprocessor/core.py @@ -440,6 +440,14 @@ def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: return row +class ClsPreprocessor(ResponsePreprocessor): + + def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: + res = super().preprocess(row) + res['label'] = int(res['label']) + return res + + class AutoPreprocessor: def __init__(self, *, columns_mapping: Optional[Dict[str, str]] = None, **kwargs) -> None: diff --git a/swift/llm/dataset/preprocessor/extra.py b/swift/llm/dataset/preprocessor/extra.py index 06f64d104f..aa7ec4f72f 100644 --- a/swift/llm/dataset/preprocessor/extra.py +++ b/swift/llm/dataset/preprocessor/extra.py @@ -70,7 +70,7 @@ def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: return super().preprocess(row) -class ClsPreprocessor(ResponsePreprocessor): +class ClsGenerationPreprocessor(ResponsePreprocessor): def __init__(self, labels: List[str], diff --git a/swift/llm/infer/infer.py b/swift/llm/infer/infer.py index 431f4868a0..f4b2c035fc 100644 --- a/swift/llm/infer/infer.py +++ b/swift/llm/infer/infer.py @@ -185,15 +185,21 @@ def infer_dataset(self) -> List[Dict[str, Any]]: if is_dist: val_dataset = val_dataset.shard(args.global_world_size, args.rank, contiguous=True) val_dataset = list(val_dataset) - labels_list = [InferRequest.remove_response(data['messages']) for data in val_dataset] + labels_list = [] + for data in val_dataset: + if args.task_type == 'causal_lm': + labels = 
InferRequest.remove_response(data['messages']) + else: + labels = data.pop('label', None) + if labels is not None: + labels = str(int(labels)) + labels_list.append(labels) resp_list = self.infer( val_dataset, request_config, template=self.template, use_tqdm=True, **self.infer_kwargs) for data, resp, labels in zip(val_dataset, resp_list, labels_list): response = resp.choices[0].message.content - if labels: - data['labels'] = labels - data = {'response': response, 'logprobs': resp.choices[0].logprobs, **data} + data = {'response': response, 'labels': labels, 'logprobs': resp.choices[0].logprobs, **data} result_list.append(data) if is_dist: total_result_list = [None for _ in range(args.global_world_size)] if args.rank == 0 else None diff --git a/swift/llm/infer/infer_engine/pt_engine.py b/swift/llm/infer/infer_engine/pt_engine.py index 070d8c85b1..609be33429 100644 --- a/swift/llm/infer/infer_engine/pt_engine.py +++ b/swift/llm/infer/infer_engine/pt_engine.py @@ -79,7 +79,6 @@ def __init__( for adapter in self.adapters: self._add_adapter(safe_snapshot_download(adapter, use_hf=use_hf, hub_token=hub_token)) self._post_init() - self.task_type = 'causal_lm' def _post_init(self): super()._post_init() @@ -97,7 +96,6 @@ def from_model_template(cls, model, template=None, *, max_batch_size: int = 1): self.processor = template.processor self.max_batch_size = max_batch_size self._post_init() - self.task_type = self.model_info.task_type return self def _prepare_generation_config(self, request_config: RequestConfig) -> _GenerationConfig: @@ -279,7 +277,7 @@ def _infer_seq_cls(self, if adapter_names is not None: call_kwargs['adapter_names'] = adapter_names num_prompt_tokens = self._get_num_tokens(inputs) - inputs.pop('labels') + inputs.pop('labels', None) logits = self.model(**inputs, **call_kwargs).logits if logits.shape[-1] > 1: preds = torch.argmax(logits, dim=-1).tolist() @@ -397,7 +395,7 @@ def _infer( template.model = self.model generation_config = None - if self.task_type == 'seq_cls': + if self.model_info.task_type == 'seq_cls': template.set_mode('seq_cls') else: template.set_mode('pt') @@ -414,7 +412,7 @@ def _infer( inputs = to_device(template.data_collator(batched_inputs), self.model.device) if self.model.model_meta.is_multimodal: _, inputs = template.pre_forward_hook(self.model, None, inputs) - if self.task_type != 'seq_cls': + if self.model_info.task_type == 'causal_lm': self.set_default_max_tokens(request_config, inputs) generation_config = self._prepare_generation_config(request_config) self._add_stop_words(generation_config, request_config, template) diff --git a/swift/llm/model/model/internlm.py b/swift/llm/model/model/internlm.py index 868fa25cb8..63faa20b8f 100644 --- a/swift/llm/model/model/internlm.py +++ b/swift/llm/model/model/internlm.py @@ -347,4 +347,4 @@ def get_model_tokenizer_xcomposer_ol(model_dir, *args, **kwargs): get_model_tokenizer_reward_model, requires=['transformers>=4.38'], architectures=['InternLM2ForRewardModel'], - tags=['reward_model'])) + task_type='seq_cls')) diff --git a/swift/llm/model/register.py b/swift/llm/model/register.py index 46da0015d4..f4e94b0291 100644 --- a/swift/llm/model/register.py +++ b/swift/llm/model/register.py @@ -8,8 +8,8 @@ import torch from peft import PeftModel -from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, - PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase) +from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM, AutoModelForSequenceClassification, + 
AutoTokenizer, GenerationConfig, PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase) from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils import is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_npu_available, strtobool from transformers.utils.versions import require_version @@ -62,6 +62,7 @@ class ModelMeta: # Additional files that need to be saved for full parameter training/merge-lora. additional_saved_files: List[str] = field(default_factory=list) torch_dtype: Optional[torch.dtype] = None + task_type: Literal['causal_lm', 'seq_cls', None] = None # File patterns to ignore when downloading the model. ignore_patterns: List[str] = field(default_factory=list) @@ -158,7 +159,9 @@ def get_model_tokenizer_from_local(model_dir: str, automodel_class=None, **kwargs): """Load the model and tokenizer from the local model_dir.""" - automodel_class = automodel_class or AutoModelForCausalLM + automodel_class_mapping = {'seq_cls': AutoModelForSequenceClassification, 'causal_lm': AutoModelForCausalLM} + if automodel_class is None: + automodel_class = automodel_class_mapping[model_info.task_type] if model_config is None: model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) # fix prediction_step (internvl2, ovis, ...) @@ -363,6 +366,7 @@ def get_model_info_meta( # model kwargs model_type: Optional[str] = None, quantization_config=None, + task_type=None, **kwargs) -> Tuple[ModelInfo, ModelMeta]: model_meta = get_matched_model_meta(model_id_or_path) model_dir = safe_snapshot_download( @@ -389,6 +393,7 @@ def get_model_info_meta( logger.info(f'Setting torch_dtype: {torch_dtype}') _check_torch_dtype(torch_dtype) model_info.torch_dtype = torch_dtype + model_info.task_type = task_type or model_meta.task_type model_meta.check_requires(model_info) return model_info, model_meta @@ -411,6 +416,7 @@ def get_model_tokenizer( attn_impl: Literal['flash_attn', 'sdpa', 'eager', None] = None, rope_scaling: Optional[Dict[str, Any]] = None, automodel_class=None, + task_type: Literal['causal_lm', 'seq_cls'] = 'causal_lm', model_kwargs: Optional[Dict[str, Any]] = None, **kwargs) -> Tuple[Optional[PreTrainedModel], PreTrainedTokenizerBase]: """ @@ -439,7 +445,8 @@ def get_model_tokenizer( revision=revision, download_model=download_model, model_type=model_type, - quantization_config=quantization_config) + quantization_config=quantization_config, + task_type=task_type) if not use_torchacc() and device_map is None: device_map = get_default_device_map() diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py index 940f3df642..d8c13e6ccb 100644 --- a/swift/llm/template/base.py +++ b/swift/llm/template/base.py @@ -185,6 +185,7 @@ def _kto_encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: def _seq_cls_encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: encoded = self._encode(inputs) + encoded.pop('labels', None) if inputs.label is not None: encoded['labels'] = int(inputs.label) return encoded @@ -205,6 +206,9 @@ def encode(self, elif isinstance(inputs, StdTemplateInputs): inputs = deepcopy(inputs) + if not self.is_training: + InferRequest.remove_response(inputs.messages) + assert isinstance(inputs, StdTemplateInputs) self._preprocess_inputs(inputs) if self.mode in {'vllm', 'lmdeploy'}: @@ -677,7 +681,14 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: def debug_logger(self, inputs): if not strtobool(os.getenv('SWIFT_DEBUG', 'false')): return - self.print_inputs(inputs) + if 'input_ids' in inputs: + k = 
'input_ids' + val = inputs['input_ids'] + else: + k = 'generate_ids' + val = inputs['generate_ids'] + for v in val: + self.print_inputs({k: v.tolist()}) def get_generate_ids(self, generate_ids: Union[torch.Tensor, List[int]], num_prompt_tokens: int) -> Union[torch.Tensor, List[int]]: @@ -937,7 +948,9 @@ def _torchacc_xtuner_data_collator(self, res, padding_to, tokenizer, padding_sid def print_inputs(self, inputs: Dict[str, Any], tokenizer_kwargs: Optional[Dict[str, Any]] = None) -> None: if tokenizer_kwargs is None: tokenizer_kwargs = {} - for key in ['input', 'labels', 'generate_ids', 'chosen_input', 'chosen_labels', 'rejected_input', 'rejected_labels']: + for key in [ + 'input', 'labels', 'generate', 'chosen_input', 'chosen_labels', 'rejected_input', 'rejected_labels' + ]: val = inputs.get(key) # fix val is a tensor if val is None: val = inputs.get(f'{key}_ids') diff --git a/swift/llm/template/template/internlm.py b/swift/llm/template/template/internlm.py index 4d596d8a14..fb4e9682fa 100644 --- a/swift/llm/template/template/internlm.py +++ b/swift/llm/template/template/internlm.py @@ -34,8 +34,7 @@ register_template(ChatmlTemplateMeta(LLMTemplateType.internlm2, default_system=INTERNLM_SYSTEM)) -register_template( - ChatmlTemplateMeta(RMTemplateType.internlm2_reward, suffix=['<|im_end|>\n<|reward|>'])) +register_template(ChatmlTemplateMeta(RMTemplateType.internlm2_reward, suffix=['<|im_end|>\n<|reward|>'])) class InternLMXComposer2Template(Template): diff --git a/swift/llm/template/template_inputs.py b/swift/llm/template/template_inputs.py index 1f6734dde4..81aba5716b 100644 --- a/swift/llm/template/template_inputs.py +++ b/swift/llm/template/template_inputs.py @@ -46,7 +46,6 @@ def __post_init__(self): if isinstance(val, str): setattr(self, key, [val]) assert isinstance(self.messages, list), f'messages: {self.messages}' - self.remove_response(self.messages) @staticmethod def remove_response(messages) -> Optional[str]: diff --git a/tests/test_align/test_cls.py b/tests/test_align/test_cls.py new file mode 100644 index 0000000000..6ed752d68a --- /dev/null +++ b/tests/test_align/test_cls.py @@ -0,0 +1,60 @@ +import os +from pprint import pprint + +import torch + +os.environ['CUDA_VISIBLE_DEVICES'] = '1' +kwargs = { + 'per_device_train_batch_size': 4, + 'per_device_eval_batch_size': 4, + 'gradient_accumulation_steps': 4, + 'num_train_epochs': 1, + 'save_steps': 100, + 'max_length': 512, + 'task_type': 'seq_cls', + 'num_labels': 2, +} + + +def calc_acc(infer_result): + n_correct = 0 + for res in infer_result: + if res['response'] == res['labels']: + n_correct += 1 + return f'acc: {n_correct/len(infer_result)}, n_correct: {n_correct}, len(res): {len(infer_result)}' + + +def test_llm(): + from swift.llm import sft_main, TrainArguments, infer_main, InferArguments, Template + res = [] + for model in ['Qwen/Qwen2.5-0.5B-Instruct', 'Qwen/Qwen2.5-0.5B', 'AI-ModelScope/bert-base-chinese']: + dataset = ['DAMO_NLP/jd:cls#2000'] + result = sft_main(TrainArguments(model=model, dataset=dataset, split_dataset_ratio=0.1, **kwargs)) + last_model_checkpoint = result['last_model_checkpoint'] + infer_result = infer_main( + InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, truncation_strategy='right')) + res.append(calc_acc(infer_result)) + infer_result2 = infer_main( + InferArguments( + ckpt_dir=last_model_checkpoint, load_data_args=True, max_batch_size=16, truncation_strategy='right')) + res.append(calc_acc(infer_result2)) + + model = 'Qwen/Qwen2.5-0.5B-Instruct' + dataset = 
['DAMO_NLP/jd#2000'] + train_kwargs = kwargs.copy() + train_kwargs.pop('task_type') + train_kwargs.pop('num_labels') + result = sft_main(TrainArguments(model=model, dataset=dataset, split_dataset_ratio=0.1, **train_kwargs)) + last_model_checkpoint = result['last_model_checkpoint'] + infer_result = infer_main( + InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, truncation_strategy='right')) + res.append(calc_acc(infer_result)) + infer_result2 = infer_main( + InferArguments( + ckpt_dir=last_model_checkpoint, load_data_args=True, max_batch_size=16, truncation_strategy='right')) + res.append(calc_acc(infer_result2)) + pprint(res) + + +if __name__ == '__main__': + test_llm() diff --git a/tests/test_align/test_template/test_llm.py b/tests/test_align/test_template/test_llm.py index af6ba95ad2..e8e23fa353 100644 --- a/tests/test_align/test_template/test_llm.py +++ b/tests/test_align/test_template/test_llm.py @@ -177,7 +177,6 @@ def test_internlm2_reward(): pt_engine.default_template.template_backend = 'jinja' res2 = _infer_model(pt_engine, messages=messages) assert res == res2 == '0.48681640625' - print() if __name__ == '__main__': From 16c8c00dedd43f2e3c802f7911d77cf7cb964623 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Jan 2025 21:41:26 +0800 Subject: [PATCH 23/47] update --- examples/deploy/lora/client.py | 10 ++-- examples/deploy/lora/server.sh | 2 +- examples/export/push_to_hub.sh | 3 +- examples/infer/demo_bert.py | 49 +++++++++++++++++++ .../infer/{demo_lora.py => demo_multilora.py} | 8 +-- .../{demo_rm.py => demo_reward_model.py} | 0 examples/infer/pt/bert.sh | 6 +++ examples/infer/pt/reward_model.sh | 3 ++ examples/train/seq_cls/bert/deploy.sh | 2 +- examples/train/seq_cls/bert/infer.sh | 2 +- examples/train/seq_cls/bert/sft.sh | 6 ++- examples/train/seq_cls/qwen2_5/deploy.sh | 2 +- swift/hub/hub.py | 2 +- swift/llm/model/register.py | 2 +- 14 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 examples/infer/demo_bert.py rename examples/infer/{demo_lora.py => demo_multilora.py} (90%) rename examples/infer/{demo_rm.py => demo_reward_model.py} (100%) create mode 100644 examples/infer/pt/bert.sh create mode 100644 examples/infer/pt/reward_model.sh diff --git a/examples/deploy/lora/client.py b/examples/deploy/lora/client.py index 37ce773cf1..e61caad8ae 100644 --- a/examples/deploy/lora/client.py +++ b/examples/deploy/lora/client.py @@ -7,18 +7,18 @@ def infer_multilora(engine: InferClient, infer_request: InferRequest): print(f'models: {models}') request_config = RequestConfig(max_tokens=512, temperature=0) - # use lora + # use lora1 resp_list = engine.infer([infer_request], request_config, model=models[1]) response = resp_list[0].choices[0].message.content - print(f'lora-response: {response}') + print(f'lora1-response: {response}') # origin model resp_list = engine.infer([infer_request], request_config, model=models[0]) response = resp_list[0].choices[0].message.content print(f'response: {response}') - # use lora - resp_list = engine.infer([infer_request], request_config, model=models[1]) + # use lora2 + resp_list = engine.infer([infer_request], request_config, model=models[2]) response = resp_list[0].choices[0].message.content - print(f'lora-response: {response}') + print(f'lora2-response: {response}') if __name__ == '__main__': diff --git a/examples/deploy/lora/server.sh b/examples/deploy/lora/server.sh index ff3605aead..e3e2d925b2 100644 --- a/examples/deploy/lora/server.sh +++ b/examples/deploy/lora/server.sh @@ -3,5 +3,5 @@ 
CUDA_VISIBLE_DEVICES=0 swift deploy \ --host 0.0.0.0 \ --port 8000 \ - --adapters swift-lora=swift/test_lora \ + --adapters lora1=swift/test_lora lora2=swift/test_lora2 \ --infer_backend vllm diff --git a/examples/export/push_to_hub.sh b/examples/export/push_to_hub.sh index c4bcef7421..9771dfe2fa 100644 --- a/examples/export/push_to_hub.sh +++ b/examples/export/push_to_hub.sh @@ -2,4 +2,5 @@ CUDA_VISIBLE_DEVICES=0 swift export \ --adapters output/vx-xxx/checkpoint-xxx \ --push_to_hub true \ --hub_model_id '' \ - --hub_token '' + --hub_token '' \ + --use_hf false diff --git a/examples/infer/demo_bert.py b/examples/infer/demo_bert.py new file mode 100644 index 0000000000..2f5881b0d2 --- /dev/null +++ b/examples/infer/demo_bert.py @@ -0,0 +1,49 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +from typing import List + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']): + request_config = RequestConfig(max_tokens=512, temperature=0) + resp_list = engine.infer(infer_requests, request_config) + query0 = infer_requests[0].messages[0]['content'] + print(f'query0: {query0}') + print(f'response0: {resp_list[0].choices[0].message.content}') + print(f'query1: {query1}') + print(f'response1: {resp_list[1].choices[0].message.content}') + + +if __name__ == '__main__': + # This is an example of BERT with LoRA. + from swift.llm import (InferEngine, InferRequest, PtEngine, RequestConfig, load_dataset, safe_snapshot_download, + BaseArguments) + from swift.tuners import Swift + adapter_path = safe_snapshot_download('swift/test_bert') + args = BaseArguments.from_pretrained(adapter_path) + # method1 + model, processor = args.get_model_processor() + model = Swift.from_pretrained(model, adapter_path) + template = args.get_template(engine.processor) + engine = PtEngine.from_model_template(model, template, max_batch_size=64) + + # method2 + # engine = PtEngine(args.model, adapters=[adapter_path], max_batch_size=64, task_type=args.task_type) + # template = args.get_template(engine.processor) + # engine.default_template = template + + # Here, `load_dataset` is used for convenience; `infer_batch` does not require creating a dataset. 
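    # (Note: `load_dataset` here returns a (train_dataset, val_dataset) pair, so the `[0]` below keeps
    #  only the train split. As the comment above says, no dataset is actually required -- hand-built
    #  requests such as InferRequest(messages=[{'role': 'user', 'content': '今天天气真好呀'}]), used at
    #  the bottom of this file, work just as well.)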
+ dataset = load_dataset(['DAMO_NLP/jd:cls#1000'], seed=42)[0] + print(f'dataset: {dataset}') + infer_requests = [InferRequest(messages=data['messages']) for data in dataset] + infer_batch(engine, infer_requests) + + infer_batch(engine, + [InferRequest(messages=[{ + 'role': 'user', + 'content': '今天天气真好呀' + }, { + 'role': 'user', + 'content': '真倒霉' + }])]) diff --git a/examples/infer/demo_lora.py b/examples/infer/demo_multilora.py similarity index 90% rename from examples/infer/demo_lora.py rename to examples/infer/demo_multilora.py index 0004c8577c..f75ef3c0fb 100644 --- a/examples/infer/demo_lora.py +++ b/examples/infer/demo_multilora.py @@ -7,6 +7,7 @@ def infer_multilora(infer_request: 'InferRequest', infer_backend: Literal['vllm', 'pt']): # Dynamic LoRA adapter_path = safe_snapshot_download('swift/test_lora') + adapter_path2 = safe_snapshot_download('swift/test_lora2') args = BaseArguments.from_pretrained(adapter_path) if infer_backend == 'pt': engine = PtEngine(args.model) @@ -16,19 +17,20 @@ def infer_multilora(infer_request: 'InferRequest', infer_backend: Literal['vllm' template = get_template(args.template, engine.processor, args.system) request_config = RequestConfig(max_tokens=512, temperature=0) adapter_request = AdapterRequest('lora1', adapter_path) + adapter_request2 = AdapterRequest('lora2', adapter_path2) # use lora resp_list = engine.infer([infer_request], request_config, template=template, adapter_request=adapter_request) response = resp_list[0].choices[0].message.content - print(f'lora-response: {response}') + print(f'lora1-response: {response}') # origin model resp_list = engine.infer([infer_request], request_config) response = resp_list[0].choices[0].message.content print(f'response: {response}') # use lora - resp_list = engine.infer([infer_request], request_config, template=template, adapter_request=adapter_request) + resp_list = engine.infer([infer_request], request_config, template=template, adapter_request=adapter_request2) response = resp_list[0].choices[0].message.content - print(f'lora-response: {response}') + print(f'lora2-response: {response}') def infer_pt(infer_request: 'InferRequest'): diff --git a/examples/infer/demo_rm.py b/examples/infer/demo_reward_model.py similarity index 100% rename from examples/infer/demo_rm.py rename to examples/infer/demo_reward_model.py diff --git a/examples/infer/pt/bert.sh b/examples/infer/pt/bert.sh new file mode 100644 index 0000000000..914679246d --- /dev/null +++ b/examples/infer/pt/bert.sh @@ -0,0 +1,6 @@ +# Since `swift/test_lora` is trained by swift and contains an `args.json` file, +# there is no need to explicitly set `--model`, `--system`, etc., as they will be automatically read. +# To disable this behavior, please set `--load_args false`. 
+CUDA_VISIBLE_DEVICES=0 \ +swift infer \ + --adapters swift/test_bert diff --git a/examples/infer/pt/reward_model.sh b/examples/infer/pt/reward_model.sh new file mode 100644 index 0000000000..3ad2b3f56f --- /dev/null +++ b/examples/infer/pt/reward_model.sh @@ -0,0 +1,3 @@ +CUDA_VISIBLE_DEVICES=0 \ +swift infer \ + --model Shanghai_AI_Laboratory/internlm2-1_8b-reward diff --git a/examples/train/seq_cls/bert/deploy.sh b/examples/train/seq_cls/bert/deploy.sh index c2102b6932..68021a695d 100644 --- a/examples/train/seq_cls/bert/deploy.sh +++ b/examples/train/seq_cls/bert/deploy.sh @@ -1,6 +1,6 @@ CUDA_VISIBLE_DEVICES=0 \ swift deploy \ - --model output/vx-xxx/checkpoint-xxx \ + --adapters output/vx-xxx/checkpoint-xxx \ --served_model_name bert-base-chinese \ --truncation_strategy right diff --git a/examples/train/seq_cls/bert/infer.sh b/examples/train/seq_cls/bert/infer.sh index 13bc1a1fd3..e38f2955ef 100644 --- a/examples/train/seq_cls/bert/infer.sh +++ b/examples/train/seq_cls/bert/infer.sh @@ -1,6 +1,6 @@ CUDA_VISIBLE_DEVICES=0 \ swift infer \ - --model output/vx-xxx/checkpoint-xxx \ + --adapters output/vx-xxx/checkpoint-xxx \ --load_data_args true \ --max_batch_size 16 \ --truncation_strategy right diff --git a/examples/train/seq_cls/bert/sft.sh b/examples/train/seq_cls/bert/sft.sh index 35081e0afc..538e74337b 100644 --- a/examples/train/seq_cls/bert/sft.sh +++ b/examples/train/seq_cls/bert/sft.sh @@ -4,19 +4,23 @@ CUDA_VISIBLE_DEVICES=0 \ swift sft \ --model AI-ModelScope/bert-base-chinese \ - --train_type full \ + --train_type lora \ --dataset 'DAMO_NLP/jd:cls#2000' \ --torch_dtype bfloat16 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ --learning_rate 1e-4 \ + --lora_rank 8 \ + --lora_alpha 32 \ + --target_modules all-linear \ --gradient_accumulation_steps 16 \ --eval_steps 50 \ --save_steps 50 \ --save_total_limit 2 \ --logging_steps 5 \ --max_length 512 \ + --truncation_strategy right \ --output_dir output \ --warmup_ratio 0.05 \ --dataloader_num_workers 4 \ diff --git a/examples/train/seq_cls/qwen2_5/deploy.sh b/examples/train/seq_cls/qwen2_5/deploy.sh index 3bc08c297c..71627c0ff8 100644 --- a/examples/train/seq_cls/qwen2_5/deploy.sh +++ b/examples/train/seq_cls/qwen2_5/deploy.sh @@ -3,6 +3,6 @@ swift deploy \ --adapters output/vx-xxx/checkpoint-xxx # curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{ -# "model": "Qwen2.5-7B", +# "model": "Qwen2.5-0.5B", # "messages": [{"role": "user", "content": "包装差,容易被调包。"}] # }' diff --git a/swift/hub/hub.py b/swift/hub/hub.py index 704cfa39ed..0b2297d68c 100644 --- a/swift/hub/hub.py +++ b/swift/hub/hub.py @@ -273,7 +273,7 @@ def push_to_hub(cls, token or cls.ms_token, private, commit_message=commit_message, - ignore_patterns=ignore_patterns, + ignore_file_pattern=ignore_patterns, revision=revision, tag=path_in_repo) diff --git a/swift/llm/model/register.py b/swift/llm/model/register.py index f4e94b0291..b48203329b 100644 --- a/swift/llm/model/register.py +++ b/swift/llm/model/register.py @@ -393,7 +393,7 @@ def get_model_info_meta( logger.info(f'Setting torch_dtype: {torch_dtype}') _check_torch_dtype(torch_dtype) model_info.torch_dtype = torch_dtype - model_info.task_type = task_type or model_meta.task_type + model_info.task_type = model_meta.task_type or task_type model_meta.check_requires(model_info) return model_info, model_meta From 368b2ef5e8b7cc7bade20c58205e372e93d66569 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 3 Jan 2025 11:37:00 +0800 
Subject: [PATCH 24/47] update --- examples/deploy/bert/client.py | 29 +++++++++++++++++++ examples/deploy/bert/server.sh | 11 +++++++ examples/deploy/reward_model/client.py | 18 ++++++++++++ examples/deploy/reward_model/server.sh | 5 ++++ examples/infer/demo_bert.py | 26 ++++++++++------- examples/infer/demo_reward_model.py | 6 ++-- examples/infer/pt/bert.sh | 4 ++- examples/infer/pt/reward_model.sh | 4 ++- examples/train/seq_cls/bert/deploy.sh | 3 +- examples/train/seq_cls/bert/infer.sh | 3 +- swift/llm/argument/base_args/model_args.py | 3 +- swift/llm/argument/base_args/template_args.py | 3 -- swift/llm/infer/infer.py | 16 +++++++--- swift/llm/model/utils.py | 2 +- swift/llm/template/base.py | 2 ++ 15 files changed, 107 insertions(+), 28 deletions(-) create mode 100644 examples/deploy/bert/client.py create mode 100644 examples/deploy/bert/server.sh create mode 100644 examples/deploy/reward_model/client.py create mode 100644 examples/deploy/reward_model/server.sh diff --git a/examples/deploy/bert/client.py b/examples/deploy/bert/client.py new file mode 100644 index 0000000000..0387b10233 --- /dev/null +++ b/examples/deploy/bert/client.py @@ -0,0 +1,29 @@ +from typing import List + +from swift.llm import InferClient, InferRequest, RequestConfig + + +def infer_batch(engine: 'InferEngine', infer_requests: List[InferRequest]): + resp_list = engine.infer(infer_requests) + query0 = infer_requests[0].messages[0]['content'] + query1 = infer_requests[1].messages[0]['content'] + print(f'query0: {query0}') + print(f'response0: {resp_list[0].choices[0].message.content}') + print(f'query1: {query1}') + print(f'response1: {resp_list[1].choices[0].message.content}') + + +if __name__ == '__main__': + engine = InferClient(host='127.0.0.1', port=8000) + models = engine.models + print(f'models: {models}') + infer_batch(engine, [ + InferRequest(messages=[{ + 'role': 'user', + 'content': '今天天气真好呀' + }]), + InferRequest(messages=[{ + 'role': 'user', + 'content': '真倒霉' + }]) + ]) diff --git a/examples/deploy/bert/server.sh b/examples/deploy/bert/server.sh new file mode 100644 index 0000000000..07208b50af --- /dev/null +++ b/examples/deploy/bert/server.sh @@ -0,0 +1,11 @@ +# Since `swift/test_lora` is trained by swift and contains an `args.json` file, +# there is no need to explicitly set `--model`, `--system`, etc., as they will be automatically read. +CUDA_VISIBLE_DEVICES=0 swift deploy \ + --host 0.0.0.0 \ + --port 8000 \ + --adapters swift/test_bert \ + --infer_backend pt \ + --truncation_strategy right \ + --max_length 512 \ + --served_model_name bert-base-chinese + diff --git a/examples/deploy/reward_model/client.py b/examples/deploy/reward_model/client.py new file mode 100644 index 0000000000..07f74b11b9 --- /dev/null +++ b/examples/deploy/reward_model/client.py @@ -0,0 +1,18 @@ +from typing import List + +from swift.llm import InferClient, InferRequest, RequestConfig + +if __name__ == '__main__': + engine = InferClient(host='127.0.0.1', port=8000) + models = engine.models + print(f'models: {models}') + messages = [{ + 'role': 'user', + 'content': "Hello! What's your name?" + }, { + 'role': 'assistant', + 'content': 'My name is InternLM2! A helpful AI assistant. What can I do for you?' 
+ }] + resp_list = engine.infer([InferRequest(messages=messages)]) + print(f'messages: {messages}') + print(f'response: {resp_list[0].choices[0].message.content}') diff --git a/examples/deploy/reward_model/server.sh b/examples/deploy/reward_model/server.sh new file mode 100644 index 0000000000..53f70e3b79 --- /dev/null +++ b/examples/deploy/reward_model/server.sh @@ -0,0 +1,5 @@ +CUDA_VISIBLE_DEVICES=0 swift deploy \ + --host 0.0.0.0 \ + --port 8000 \ + --model Shanghai_AI_Laboratory/internlm2-1_8b-reward \ + --infer_backend pt diff --git a/examples/infer/demo_bert.py b/examples/infer/demo_bert.py index 2f5881b0d2..3d28d0aa76 100644 --- a/examples/infer/demo_bert.py +++ b/examples/infer/demo_bert.py @@ -6,9 +6,9 @@ def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']): - request_config = RequestConfig(max_tokens=512, temperature=0) - resp_list = engine.infer(infer_requests, request_config) + resp_list = engine.infer(infer_requests) query0 = infer_requests[0].messages[0]['content'] + query1 = infer_requests[1].messages[0]['content'] print(f'query0: {query0}') print(f'response0: {resp_list[0].choices[0].message.content}') print(f'query1: {query1}') @@ -22,10 +22,12 @@ def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']): from swift.tuners import Swift adapter_path = safe_snapshot_download('swift/test_bert') args = BaseArguments.from_pretrained(adapter_path) + args.max_length = 512 + args.truncation_strategy = 'right' # method1 model, processor = args.get_model_processor() model = Swift.from_pretrained(model, adapter_path) - template = args.get_template(engine.processor) + template = args.get_template(processor) engine = PtEngine.from_model_template(model, template, max_batch_size=64) # method2 @@ -39,11 +41,13 @@ def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']): infer_requests = [InferRequest(messages=data['messages']) for data in dataset] infer_batch(engine, infer_requests) - infer_batch(engine, - [InferRequest(messages=[{ - 'role': 'user', - 'content': '今天天气真好呀' - }, { - 'role': 'user', - 'content': '真倒霉' - }])]) + infer_batch(engine, [ + InferRequest(messages=[{ + 'role': 'user', + 'content': '今天天气真好呀' + }]), + InferRequest(messages=[{ + 'role': 'user', + 'content': '真倒霉' + }]) + ]) diff --git a/examples/infer/demo_reward_model.py b/examples/infer/demo_reward_model.py index 7e32932a7c..91e66bb8b5 100644 --- a/examples/infer/demo_reward_model.py +++ b/examples/infer/demo_reward_model.py @@ -6,10 +6,8 @@ def infer_batch(engine: 'InferEngine', infer_requests: List['InferRequest']): - request_config = RequestConfig(max_tokens=512, temperature=0) - resp_list = engine.infer(infer_requests, request_config) - query0 = infer_requests[0].messages[0]['content'] - print(f'query0: {query0}') + resp_list = engine.infer(infer_requests) + print(f'messages0: {infer_requests[0].messages}') print(f'response0: {resp_list[0].choices[0].message.content}') diff --git a/examples/infer/pt/bert.sh b/examples/infer/pt/bert.sh index 914679246d..28fbc566ae 100644 --- a/examples/infer/pt/bert.sh +++ b/examples/infer/pt/bert.sh @@ -3,4 +3,6 @@ # To disable this behavior, please set `--load_args false`. 
CUDA_VISIBLE_DEVICES=0 \ swift infer \ - --adapters swift/test_bert + --adapters swift/test_bert \ + --truncation_strategy right \ + --max_length 512 diff --git a/examples/infer/pt/reward_model.sh b/examples/infer/pt/reward_model.sh index 3ad2b3f56f..2d0b63d140 100644 --- a/examples/infer/pt/reward_model.sh +++ b/examples/infer/pt/reward_model.sh @@ -1,3 +1,5 @@ CUDA_VISIBLE_DEVICES=0 \ swift infer \ - --model Shanghai_AI_Laboratory/internlm2-1_8b-reward + --model Shanghai_AI_Laboratory/internlm2-1_8b-reward \ + --val_dataset AI-ModelScope/alpaca-gpt4-data-zh#1000 \ + --max_batch_size 64 diff --git a/examples/train/seq_cls/bert/deploy.sh b/examples/train/seq_cls/bert/deploy.sh index 68021a695d..58be2ee93e 100644 --- a/examples/train/seq_cls/bert/deploy.sh +++ b/examples/train/seq_cls/bert/deploy.sh @@ -2,7 +2,8 @@ CUDA_VISIBLE_DEVICES=0 \ swift deploy \ --adapters output/vx-xxx/checkpoint-xxx \ --served_model_name bert-base-chinese \ - --truncation_strategy right + --truncation_strategy right \ + --max_length 512 # curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{ # "model": "bert-base-chinese", diff --git a/examples/train/seq_cls/bert/infer.sh b/examples/train/seq_cls/bert/infer.sh index e38f2955ef..c51124f1b0 100644 --- a/examples/train/seq_cls/bert/infer.sh +++ b/examples/train/seq_cls/bert/infer.sh @@ -3,4 +3,5 @@ swift infer \ --adapters output/vx-xxx/checkpoint-xxx \ --load_data_args true \ --max_batch_size 16 \ - --truncation_strategy right + --truncation_strategy right \ + --max_length 512 diff --git a/swift/llm/argument/base_args/model_args.py b/swift/llm/argument/base_args/model_args.py index 5cc0bbbbf3..924f32adcc 100644 --- a/swift/llm/argument/base_args/model_args.py +++ b/swift/llm/argument/base_args/model_args.py @@ -116,6 +116,7 @@ def _init_rope_scaling(self): def _init_model_info(self) -> torch.dtype: self.model_info, self.model_meta = get_model_info_meta(**self.get_model_kwargs()) + self.task_type = self.model_info.task_type self.model_dir = self.model_info.model_dir self.model_type = self.model_info.model_type if isinstance(self.rope_scaling, str): @@ -152,8 +153,8 @@ def get_model_kwargs(self): 'quantization_config': self.get_quantization_config(), 'attn_impl': self.attn_impl, 'rope_scaling': self.rope_scaling, + 'task_type': self.task_type, } if self.task_type == 'seq_cls': - kwargs['task_type'] = self.task_type kwargs['model_kwargs'] = {'num_labels': self.num_labels} return kwargs diff --git a/swift/llm/argument/base_args/template_args.py b/swift/llm/argument/base_args/template_args.py index 64ed6c1cd8..98c4d80a72 100644 --- a/swift/llm/argument/base_args/template_args.py +++ b/swift/llm/argument/base_args/template_args.py @@ -47,9 +47,6 @@ def __post_init__(self): if self.template is None and hasattr(self, 'model_meta'): self.template = self.model_meta.template - if self.max_length is None and hasattr(self, 'model_info'): - self.max_length = self.model_info.max_model_len - def get_template_kwargs(self): truncation_strategy = self.truncation_strategy if truncation_strategy == 'delete': diff --git a/swift/llm/infer/infer.py b/swift/llm/infer/infer.py index f4b2c035fc..271f025b17 100644 --- a/swift/llm/infer/infer.py +++ b/swift/llm/infer/infer.py @@ -138,10 +138,18 @@ def infer_cli(self) -> List[Dict[str, Any]]: infer_state.add_query(query) if args.model_meta.is_multimodal: infer_state.input_mm_data() - data = infer_state.to_dict() - response = self.infer_single(data, request_config) - infer_state.add_response(response) - data = 
{'response': response, **data} + if args.task_type == 'seq_cls' and args.num_labels in {None, 1}: + # reward model + response = infer_state.input_text() + infer_state.add_response(response) + data = infer_state.to_dict() + response = self.infer_single(data, request_config) + data = {'response': response, **data} + else: + data = infer_state.to_dict() + response = self.infer_single(data, request_config) + infer_state.add_response(response) + data = {'response': response, **data} result_list.append(data) if self.jsonl_writer: self.jsonl_writer.append(data) diff --git a/swift/llm/model/utils.py b/swift/llm/model/utils.py index efacbc91fb..21bf592c00 100644 --- a/swift/llm/model/utils.py +++ b/swift/llm/model/utils.py @@ -59,7 +59,7 @@ class ModelInfo: # extra config: Optional[PretrainedConfig] = None - task_type: Optional[str] = None + task_type: Literal['causal_lm', 'seq_cls', None] = None def __post_init__(self): from .register import get_model_name diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py index d8c13e6ccb..3648de64c4 100644 --- a/swift/llm/template/base.py +++ b/swift/llm/template/base.py @@ -79,6 +79,8 @@ def __init__( self.model_info = processor.model_info self.config = self.model_info.config self.model_meta = processor.model_meta + if max_length is None: + max_length = self.model_info.max_model_len tokenizer = self.tokenizer if not use_chat_template: From 9a783ddeab4ae293b1fd8946c86cddf9a7e404f5 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 3 Jan 2025 11:42:34 +0800 Subject: [PATCH 25/47] update --- tests/train/test_rm.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/train/test_rm.py b/tests/train/test_rm.py index 2d05c7e035..91a8fc6872 100644 --- a/tests/train/test_rm.py +++ b/tests/train/test_rm.py @@ -10,25 +10,26 @@ 'num_train_epochs': 1, } + def test_infer(): from swift.llm import infer_main, InferArguments - infer_main(InferArguments(model='Shanghai_AI_Laboratory/internlm2-1_8b-reward', - val_dataset='AI-ModelScope/alpaca-gpt4-data-zh#500')) + infer_main( + InferArguments( + model='Shanghai_AI_Laboratory/internlm2-1_8b-reward', val_dataset='AI-ModelScope/alpaca-gpt4-data-zh#500')) def test_llm(): from swift.llm import TrainArguments, sft_main, infer_main, InferArguments result = sft_main( TrainArguments( - model='Qwen/Qwen2.5-1.5B-Instruct', + model='Shanghai_AI_Laboratory/internlm2-1_8b-reward', train_type='lora', - num_labels=2, - dataset=['DAMO_NLP/jd:cls#2000'], + dataset=['hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh#100'], **kwargs)) last_model_checkpoint = result['last_model_checkpoint'] infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True)) if __name__ == '__main__': - test_infer() - # test_llm() + # test_infer() + test_llm() From 121164026b4d612461743767a38c8d2584d41d48 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Fri, 3 Jan 2025 15:06:09 +0800 Subject: [PATCH 26/47] revert --- tests/test_align/test_template/test_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_align/test_template/test_llm.py b/tests/test_align/test_template/test_llm.py index e8e23fa353..46c1b4c90f 100644 --- a/tests/test_align/test_template/test_llm.py +++ b/tests/test_align/test_template/test_llm.py @@ -2,7 +2,7 @@ import torch -os.environ['CUDA_VISIBLE_DEVICES'] = '0' +os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3' os.environ['SWIFT_DEBUG'] = '1' From e7e7fd63eee27776eff4010baded1e5da6d4e162 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Sun, 5 Jan 2025 
22:21:35 +0800 Subject: [PATCH 27/47] update --- swift/llm/model/model/reward_model.py | 14 ----------- tests/train/test_rm.py | 35 --------------------------- 2 files changed, 49 deletions(-) delete mode 100644 swift/llm/model/model/reward_model.py delete mode 100644 tests/train/test_rm.py diff --git a/swift/llm/model/model/reward_model.py b/swift/llm/model/model/reward_model.py deleted file mode 100644 index 30a9d5660d..0000000000 --- a/swift/llm/model/model/reward_model.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -from transformers import AutoConfig, AutoModel - -from swift.llm import TemplateType -from swift.utils import get_logger -from ..constant import RMModelType -from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model - -logger = get_logger() - -# ModelGroup([ -# Model('Qwen/Qwen2.5-Math-RM-72B', 'Qwen/Qwen2.5-Math-RM-72B'), -# Model('Qwen/Qwen2-Math-RM-72B', 'Qwen/Qwen2-Math-RM-72B'), -# ]), diff --git a/tests/train/test_rm.py b/tests/train/test_rm.py deleted file mode 100644 index 91a8fc6872..0000000000 --- a/tests/train/test_rm.py +++ /dev/null @@ -1,35 +0,0 @@ -import os - -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -kwargs = { - 'per_device_train_batch_size': 2, - 'per_device_eval_batch_size': 2, - 'save_steps': 50, - 'gradient_accumulation_steps': 4, - 'num_train_epochs': 1, -} - - -def test_infer(): - from swift.llm import infer_main, InferArguments - infer_main( - InferArguments( - model='Shanghai_AI_Laboratory/internlm2-1_8b-reward', val_dataset='AI-ModelScope/alpaca-gpt4-data-zh#500')) - - -def test_llm(): - from swift.llm import TrainArguments, sft_main, infer_main, InferArguments - result = sft_main( - TrainArguments( - model='Shanghai_AI_Laboratory/internlm2-1_8b-reward', - train_type='lora', - dataset=['hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh#100'], - **kwargs)) - last_model_checkpoint = result['last_model_checkpoint'] - infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True)) - - -if __name__ == '__main__': - # test_infer() - test_llm() From 2b47806c7b4221b39fa6e7fa1f8016589e176e77 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 00:06:19 +0800 Subject: [PATCH 28/47] update --- swift/llm/argument/rlhf_args.py | 11 +++--- swift/llm/infer/utils.py | 6 ++-- swift/llm/train/rlhf.py | 41 +++++++++++++++++----- swift/llm/train/tuner.py | 16 +++------ swift/trainers/rlhf_trainer/ppo_trainer.py | 26 +++++++------- 5 files changed, 61 insertions(+), 39 deletions(-) diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index 914aab2769..e9d3ec72d4 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -1,6 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
from dataclasses import dataclass, field -from typing import Literal, Optional +from typing import List, Literal, Optional from swift.llm import MODEL_MAPPING from .train_args import TrainArguments @@ -44,6 +44,7 @@ class RLHFArguments(TrainArguments): undesirable_weight: float = 1.0 # PPO reward_model: Optional[str] = None + reward_adapters: List[str] = field(default_factory=list) reward_model_type: Optional[str] = field( default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'}) reward_model_revision: Optional[str] = None @@ -71,9 +72,11 @@ def __post_init__(self): raise ValueError('CPO/ORPO or LoRA training does not require a ref_model to be passed in.') def _init_ppo(self): - self.response_length = self.max_new_tokens - self.num_ppo_epochs = self.num_train_epochs - # TODO: streaming, MLLM + if self.rlhf_type == 'ppo': + self.response_length = self.max_new_tokens + self.num_ppo_epochs = self.num_train_epochs + self.padding_side = 'left' + # TODO: streaming, MLLM def _init_simpo(self): if self.rlhf_type != 'simpo': diff --git a/swift/llm/infer/utils.py b/swift/llm/infer/utils.py index 67f1b02f01..6f004857dc 100644 --- a/swift/llm/infer/utils.py +++ b/swift/llm/infer/utils.py @@ -118,7 +118,7 @@ def check_query(self, query: str) -> Optional[str]: return query -def _prepare_adapter(args, model): +def prepare_adapter(args, model, adapters=None): if args.tuner_backend == 'unsloth': if args.model_meta.is_multimodal: from unsloth import FastVisionModel as UnslothModel @@ -131,7 +131,7 @@ def _prepare_adapter(args, model): else: tuner = Swift # compat deploy - for adapter in args.adapters: + for adapter in adapters: model = tuner.from_pretrained(model, adapter) if args.train_type == 'bone': # Bone has a problem of float32 matmul with bloat16 in `peft==0.14.0` @@ -141,6 +141,6 @@ def _prepare_adapter(args, model): def prepare_model_template(args, **kwargs): model, processor = args.get_model_processor(**kwargs) - model = _prepare_adapter(args, model) + model = prepare_adapter(args, model, args.adapters) template = args.get_template(processor) return model, template diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 2e5bff8910..1f0bbea640 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -12,21 +12,41 @@ class SwiftRLHF(SwiftSft): args: args_class def _prepare_model_tokenizer(self): + from swift.llm.infer.utils import prepare_adapter args = self.args - self.ref_model = None - if args.ref_model: + for key in ['ref', 'reward', 'value']: + origin_key = key + setattr(self, f'{key}_model', None) + if key == 'value': + if args.rlhf_type == 'ppo': + key = 'reward' + else: + continue + model_id_or_path = getattr(args, f'{key}_model') + if model_id_or_path is None: + continue + model_type = getattr(args, f'{key}_model_type') + model_revision = getattr(args, f'{key}_model_revision') + adapters = args.adapters if key == 'ref' else args.reward_adapters + # Be aware of the unexpected behavior caused by double monkey patching. 
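        # (Summary of this loop: the 'ref' model keeps the policy's task_type and is frozen; the
        #  'reward' model is likewise loaded frozen; 'value' is only resolved for PPO, where it starts
        #  from the reward model weights and is wrapped by `prepare_model(..., task_type='seq_cls')`
        #  so that, unlike the other two, it remains trainable.)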
- self.ref_model, _ = args.get_model_processor( - model=args.ref_model, model_type=args.ref_model_type, model_revision=args.ref_model_revision) - self.ref_model.requires_grad_(False).eval() + model = args.get_model_processor( + model=model_id_or_path, model_type=model_type, model_revision=model_revision)[0] + + model = prepare_adapter(args, model, adapters) + if origin_key in {'ref', 'reward'}: + model.requires_grad_(False).eval() + else: + model = self.prepare_model(args, model, task_type='seq_cls') + setattr(self, f'{origin_key}_model', model) super()._prepare_model_tokenizer() def _prepare_template(self) -> None: args = self.args super()._prepare_template() - mode = 'kto' if args.rlhf_type == 'kto' else 'rlhf' - self.template.set_mode(mode) + model_mapping = {'kto': 'kto', 'ppo': 'pt'} + self.template.set_mode(model_mapping.get(args.rlhf_type, 'rlhf')) if args.rlhf_type != 'orpo' or args.model_meta.is_multimodal: # Avoid padding labels during the model's forward pass in multimodal models. @@ -41,8 +61,11 @@ def _get_dataset(self): def _get_trainer_kwargs(self): trainer_kwargs = {} - if self.ref_model: - trainer_kwargs['ref_model'] = self.ref_model + for key in ['ref', 'reward', 'value']: + key = f'{key}_model' + model = getattr(self, key) + if model: + trainer_kwargs[key] = model return trainer_kwargs diff --git a/swift/llm/train/tuner.py b/swift/llm/train/tuner.py index 4584bdccae..afa6fe7692 100644 --- a/swift/llm/train/tuner.py +++ b/swift/llm/train/tuner.py @@ -136,7 +136,7 @@ def get_vera_target_modules(model, config): return config -def prepare_adapter(args: TrainArguments, model, *, template=None, train_dataset=None): +def prepare_adapter(args: TrainArguments, model, *, template=None, train_dataset=None, task_type=None): from swift.tuners import (AdaLoraConfig, AdapterConfig, BOFTConfig, LLaMAProConfig, LongLoRAModelType, LoraConfig, LoRAConfig, ReftConfig, Swift, VeraConfig) target_modules = get_target_modules(args, model) @@ -153,7 +153,7 @@ def prepare_adapter(args: TrainArguments, model, *, template=None, train_dataset 'lorap_lr_ratio': args.lorap_lr_ratio, 'init_lora_weights': args.init_weights, } - task_type = args.task_type.upper() + task_type = (task_type or args.task_type).upper() if args.train_type in ('lora', 'longlora'): if args.use_swift_lora: lora_config = LoRAConfig(lora_dtype=args.lora_dtype, **lora_kwargs) @@ -329,14 +329,7 @@ def torchacc_resume_from_checkpoint(args, model): class TunerMixin: @classmethod - def prepare_model( - cls, - args, - model, - *, - template=None, - train_dataset=None, - ): + def prepare_model(cls, args, model, *, template=None, train_dataset=None, task_type=None): if args.use_liger: # Apply liger apply_liger(args.model_type) @@ -361,7 +354,8 @@ def prepare_model( tuner: Tuner = extra_tuners[args.train_type] model = tuner.prepare_model(args, model) else: - model = prepare_adapter(args, model, template=template, train_dataset=train_dataset) + model = prepare_adapter( + args, model, template=template, train_dataset=train_dataset, task_type=task_type) # fix bug: Attempting to unscale FP16 gradients. # peft: https://github.com/huggingface/peft/issues/1249 for p in model.parameters(): diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index bcdfbf6b27..29d11191d2 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -1,4 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
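# (The `_patch_dataloader` helper introduced below -- and filled in by a later commit in this
#  series -- temporarily overrides `DataLoader.__init__` so that the dataloaders created inside
#  `HFPPOv2Trainer.__init__` pick up the data collator swift passes in. A standalone sketch of that
#  pattern, with try/finally so the original __init__ is always restored; the helper name here is
#  illustrative only:)

from contextlib import contextmanager
from torch.utils.data import DataLoader


@contextmanager
def patch_dataloader_collate_fn(collate_fn):
    # Temporarily force every DataLoader created inside this context to use `collate_fn`.
    original_init = DataLoader.__init__

    def patched_init(self, *args, **kwargs):
        kwargs['collate_fn'] = collate_fn
        original_init(self, *args, **kwargs)

    DataLoader.__init__ = patched_init
    try:
        yield
    finally:
        DataLoader.__init__ = original_init

# usage sketch: with patch_dataloader_collate_fn(data_collator): trainer = HFPPOv2Trainer(...)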
+from contextlib import contextmanager + from torch.utils.data import DataLoader from transformers import PreTrainedModel from trl import PPOv2Trainer as HFPPOTrainer @@ -9,6 +11,18 @@ class PPOTrainer(RLHFTrainerMixin, SwiftMixin, HFPPOTrainer): + @contextmanager + def _patch_dataloader(): + print() + + @contextmanager + def _patch_init(): + kwargs_to_pop = ['model', 'model_init', 'compute_metrics', 'preprocess_logits_for_metrics'] + for kwarg in kwargs_to_pop: + kwargs.pop(kwarg, None) + kwargs['config'] = kwargs.pop('args') + HFPPOTrainer.__init__(self, **kwargs) + def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, **kwargs): kwargs['policy'] = model kwargs['ref_policy'] = ref_model @@ -33,15 +47,3 @@ def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, * def train(self, *args, **kwargs): # remove args that are not needed for the HFPPOTrainer HFPPOTrainer.train(self) - - -def patched_init(self, **kwargs): - kwargs_to_pop = ['model', 'model_init', 'compute_metrics', 'preprocess_logits_for_metrics'] - for kwarg in kwargs_to_pop: - kwargs.pop(kwarg, None) - kwargs['config'] = kwargs.pop('args') - original_init(self, **kwargs) - - -original_init = HFPPOTrainer.__init__ -HFPPOTrainer.__init__ = patched_init From ea082cdc4e3a4651e15d1bfeb40ac76345ecc03f Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 10:34:33 +0800 Subject: [PATCH 29/47] update --- swift/llm/train/rlhf.py | 4 ++++ swift/llm/train/tuner.py | 8 +++++--- tests/train/test_ppo.py | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 tests/train/test_ppo.py diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 1f0bbea640..9cb0eb124c 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -38,6 +38,10 @@ def _prepare_model_tokenizer(self): model.requires_grad_(False).eval() else: model = self.prepare_model(args, model, task_type='seq_cls') + logger.info(f'value_model: {model}') + model_parameter_info = get_model_parameter_info(model) + self.train_msg['value_model_parameter_info'] = model_parameter_info + logger.info(f'value_model_parameter_info: {model_parameter_info}') setattr(self, f'{origin_key}_model', model) super()._prepare_model_tokenizer() diff --git a/swift/llm/train/tuner.py b/swift/llm/train/tuner.py index afa6fe7692..c47212934d 100644 --- a/swift/llm/train/tuner.py +++ b/swift/llm/train/tuner.py @@ -105,7 +105,7 @@ def get_target_modules(args, model) -> Union[str, List[str]]: return target_modules -def get_modules_to_save(args, model): +def get_modules_to_save(args, model, task_type=None): modules_to_save = args.modules_to_save.copy() if 'all-embedding' in args.modules_to_save: modules_to_save.remove('all-embedding') @@ -113,6 +113,8 @@ def get_modules_to_save(args, model): if 'all-norm' in args.modules_to_save: modules_to_save.remove('all-norm') modules_to_save += find_norm(model) + if task_type and task_type.lower() == 'seq_cls': # reward_model + modules_to_save.append('v_head') return modules_to_save @@ -139,8 +141,9 @@ def get_vera_target_modules(model, config): def prepare_adapter(args: TrainArguments, model, *, template=None, train_dataset=None, task_type=None): from swift.tuners import (AdaLoraConfig, AdapterConfig, BOFTConfig, LLaMAProConfig, LongLoRAModelType, LoraConfig, LoRAConfig, ReftConfig, Swift, VeraConfig) + task_type = (task_type or args.task_type).upper() target_modules = get_target_modules(args, model) - modules_to_save = 
get_modules_to_save(args, model) + modules_to_save = get_modules_to_save(args, model, task_type) lora_kwargs = { 'r': args.lora_rank, 'target_modules': target_modules, @@ -153,7 +156,6 @@ def prepare_adapter(args: TrainArguments, model, *, template=None, train_dataset 'lorap_lr_ratio': args.lorap_lr_ratio, 'init_lora_weights': args.init_weights, } - task_type = (task_type or args.task_type).upper() if args.train_type in ('lora', 'longlora'): if args.use_swift_lora: lora_config = LoRAConfig(lora_dtype=args.lora_dtype, **lora_kwargs) diff --git a/tests/train/test_ppo.py b/tests/train/test_ppo.py new file mode 100644 index 0000000000..073701d3fe --- /dev/null +++ b/tests/train/test_ppo.py @@ -0,0 +1,40 @@ +import os + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +kwargs = { + 'per_device_train_batch_size': 2, + 'save_steps': 5, + 'gradient_accumulation_steps': 4, + 'num_train_epochs': 1, +} + + +def test_rm(): + from swift.llm import rlhf_main, RLHFArguments, infer_main, InferArguments + result = rlhf_main( + RLHFArguments( + rlhf_type='rm', + model='Shanghai_AI_Laboratory/internlm2-1_8b-reward', + dataset=['hjh0119/shareAI-Llama3-DPO-zh-en-emoji#100'], + **kwargs)) + last_model_checkpoint = result['last_model_checkpoint'] + infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True, merge_lora=True)) + + +def test_ppo(): + from swift.llm import rlhf_main, RLHFArguments, infer_main, InferArguments + result = rlhf_main( + RLHFArguments( + rlhf_type='ppo', + model='Qwen/Qwen2.5-7B-Instruct', + reward_model='AI-ModelScope/Skywork-Reward-Llama-3.1-8B-v0.2', + dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'], + **kwargs)) + last_model_checkpoint = result['last_model_checkpoint'] + infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True, merge_lora=True)) + + +if __name__ == '__main__': + # test_rm() + test_ppo() From 2f045c97c4fb8a8c55dd660027995158768d7e30 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 14:21:39 +0800 Subject: [PATCH 30/47] update --- swift/llm/train/rlhf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 9cb0eb124c..f2e715b4bb 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -1,11 +1,13 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
from typing import List, Union -from swift.utils import patch_getattr +from swift.utils import get_logger, get_model_parameter_info, patch_getattr from ..argument import RLHFArguments from .kto import prepare_kto_dataset from .sft import SwiftSft +logger = get_logger() + class SwiftRLHF(SwiftSft): args_class = RLHFArguments From 6c62557c43366e0ccdd6c819721a1d901510272c Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 16:38:04 +0800 Subject: [PATCH 31/47] update --- swift/trainers/arguments.py | 4 +- swift/trainers/mixin.py | 29 ++++++------ swift/trainers/rlhf_trainer/ppo_trainer.py | 54 +++++++++------------- 3 files changed, 39 insertions(+), 48 deletions(-) diff --git a/swift/trainers/arguments.py b/swift/trainers/arguments.py index a0b78b947f..f107b41aff 100644 --- a/swift/trainers/arguments.py +++ b/swift/trainers/arguments.py @@ -80,7 +80,7 @@ class Seq2SeqTrainingArguments(SwiftArgumentsMixin, HfSeq2SeqTrainingArguments): try: from trl import (DPOConfig as HfDPOConfig, CPOConfig as HfCPOConfig, ORPOConfig as HfORPOConfig, KTOConfig as - HfKTOConfig, RewardConfig as HfRewardConfig, PPOv2Config as HfPPOConfig) + HfKTOConfig, RewardConfig as HfRewardConfig, PPOv2Config as HfPPOv2Config) @dataclass class DPOConfig(SwiftArgumentsMixin, HfDPOConfig): @@ -103,7 +103,7 @@ class RewardConfig(SwiftArgumentsMixin, HfRewardConfig): pass @dataclass - class PPOConfig(SwiftArgumentsMixin, HfPPOConfig): + class PPOConfig(SwiftArgumentsMixin, HfPPOv2Config): pass except (ImportError, RuntimeError): diff --git a/swift/trainers/mixin.py b/swift/trainers/mixin.py index d0281bcdfc..5eb7a72dd3 100644 --- a/swift/trainers/mixin.py +++ b/swift/trainers/mixin.py @@ -44,21 +44,20 @@ class SwiftMixin: - def __init__( - self, - model: Union[PreTrainedModel, Module] = None, - args: TrainingArguments = None, - data_collator: Optional[DataCollator] = None, - train_dataset: Optional[HfDataset] = None, - eval_dataset: Optional[Union[HfDataset, Dict[str, HfDataset]]] = None, - template: Optional[Template] = None, - model_init: Optional[Callable[[], PreTrainedModel]] = None, - compute_loss_func: Optional[Callable] = None, - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - callbacks: Optional[List[TrainerCallback]] = None, - optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), - preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], - torch.Tensor]] = None) -> None: + def __init__(self, + model: Union[PreTrainedModel, Module] = None, + args: TrainingArguments = None, + data_collator: Optional[DataCollator] = None, + train_dataset: Optional[HfDataset] = None, + eval_dataset: Optional[Union[HfDataset, Dict[str, HfDataset]]] = None, + template: Optional[Template] = None, + model_init: Optional[Callable[[], PreTrainedModel]] = None, + compute_loss_func: Optional[Callable] = None, + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + callbacks: Optional[List[TrainerCallback]] = None, + optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + **kwargs) -> None: if args.check_model and hasattr(model, 'model_dir'): check_local_model_is_latest( model.model_dir, user_agent={ diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index 29d11191d2..a6a24a634c 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ 
b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -3,47 +3,39 @@ from torch.utils.data import DataLoader from transformers import PreTrainedModel -from trl import PPOv2Trainer as HFPPOTrainer +from trl import PPOv2Trainer as HFPPOv2Trainer from ..mixin import SwiftMixin from .rlhf_mixin import RLHFTrainerMixin -class PPOTrainer(RLHFTrainerMixin, SwiftMixin, HFPPOTrainer): +class PPOTrainer(SwiftMixin, HFPPOv2Trainer): + ppo_trainer_init = HFPPOv2Trainer.__init__ + del HFPPOv2Trainer.__init__ + @staticmethod @contextmanager - def _patch_dataloader(): - print() + def _patch_dataloader(data_collator): + __init__ = DataLoader.__init__ - @contextmanager - def _patch_init(): - kwargs_to_pop = ['model', 'model_init', 'compute_metrics', 'preprocess_logits_for_metrics'] - for kwarg in kwargs_to_pop: - kwargs.pop(kwarg, None) - kwargs['config'] = kwargs.pop('args') - HFPPOTrainer.__init__(self, **kwargs) + def __new_init__(self, *args, **kwargs): + kwargs['data_collator'] = data_collator + __init__(self, *args, **kwargs) + + DataLoader.__init__ = __new_init__ + yield + DataLoader.__init__ = __init__ def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, **kwargs): - kwargs['policy'] = model - kwargs['ref_policy'] = ref_model - super().__init__(model, ref_model, *_args, **kwargs) - # reset dataloader - self.dataloader = DataLoader( - self.train_dataset, - batch_size=self.local_dataloader_batch_size, - shuffle=True, - collate_fn=kwargs['data_collator'], - drop_last=True, # needed; otherwise the last batch will be of ragged shape - ) - self.accelerator.prepare(self.data_collator) - self.eval_dataloader = DataLoader( - self.eval_dataset, - batch_size=self.args.per_device_eval_batch_size, - collate_fn=kwargs['data_collator'], - drop_last=True, - ) # no need to shuffle eval dataset - self.eval_dataloader = self.accelerator.prepare(self.eval_dataloader) + super().__init__(model, *_args, **kwargs) + with self._patch_dataloader(kwargs['data_collator']): + new_kwargs = { + k: v + for k, v in kwargs.items() + if k in ['train_dataset', 'data_collator', 'reward_model', 'val_model', 'eval_dataset', 'tokenizer'] + } + self.ppo_trainer_init(config=kwargs['args'], policy=model, ref_policy=ref_model, **new_kwargs) def train(self, *args, **kwargs): # remove args that are not needed for the HFPPOTrainer - HFPPOTrainer.train(self) + super().train() From 585ad2397e13d644d535947c84b1a8f45eaa7d66 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 16:39:05 +0800 Subject: [PATCH 32/47] fix --- swift/trainers/rlhf_trainer/ppo_trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index a6a24a634c..c17833ee7b 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -6,7 +6,6 @@ from trl import PPOv2Trainer as HFPPOv2Trainer from ..mixin import SwiftMixin -from .rlhf_mixin import RLHFTrainerMixin class PPOTrainer(SwiftMixin, HFPPOv2Trainer): From 89dbe794dc111fa0f190b275e9b0c3510cb65fd8 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 16:47:47 +0800 Subject: [PATCH 33/47] fix --- swift/trainers/rlhf_trainer/ppo_trainer.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index c17833ee7b..1d4666662b 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -5,7 +5,9 @@ 
from transformers import PreTrainedModel from trl import PPOv2Trainer as HFPPOv2Trainer +from swift.utils import patch_getattr from ..mixin import SwiftMixin +from .rlhf_mixin import RLHFTrainerMixin class PPOTrainer(SwiftMixin, HFPPOv2Trainer): @@ -14,11 +16,11 @@ class PPOTrainer(SwiftMixin, HFPPOv2Trainer): @staticmethod @contextmanager - def _patch_dataloader(data_collator): + def _patch_dataloader(collate_fn): __init__ = DataLoader.__init__ def __new_init__(self, *args, **kwargs): - kwargs['data_collator'] = data_collator + kwargs['collate_fn'] = collate_fn __init__(self, *args, **kwargs) DataLoader.__init__ = __new_init__ @@ -31,9 +33,11 @@ def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, * new_kwargs = { k: v for k, v in kwargs.items() - if k in ['train_dataset', 'data_collator', 'reward_model', 'val_model', 'eval_dataset', 'tokenizer'] + if k in ['train_dataset', 'data_collator', 'reward_model', 'value_model', 'eval_dataset'] } - self.ppo_trainer_init(config=kwargs['args'], policy=model, ref_policy=ref_model, **new_kwargs) + self.ppo_trainer_init( + config=kwargs['args'], tokenizer=self.tokenizer, policy=model, ref_policy=ref_model, **new_kwargs) + patch_getattr(self.model.__class__, 'policy') def train(self, *args, **kwargs): # remove args that are not needed for the HFPPOTrainer From d8030db5322cb6639e62dd9a816719d43ae88225 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 17:34:23 +0800 Subject: [PATCH 34/47] fix --- swift/llm/argument/base_args/base_args.py | 3 +- swift/llm/argument/rlhf_args.py | 46 ++++++++++++++--------- swift/llm/template/template_meta.py | 6 +++ swift/llm/train/rlhf.py | 9 +++-- 4 files changed, 42 insertions(+), 22 deletions(-) diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py index 2851683db2..9d4d922a3c 100644 --- a/swift/llm/argument/base_args/base_args.py +++ b/swift/llm/argument/base_args/base_args.py @@ -242,7 +242,7 @@ def get_template(self, processor: 'Processor') -> 'Template': logger.info(f'default_system: {template.template_meta.default_system}') return template - def get_model_processor(self, *, model=None, model_type=None, model_revision=None, **kwargs): + def get_model_processor(self, *, model=None, model_type=None, model_revision=None, task_type=None, **kwargs): if self.tuner_backend == 'unsloth': return load_by_unsloth(self) kwargs.update(self.get_model_kwargs()) @@ -250,5 +250,6 @@ def get_model_processor(self, *, model=None, model_type=None, model_revision=Non kwargs['model_id_or_path'] = model or self.model kwargs['model_type'] = model_type or self.model_type kwargs['model_revision'] = model_revision or self.model_revision + kwargs['task_type'] = task_type or self.task_type return get_model_tokenizer(**kwargs) diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index e9d3ec72d4..62658fa2ea 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -3,11 +3,37 @@ from typing import List, Literal, Optional from swift.llm import MODEL_MAPPING +from ..template import get_template_meta from .train_args import TrainArguments @dataclass -class RLHFArguments(TrainArguments): +class PPOArguments: + reward_model: Optional[str] = None + reward_adapters: List[str] = field(default_factory=list) + reward_model_type: Optional[str] = field( + default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'}) + reward_model_revision: Optional[str] = None + + num_ppo_epochs: int = 4 + 
whiten_rewards: bool = False + kl_coef: float = 0.05 + cliprange: float = 0.2 + vf_coef: float = 0.1 + cliprange_value: float = 0.2 + gamma: float = 1.0 + lam: float = 0.95 + + num_mini_batches: int = 1 + local_rollout_forward_batch_size: int = 64 + num_sample_generations: int = 10 + response_length: int = 53 + temperature: float = 0.7 + missing_eos_penalty: Optional[float] = None + + +@dataclass +class RLHFArguments(PPOArguments, TrainArguments): """ RLHFArguments is a dataclass that holds arguments specific to the Reinforcement Learning with Human Feedback (RLHF) training backend. @@ -42,27 +68,13 @@ class RLHFArguments(TrainArguments): # KTO desirable_weight: float = 1.0 undesirable_weight: float = 1.0 - # PPO - reward_model: Optional[str] = None - reward_adapters: List[str] = field(default_factory=list) - reward_model_type: Optional[str] = field( - default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'}) - reward_model_revision: Optional[str] = None - local_rollout_forward_batch_size: int = 64 - kl_coef: float = 0.05 - cliprange: float = 0.2 - vf_coef: float = 0.1 - cliprange_value: float = 0.2 - gamma: float = 1.0 - lam: float = 0.95 - num_sample_generations: int = 10 def __post_init__(self): self._init_rm() self._init_simpo() - self._init_ppo() self._set_default() super().__post_init__() + self._init_ppo() if self.rlhf_type in ['dpo', 'kto'] and self.train_type == 'full' or self.rlhf_type == 'ppo': self.ref_model = self.ref_model or self.model @@ -73,8 +85,6 @@ def __post_init__(self): def _init_ppo(self): if self.rlhf_type == 'ppo': - self.response_length = self.max_new_tokens - self.num_ppo_epochs = self.num_train_epochs self.padding_side = 'left' # TODO: streaming, MLLM diff --git a/swift/llm/template/template_meta.py b/swift/llm/template/template_meta.py index 98520516c2..82a07bdfae 100644 --- a/swift/llm/template/template_meta.py +++ b/swift/llm/template/template_meta.py @@ -128,6 +128,12 @@ def init(self, tokenizer: PreTrainedTokenizerBase) -> None: if tokenizer.eos_token not in self.stop_words: self.stop_words.append(tokenizer.eos_token) + self.stop_token_id = tokenizer.eos_token_id + if self.suffix: + stop_token_id = tokenizer.convert_tokens_to_ids(self.suffix[-1]) + if stop_token_id is not None: + self.stop_token_id = stop_token_id + def check_system(self, system: Optional[str]) -> None: if system is not None: assert self.support_system, ( diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index f2e715b4bb..4d1dddc030 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -30,16 +30,16 @@ def _prepare_model_tokenizer(self): model_type = getattr(args, f'{key}_model_type') model_revision = getattr(args, f'{key}_model_revision') adapters = args.adapters if key == 'ref' else args.reward_adapters - + task_type = args.task_type if origin_key == 'ref' else 'seq_cls' # Be aware of the unexpected behavior caused by double monkey patching. 
model = args.get_model_processor( - model=model_id_or_path, model_type=model_type, model_revision=model_revision)[0] + model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type)[0] model = prepare_adapter(args, model, adapters) if origin_key in {'ref', 'reward'}: model.requires_grad_(False).eval() else: - model = self.prepare_model(args, model, task_type='seq_cls') + model = self.prepare_model(args, model, task_type=task_type) logger.info(f'value_model: {model}') model_parameter_info = get_model_parameter_info(model) self.train_msg['value_model_parameter_info'] = model_parameter_info @@ -58,6 +58,9 @@ def _prepare_template(self) -> None: # Avoid padding labels during the model's forward pass in multimodal models. self.template.loss_scale = 'last_round' + if args.rlhf_type == 'ppo': + self.training_args.stop_token_id = self.template.template_meta.stop_token_id + def _get_dataset(self): args = self.args train_dataset, val_dataset = super()._get_dataset() From ac49ee642e664dfe902271129790d6b92cb9c333 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Mon, 6 Jan 2025 19:26:23 +0800 Subject: [PATCH 35/47] update --- ...06\345\222\214\351\203\250\347\275\262.md" | 2 +- .../Instruction/Inference-and-deployment.md | 2 +- swift/llm/argument/base_args/base_args.py | 6 ++--- swift/llm/argument/rlhf_args.py | 1 + swift/llm/infer/infer_engine/infer_engine.py | 2 +- swift/llm/train/rlhf.py | 10 +++++--- swift/trainers/rlhf_trainer/ppo_trainer.py | 24 +++++++++++++++++-- 7 files changed, 36 insertions(+), 11 deletions(-) diff --git "a/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md" "b/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md" index d6cb841f07..76a574b957 100644 --- "a/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md" +++ "b/docs/source/Instruction/\346\216\250\347\220\206\345\222\214\351\203\250\347\275\262.md" @@ -4,7 +4,7 @@ SWIFT支持以命令行、Python代码和界面方式进行推理和部署: - 使用`engine.infer`或者`engine.infer_async`进行python的方式推理. 参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo.py). - 使用`swift infer`使用命令行的方式进行推理. 参考[这里](https://github.com/modelscope/ms-swift/blob/main/examples/infer/cli_demo.sh). - 使用`swift deploy`进行服务部署,并使用openai API或者`client.infer`的方式推理. 服务端参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/deploy/server), 客户端参考[这里](https://github.com/modelscope/ms-swift/tree/main/examples/deploy/client). -- 使用`swift app`部署模型进行界面推理, 可以查看[这里](../GetStarted/界面使用.md) +- 使用`swift app`部署模型进行界面推理, 可以查看[这里](../GetStarted/Web-UI.md) ## 命令行推理指令 diff --git a/docs/source_en/Instruction/Inference-and-deployment.md b/docs/source_en/Instruction/Inference-and-deployment.md index e77b7a970d..1229ba3590 100644 --- a/docs/source_en/Instruction/Inference-and-deployment.md +++ b/docs/source_en/Instruction/Inference-and-deployment.md @@ -4,7 +4,7 @@ SWIFT supports inference and deployment through command line, Python code, and i - Use `engine.infer` or `engine.infer_async` for Python-based inference. See [here](https://github.com/modelscope/ms-swift/blob/main/examples/infer/demo.py) for reference. - Use `swift infer` for command-line-based inference. See [here](https://github.com/modelscope/ms-swift/blob/main/examples/infer/cli_demo.sh) for reference. - Use `swift deploy` for service deployment and perform inference using the OpenAI API or `client.infer`. 
Refer to the server guidelines [here](https://github.com/modelscope/ms-swift/tree/main/examples/deploy/server) and the client guidelines [here](https://github.com/modelscope/ms-swift/tree/main/examples/deploy/client). -- Deploy the model with `swift app` for web-based inference. You can check [here](../GetStarted/Interface-usage.md) for details. +- Deploy the model with `swift app` for web-based inference. You can check [here](../GetStarted/Web-UI.md) for details. ## Command Line Inference diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py index 9d4d922a3c..22340beef7 100644 --- a/swift/llm/argument/base_args/base_args.py +++ b/swift/llm/argument/base_args/base_args.py @@ -43,7 +43,6 @@ def _handle_ckpt_dir(self: 'BaseArguments'): return self.adapters.insert(0, self.ckpt_dir) else: - assert self.model is None, f'self.model: {self.model}' self.model = self.ckpt_dir self.ckpt_dir = None logger.warning('The `--ckpt_dir` parameter will be removed in `ms-swift>=3.2`. ' @@ -236,9 +235,10 @@ def _init_device(self): else: torch.cuda.set_device(self.local_rank) - def get_template(self, processor: 'Processor') -> 'Template': + def get_template(self, processor: 'Processor', template_type=None) -> 'Template': template_kwargs = self.get_template_kwargs() - template = get_template(self.template, processor, **template_kwargs) + template_type = template_type or self.template + template = get_template(template_type, processor, **template_kwargs) logger.info(f'default_system: {template.template_meta.default_system}') return template diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index 62658fa2ea..60c4511587 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -14,6 +14,7 @@ class PPOArguments: reward_model_type: Optional[str] = field( default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'}) reward_model_revision: Optional[str] = None + reward_template: Optional[str] = None num_ppo_epochs: int = 4 whiten_rewards: bool = False diff --git a/swift/llm/infer/infer_engine/infer_engine.py b/swift/llm/infer/infer_engine/infer_engine.py index 80feb707b8..fe6057b383 100644 --- a/swift/llm/infer/infer_engine/infer_engine.py +++ b/swift/llm/infer/infer_engine/infer_engine.py @@ -174,7 +174,7 @@ def _get_num_tokens(inputs: Dict[str, Any]) -> int: else: return input_ids.shape[-1] elif 'inputs_embeds' in inputs: # 2d or 3d - return inputs['inputs_embeds'].shape[-1] + return inputs['inputs_embeds'].shape[-2] raise ValueError(f'Unable to retrieve input_ids and inputs_embeds. inputs: {inputs}') def set_default_max_tokens(self, request_config: RequestConfig, inputs: Dict[str, Any]) -> None: diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 4d1dddc030..8d4ac35925 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -32,8 +32,10 @@ def _prepare_model_tokenizer(self): adapters = args.adapters if key == 'ref' else args.reward_adapters task_type = args.task_type if origin_key == 'ref' else 'seq_cls' # Be aware of the unexpected behavior caused by double monkey patching. 
- model = args.get_model_processor( - model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type)[0] + model, processor = args.get_model_processor( + model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type) + if origin_key == 'reward': + self.reward_template = args.get_template(processor, args.reward_template) model = prepare_adapter(args, model, adapters) if origin_key in {'ref', 'reward'}: @@ -59,7 +61,7 @@ def _prepare_template(self) -> None: self.template.loss_scale = 'last_round' if args.rlhf_type == 'ppo': - self.training_args.stop_token_id = self.template.template_meta.stop_token_id + args.training_args.stop_token_id = self.template.template_meta.stop_token_id def _get_dataset(self): args = self.args @@ -75,6 +77,8 @@ def _get_trainer_kwargs(self): model = getattr(self, key) if model: trainer_kwargs[key] = model + if self.args.rlhf_type == 'ppo': + trainer_kwargs['reward_template'] = self.reward_template return trainer_kwargs diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index 1d4666662b..3208342d42 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -1,6 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from contextlib import contextmanager - +import torch from torch.utils.data import DataLoader from transformers import PreTrainedModel from trl import PPOv2Trainer as HFPPOv2Trainer @@ -29,6 +29,7 @@ def __new_init__(self, *args, **kwargs): def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, **kwargs): super().__init__(model, *_args, **kwargs) + self.reward_template = kwargs['reward_template'] with self._patch_dataloader(kwargs['data_collator']): new_kwargs = { k: v @@ -39,6 +40,25 @@ def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, * config=kwargs['args'], tokenizer=self.tokenizer, policy=model, ref_policy=ref_model, **new_kwargs) patch_getattr(self.model.__class__, 'policy') + @contextmanager + def patch_reward_model(self): + model_cls = self.reward_model.__class__ + forward = model_cls.forward + trainer = self + + def new_forward(self, input_ids, *args, **kwargs): + idx = (input_ids == 0).cumsum(dim=1)[:, -1] + trainer.template.tokenizer.batch_decode(input_ids) + + print(trainer) + return forward(self, input_ids, *args, **kwargs) + + model_cls.forward = new_forward + yield + model_cls.forward = forward + + def train(self, *args, **kwargs): # remove args that are not needed for the HFPPOTrainer - super().train() + with self.patch_reward_model(): + super().train() From 25d9d9deda6353e4756d4ffc67cc4d4848aceb56 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 00:44:03 +0800 Subject: [PATCH 36/47] update --- examples/train/rlhf/ppo.sh | 28 ++++++++++++++++++++++ swift/llm/argument/rlhf_args.py | 3 ++- swift/llm/train/rlhf.py | 8 ++----- swift/trainers/mixin.py | 5 ++-- swift/trainers/rlhf_trainer/ppo_trainer.py | 26 ++++---------------- swift/trainers/trainer_factory.py | 3 +++ tests/train/test_ppo.py | 2 +- 7 files changed, 43 insertions(+), 32 deletions(-) create mode 100644 examples/train/rlhf/ppo.sh diff --git a/examples/train/rlhf/ppo.sh b/examples/train/rlhf/ppo.sh new file mode 100644 index 0000000000..a4341bea31 --- /dev/null +++ b/examples/train/rlhf/ppo.sh @@ -0,0 +1,28 @@ +nproc_per_node=2 + +CUDA_VISIBLE_DEVICES=0,1 \ +NPROC_PER_NODE=$nproc_per_node \ +swift rlhf \ + --rlhf_type ppo \ + --model 
LLM-Research/Meta-Llama-3.1-8B-Instruct \ + --reward_model 'AI-ModelScope/Skywork-Reward-Llama-3.1-8B-v0.2' \ + --train_type lora \ + --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' 'AI-ModelScope/alpaca-gpt4-data-en#20000' \ + --torch_dtype bfloat16 \ + --num_train_epochs 1 \ + --per_device_train_batch_size 1 \ + --per_device_eval_batch_size 1 \ + --learning_rate 1e-4 \ + --lora_rank 8 \ + --lora_alpha 32 \ + --target_modules all-linear \ + --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \ + --eval_steps 100 \ + --save_steps 100 \ + --save_total_limit 5 \ + --logging_steps 5 \ + --max_length 2048 \ + --output_dir output \ + --warmup_ratio 0.05 \ + --dataloader_num_workers 4 \ + --deepspeed zero2 diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index 60c4511587..83620a9b3c 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -14,7 +14,6 @@ class PPOArguments: reward_model_type: Optional[str] = field( default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'}) reward_model_revision: Optional[str] = None - reward_template: Optional[str] = None num_ppo_epochs: int = 4 whiten_rewards: bool = False @@ -87,6 +86,8 @@ def __post_init__(self): def _init_ppo(self): if self.rlhf_type == 'ppo': self.padding_side = 'left' + self.metric_for_best_model = None + self.training_args.metric_for_best_model = None # TODO: streaming, MLLM def _init_simpo(self): diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 8d4ac35925..37e8d9903c 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -32,10 +32,8 @@ def _prepare_model_tokenizer(self): adapters = args.adapters if key == 'ref' else args.reward_adapters task_type = args.task_type if origin_key == 'ref' else 'seq_cls' # Be aware of the unexpected behavior caused by double monkey patching. 
- model, processor = args.get_model_processor( - model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type) - if origin_key == 'reward': - self.reward_template = args.get_template(processor, args.reward_template) + model = args.get_model_processor( + model=model_id_or_path, model_type=model_type, model_revision=model_revision, task_type=task_type)[0] model = prepare_adapter(args, model, adapters) if origin_key in {'ref', 'reward'}: @@ -77,8 +75,6 @@ def _get_trainer_kwargs(self): model = getattr(self, key) if model: trainer_kwargs[key] = model - if self.args.rlhf_type == 'ppo': - trainer_kwargs['reward_template'] = self.reward_template return trainer_kwargs diff --git a/swift/trainers/mixin.py b/swift/trainers/mixin.py index 5eb7a72dd3..8c68e1a27d 100644 --- a/swift/trainers/mixin.py +++ b/swift/trainers/mixin.py @@ -72,6 +72,7 @@ def __init__(self, from swift.trainers.xtuner import init_sequence_parallel_xtuner init_sequence_parallel_xtuner(args.sequence_parallel_size) + self.model_meta = model.model_meta with self.hub.patch_hub(): super().__init__( model=model, @@ -216,7 +217,7 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None): # tokenizer if not is_adapter: from swift.llm import save_checkpoint - additional_saved_files = self.model.model_meta.additional_saved_files + additional_saved_files = self.model_meta.additional_saved_files save_checkpoint(None, self.template.processor, output_dir, additional_saved_files=additional_saved_files) def _fix_zero3_gather_all_parameters(self) -> None: @@ -246,7 +247,7 @@ def _save_checkpoint(self, *args, **kwargs): return result def train(self, *args, **kwargs): - if self.model.model_meta.is_multimodal: + if self.model_meta.is_multimodal: models = list( set([ v for k, v in self.__dict__.items() diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index 3208342d42..19f8328b46 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
from contextlib import contextmanager + import torch from torch.utils.data import DataLoader from transformers import PreTrainedModel @@ -29,7 +30,6 @@ def __new_init__(self, *args, **kwargs): def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, **kwargs): super().__init__(model, *_args, **kwargs) - self.reward_template = kwargs['reward_template'] with self._patch_dataloader(kwargs['data_collator']): new_kwargs = { k: v @@ -38,27 +38,9 @@ def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, * } self.ppo_trainer_init( config=kwargs['args'], tokenizer=self.tokenizer, policy=model, ref_policy=ref_model, **new_kwargs) - patch_getattr(self.model.__class__, 'policy') - - @contextmanager - def patch_reward_model(self): - model_cls = self.reward_model.__class__ - forward = model_cls.forward - trainer = self - - def new_forward(self, input_ids, *args, **kwargs): - idx = (input_ids == 0).cumsum(dim=1)[:, -1] - trainer.template.tokenizer.batch_decode(input_ids) - - print(trainer) - return forward(self, input_ids, *args, **kwargs) - - model_cls.forward = new_forward - yield - model_cls.forward = forward - + unwrap_model = self.accelerator.unwrap_model(self.model) + patch_getattr(unwrap_model, 'policy') def train(self, *args, **kwargs): # remove args that are not needed for the HFPPOTrainer - with self.patch_reward_model(): - super().train() + super().train() diff --git a/swift/trainers/trainer_factory.py b/swift/trainers/trainer_factory.py index 480ca8287d..19c93a042b 100644 --- a/swift/trainers/trainer_factory.py +++ b/swift/trainers/trainer_factory.py @@ -56,4 +56,7 @@ def get_training_args(cls, args): if k not in parameters: args_dict.pop(k) + if 'ppo' in training_args_cls.__name__.lower(): + args_dict['world_size'] = args.global_world_size + return training_args_cls(**args_dict) diff --git a/tests/train/test_ppo.py b/tests/train/test_ppo.py index 073701d3fe..0a7c98022e 100644 --- a/tests/train/test_ppo.py +++ b/tests/train/test_ppo.py @@ -27,7 +27,7 @@ def test_ppo(): result = rlhf_main( RLHFArguments( rlhf_type='ppo', - model='Qwen/Qwen2.5-7B-Instruct', + model='LLM-Research/Meta-Llama-3.1-8B-Instruct', reward_model='AI-ModelScope/Skywork-Reward-Llama-3.1-8B-v0.2', dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'], **kwargs)) From 102257bca9878b1d553379d0db770778bbc47acf Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 10:59:47 +0800 Subject: [PATCH 37/47] update --- examples/deploy/lora/client.py | 2 +- examples/infer/demo_hf.py | 60 ++++++++++++++++++++++ examples/infer/demo_lora.py | 2 +- swift/llm/template/base.py | 2 +- tests/test_align/test_template/test_llm.py | 4 +- 5 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 examples/infer/demo_hf.py diff --git a/examples/deploy/lora/client.py b/examples/deploy/lora/client.py index e61caad8ae..ae66b10df0 100644 --- a/examples/deploy/lora/client.py +++ b/examples/deploy/lora/client.py @@ -23,5 +23,5 @@ def infer_multilora(engine: InferClient, infer_request: InferRequest): if __name__ == '__main__': engine = InferClient(host='127.0.0.1', port=8000) - infer_request = InferRequest(messages=[{'role': 'user', 'content': '你是谁'}]) + infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}]) infer_multilora(engine, infer_request) diff --git a/examples/infer/demo_hf.py b/examples/infer/demo_hf.py new file mode 100644 index 0000000000..58959078f8 --- /dev/null +++ b/examples/infer/demo_hf.py @@ -0,0 +1,60 
@@ +def infer_hf(): + from transformers import AutoModelForCausalLM, AutoTokenizer + from peft import PeftModel + from modelscope import snapshot_download + model_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct') + adapter_dir = snapshot_download('swift/test_lora') + model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype='auto', device_map='auto') + model = PeftModel.from_pretrained(model, adapter_dir) + + tokenizer = AutoTokenizer.from_pretrained(model_dir) + + messages = [{ + 'role': 'system', + 'content': 'You are a helpful assistant.' + }, { + 'role': 'user', + 'content': 'who are you?' + }] + text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + model_inputs = tokenizer([text], return_tensors='pt').to(model.device) + + generated_ids = model.generate(**model_inputs, max_new_tokens=512, do_sample=False) + generated_ids = [ + output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) + ] + + response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + print(f'response: {response}') + return response + + +def infer_swift(): + from swift.llm import get_model_tokenizer, get_template, InferRequest, RequestConfig, PtEngine + from modelscope import snapshot_download + from swift.tuners import Swift + model_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct') + adapter_dir = snapshot_download('swift/test_lora') + model, tokenizer = get_model_tokenizer(model_dir, device_map='auto') + model = Swift.from_pretrained(model, adapter_dir) + template = get_template(model.model_meta.template, tokenizer) + engine = PtEngine.from_model_template(model, template) + + messages = [{ + 'role': 'system', + 'content': 'You are a helpful assistant.' + }, { + 'role': 'user', + 'content': 'who are you?' + }] + request_config = RequestConfig(max_tokens=512, temperature=0) + resp_list = engine.infer([InferRequest(messages=messages)], request_config=request_config) + response = resp_list[0].choices[0].message.content + print(f'response: {response}') + return response + + +if __name__ == '__main__': + response = infer_hf() + response2 = infer_swift() + assert response == response2 diff --git a/examples/infer/demo_lora.py b/examples/infer/demo_lora.py index 7489d1c38a..8d9396f135 100644 --- a/examples/infer/demo_lora.py +++ b/examples/infer/demo_lora.py @@ -63,6 +63,6 @@ def infer_lora(infer_request: 'InferRequest'): from swift.llm import (PtEngine, RequestConfig, AdapterRequest, get_template, BaseArguments, InferRequest, safe_snapshot_download, get_model_tokenizer) from swift.tuners import Swift - infer_request = InferRequest(messages=[{'role': 'user', 'content': '你是谁'}]) + infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}]) # infer_lora(infer_request) infer_multilora(infer_request, 'pt') diff --git a/swift/llm/template/base.py b/swift/llm/template/base.py index a4b2aa7c1d..d2a2ae84a8 100644 --- a/swift/llm/template/base.py +++ b/swift/llm/template/base.py @@ -598,7 +598,7 @@ def _swift_encode(self, inputs: StdTemplateInputs): context_list = prompt.copy() extra_context_list = [] extra_context_type = None - if i < n_round - 1 or self.mode == 'seq_cls' and response is not None: + if i < n_round - 1: # Not the last round. 
context_list.append('{{RESPONSE}}') extra_context_list = template_meta.chat_sep diff --git a/tests/test_align/test_template/test_llm.py b/tests/test_align/test_template/test_llm.py index da54bbb017..0b29120dfe 100644 --- a/tests/test_align/test_template/test_llm.py +++ b/tests/test_align/test_template/test_llm.py @@ -215,7 +215,7 @@ def test_qwen2_reward(): res = _infer_model(pt_engine, messages=messages) pt_engine.default_template.template_backend = 'jinja' res2 = _infer_model(pt_engine, messages=messages) - assert res == res2 == '1.390625' + assert res == '1.84375' and res2 == '1.390625' # \n diff def test_qwen2_5_math(): @@ -239,7 +239,7 @@ def test_skywork_reward(): res = _infer_model(pt_engine, messages=messages) pt_engine.default_template.template_backend = 'jinja' res2 = _infer_model(pt_engine, messages=messages) - assert res == '14.1875' + assert res == '14.25' assert res2 == '13.8125' From db9bdc6ff1ef23d930ff84b1b3d0eedd64d875bc Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 11:14:48 +0800 Subject: [PATCH 38/47] update --- examples/train/rlhf/ppo.sh | 5 +++-- swift/llm/argument/rlhf_args.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/train/rlhf/ppo.sh b/examples/train/rlhf/ppo.sh index a4341bea31..7983d2229b 100644 --- a/examples/train/rlhf/ppo.sh +++ b/examples/train/rlhf/ppo.sh @@ -12,7 +12,7 @@ swift rlhf \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ - --learning_rate 1e-4 \ + --learning_rate 1e-5 \ --lora_rank 8 \ --lora_alpha 32 \ --target_modules all-linear \ @@ -25,4 +25,5 @@ swift rlhf \ --output_dir output \ --warmup_ratio 0.05 \ --dataloader_num_workers 4 \ - --deepspeed zero2 + --deepspeed zero2 \ + --response_length 512 diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index 83620a9b3c..d707dfdf7f 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -27,7 +27,7 @@ class PPOArguments: num_mini_batches: int = 1 local_rollout_forward_batch_size: int = 64 num_sample_generations: int = 10 - response_length: int = 53 + response_length: int = 512 temperature: float = 0.7 missing_eos_penalty: Optional[float] = None From ee202e1dc5f817087a2baa3416a81b8ed36746c5 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 11:40:12 +0800 Subject: [PATCH 39/47] update --- ...344\271\211\346\225\260\346\215\256\351\233\206.md" | 10 +++++++++- docs/source_en/Customization/Custom-dataset.md | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git "a/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" index df19d99391..9c63c6f080 100644 --- "a/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" @@ -53,7 +53,7 @@ query-response格式: ### RLHF -#### DPO/ORPO/CPO/SimPO/RM/PPO +#### DPO/ORPO/CPO/SimPO/RM ```jsonl {"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "告诉我明天的天气"}, {"role": "assistant", "content": "明天天气晴朗"}], "rejected_response": "我不知道"} @@ -67,6 +67,14 @@ query-response格式: {"messages": [{"role": "system", "content": "你是个有用无害的数学计算器"}, {"role": "user", "content": "1+1等于几"}, {"role": "assistant", "content": "等于2"}, {"role": "user", "content": "再加1呢"}, 
{"role": "assistant", "content": "等于3"}], "label": true} ``` +#### PPO + +```jsonl +{"messages": [{"role": "system", "content": "你是个有用无害的助手"}, {"role": "user", "content": "告诉我明天的天气"}]} +{"messages": [{"role": "system", "content": "你是个有用无害的数学计算器"}, {"role": "user", "content": "1+1等于几"}, {"role": "assistant", "content": "等于2"}, {"role": "user", "content": "再加1呢"}]} +{"messages": [{"role": "user", "content": "你的名字是什么"}]} +``` + ### 序列分类 ```jsonl {"messages": [{"role": "user", "content": "今天天气真好呀"}], "label": 1} diff --git a/docs/source_en/Customization/Custom-dataset.md b/docs/source_en/Customization/Custom-dataset.md index 3bb38cfe3c..268b06dbc0 100644 --- a/docs/source_en/Customization/Custom-dataset.md +++ b/docs/source_en/Customization/Custom-dataset.md @@ -52,7 +52,7 @@ The following provides the recommended dataset format for ms-swift, where the sy ### RLHF -#### DPO/ORPO/CPO/SimPO/RM/PPO +#### DPO/ORPO/CPO/SimPO/RM ```jsonl {"messages": [{"role": "system", "content": "You are a useful and harmless assistant"}, {"role": "user", "content": "Tell me tomorrow's weather"}, {"role": "assistant", "content": "Tomorrow's weather will be sunny"}], "rejected_response": "I don't know"} @@ -66,6 +66,14 @@ The following provides the recommended dataset format for ms-swift, where the sy {"messages": [{"role": "system", "content": "You are a useful and harmless math calculator"}, {"role": "user", "content": "What is 1 + 1?"}, {"role": "assistant", "content": "It equals 2"}, {"role": "user", "content": "What about adding 1?"}, {"role": "assistant", "content": "It equals 3"}], "label": true} ``` +#### PPO + +```jsonl +{"messages": [{"role": "system", "content": "You are a useful and harmless assistant"}, {"role": "user", "content": "Tell me tomorrow's weather"}]} +{"messages": [{"role": "system", "content": "You are a useful and harmless math calculator"}, {"role": "user", "content": "What is 1 + 1?"}, {"role": "assistant", "content": "It equals 2"}, {"role": "user", "content": "What about adding 1?"}]} +{"messages": [{"role": "user", "content": "What is your name?"}]} +``` + ### Sequence Classification ```jsonl {"messages": [{"role": "user", "content": "The weather is really nice today"}], "label": 1} From 455fbd5142ba5259eebbc68563ccdcfd768c4b6c Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 11:44:51 +0800 Subject: [PATCH 40/47] fix --- swift/llm/argument/rlhf_args.py | 1 - 1 file changed, 1 deletion(-) diff --git a/swift/llm/argument/rlhf_args.py b/swift/llm/argument/rlhf_args.py index d707dfdf7f..8ddd396f53 100644 --- a/swift/llm/argument/rlhf_args.py +++ b/swift/llm/argument/rlhf_args.py @@ -3,7 +3,6 @@ from typing import List, Literal, Optional from swift.llm import MODEL_MAPPING -from ..template import get_template_meta from .train_args import TrainArguments From 5789bb920536e78b1bf7405c45e9b4cbb20c9e9b Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 13:12:42 +0800 Subject: [PATCH 41/47] update --- ...44\350\241\214\345\217\202\346\225\260.md" | 23 ++++++++++++- .../Instruction/Command-line-parameters.md | 34 ++++++++++++++++--- examples/train/rlhf/ppo.sh | 4 +-- swift/plugin/loss_scale.py | 7 ++-- 4 files changed, 57 insertions(+), 11 deletions(-) diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" index 0946491270..e8769a1a6c 100644 --- 
"a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" +++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" @@ -50,7 +50,7 @@ - 🔥max_pixels: 多模态模型图片前处理的最大像素数(H\*W),默认不缩放。 - tools_prompt: 智能体训练时的工具列表转为system的格式,请参考[智能体训练](./智能体的支持.md),默认为'react_en' - padding_side: 当训练`batch_size>=2`时的padding_side,可选值为'left', 'right',默认为'right'。(`generate`的batch_size>=2时,只进行左padding) -- loss_scale: 如何针对训练添加token的loss权重。默认为`'default'`,代表所有response(含history)以1计算交叉熵损失。具体可以查看[插件化](../Customization/插件化.md)和[智能体训练](./智能体的支持.md) +- loss_scale: 如何针对训练添加token的loss权重。默认为`'default'`,代表所有response(含history)以1计算交叉熵损失。可选值为'default', 'last_round', 'all', 以及agent需要的loss_scale: 'react', 'agentflan', 'alpha_umi', 'qwen'。具体可以查看[插件化](../Customization/插件化.md)和[智能体训练](./智能体的支持.md) - sequence_parallel_size: 序列并行数量。参考[example](https://github.com/modelscope/ms-swift/tree/main/examples/train/sequence_parallel/train.sh) - use_chat_template: 使用chat模板或generation模板,默认为`True`。`swift pt`会自动设置为generation模板 - template_backend: 使用swift或jinja进行推理。如果使用jinja,则使用transformers的`apply_chat_template`。默认为swift @@ -324,6 +324,27 @@ RLHF参数继承于[训练参数](#训练参数) - desirable_weight: KTO算法中对desirable response的loss权重 $\lambda_D$ ,默认为`1.` - undesirable_weight: KTO论文中对undesirable response的loss权重 $\lambda_U$ , 默认为`1.` +#### PPO参数 +- reward_model: 默认为None +- reward_adapters: 默认为`[]` +- reward_model_type: 默认为None +- reward_model_revision: 默认为None + +以下参数含义可以参考[这里](https://huggingface.co/docs/trl/main/ppo_trainer) +- num_ppo_epochs: 默认为4 +- whiten_rewards: 默认为False +- kl_coef: 默认为0.05 +- cliprange: 默认为0.2 +- vf_coef: 默认为0.1 +- cliprange_value: 默认为0.2 +- gamma: 默认为1.0 +- lam: 默认为0.95 +- num_mini_batches: 默认为1 +- local_rollout_forward_batch_size: 默认为64 +- num_sample_generations: 默认为10 +- response_length: 默认为512 +- temperature: 默认为0.7 +- missing_eos_penalty: 默认为None ### 推理参数 diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index 454b9a0984..233a51861f 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -50,7 +50,7 @@ The introduction to command line parameters will cover base arguments, atomic ar - 🔥max_pixels: Maximum pixel count for pre-processing images in multimodal models (H*W), default is no scaling. - tools_prompt: The list of tools for agent training converted to system format, refer to [Agent Training](./Agent-support.md), default is 'react_en'. - padding_side: The padding_side used when training with `batch_size >= 2`, with optional values of 'left' and 'right', defaulting to 'right'. (When the batch_size in `generate` is >= 2, only left padding is applied.) -- loss_scale: How to add token loss weight during training. Default is `'default'`, meaning all responses (including history) are treated as 1 for cross-entropy loss. For specifics, see [Pluginization](../Customization/Pluginization.md) and [Agent Training](./Agent-support.md). +- loss_scale: How to add token loss weight during training. Default is `'default'`, meaning all responses (including history) are treated as 1 for cross-entropy loss. The optional values are 'default', 'last_round', 'all', and the loss scale required by the agent: 'react', 'agentflan', 'alpha_umi', 'qwen'. For specifics, see [Pluginization](../Customization/Pluginization.md) and [Agent Training](./Agent-support.md). - sequence_parallel_size: Number of sequence parallelism. 
Refer to [example](https://github.com/modelscope/ms-swift/tree/main/examples/train/sequence_parallel/train.sh). - use_chat_template: Use chat template or generation template, default is `True`. `swift pt` is automatically set to the generation template. - template_backend: Use swift or jinja for inference. If using jinja, it will utilize transformers' `apply_chat_template`. Default is swift. @@ -318,16 +318,40 @@ RLHF arguments inherit from the [training arguments](#training-arguments). - 🔥beta: KL regularization term coefficient, default is `None`, i.e., for `simpo` algorithm default is `2.`, for other algorithms default is `0.1`. Refer to the [documentation](./Human-alignment.md) for specifics. - label_smoothing: Whether to use DPO smoothing, default value is `0`, generally set between 0~0.5. -- + - 🔥rpo_alpha: Weight for adding sft_loss in DPO, default is `1`. The final loss is `KL_loss + rpo_alpha * sft_loss`. -- + - cpo_alpha: The coefficient of nll loss in CPO/SimPO loss, default is `1.`. -- + - simpo_gamma: Reward margin term in SimPO algorithm, recommended to set between 0.5-1.5 in the paper, default is `1.`. -- + - desirable_weight: Loss weight for desirable response in KTO algorithm $\lambda_D$, default is `1.`. - undesirable_weight: Loss weight for undesirable response in KTO paper $\lambda_U$, default is `1.`. +#### PPO Arguments + +- reward_model: Defaults to None +- reward_adapters: Defaults to `[]` +- reward_model_type: Defaults to None +- reward_model_revision: Defaults to None + +The meanings of the following parameters can be referenced [here](https://huggingface.co/docs/trl/main/ppo_trainer): + +- num_ppo_epochs: Defaults to 4 +- whiten_rewards: Defaults to False +- kl_coef: Defaults to 0.05 +- cliprange: Defaults to 0.2 +- vf_coef: Defaults to 0.1 +- cliprange_value: Defaults to 0.2 +- gamma: Defaults to 1.0 +- lam: Defaults to 0.95 +- num_mini_batches: Defaults to 1 +- local_rollout_forward_batch_size: Defaults to 64 +- num_sample_generations: Defaults to 10 +- response_length: Defaults to 512 +- temperature: Defaults to 0.7 +- missing_eos_penalty: Defaults to None + ### Inference Arguments Inference arguments include the [base arguments](#base-arguments), [merge arguments](#merge-arguments), [vLLM arguments](#vllm-arguments), [LMDeploy arguments](#LMDeploy-arguments), and also contain the following: diff --git a/examples/train/rlhf/ppo.sh b/examples/train/rlhf/ppo.sh index 7983d2229b..4410b1609e 100644 --- a/examples/train/rlhf/ppo.sh +++ b/examples/train/rlhf/ppo.sh @@ -1,6 +1,6 @@ -nproc_per_node=2 +nproc_per_node=4 -CUDA_VISIBLE_DEVICES=0,1 \ +CUDA_VISIBLE_DEVICES=0,1,2,3 \ NPROC_PER_NODE=$nproc_per_node \ swift rlhf \ --rlhf_type ppo \ diff --git a/swift/plugin/loss_scale.py b/swift/plugin/loss_scale.py index 275d2e0e4b..21733dfabe 100644 --- a/swift/plugin/loss_scale.py +++ b/swift/plugin/loss_scale.py @@ -180,11 +180,12 @@ def get_loss_scale(self, context: str, context_type: ContextType, *args, **kwarg # Add your loss scale here, use --loss_scale xxx to train loss_scale_map = { + 'last_round': LastRoundLossScale(), + 'default': LossScale(), + 'all': TrainAllLossScale(), + # agent 'agentflan': AgentFlanLossScale(), 'react': REACTLossScale(), 'alpha_umi': AlphaUmiLossScale(), - 'default': LossScale(), - 'last_round': LastRoundLossScale(), 'qwen': QwenLossScale(), - 'all': TrainAllLossScale(), } From 813dadf163b258355c87a2681bb3b5a85c5741b8 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 13:14:31 +0800 Subject: [PATCH 42/47] update --- 
examples/train/rlhf/ppo.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/train/rlhf/ppo.sh b/examples/train/rlhf/ppo.sh index 4410b1609e..86f93d9348 100644 --- a/examples/train/rlhf/ppo.sh +++ b/examples/train/rlhf/ppo.sh @@ -1,3 +1,4 @@ +# Currently, it only supports the case where the model and reward_model use the same template/tokenizer. nproc_per_node=4 CUDA_VISIBLE_DEVICES=0,1,2,3 \ From 828996b82d3813834543b9d27c57e75e10e7980a Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 13:17:26 +0800 Subject: [PATCH 43/47] fix --- swift/llm/train/rlhf.py | 2 +- swift/trainers/rlhf_trainer/ppo_trainer.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/swift/llm/train/rlhf.py b/swift/llm/train/rlhf.py index 37e8d9903c..feffd4e65c 100644 --- a/swift/llm/train/rlhf.py +++ b/swift/llm/train/rlhf.py @@ -1,7 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from typing import List, Union -from swift.utils import get_logger, get_model_parameter_info, patch_getattr +from swift.utils import get_logger, get_model_parameter_info from ..argument import RLHFArguments from .kto import prepare_kto_dataset from .sft import SwiftSft diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index 19f8328b46..c31ae5b7eb 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -1,19 +1,18 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from contextlib import contextmanager -import torch from torch.utils.data import DataLoader from transformers import PreTrainedModel from trl import PPOv2Trainer as HFPPOv2Trainer from swift.utils import patch_getattr from ..mixin import SwiftMixin -from .rlhf_mixin import RLHFTrainerMixin + +ppo_trainer_init = HFPPOv2Trainer.__init__ +del HFPPOv2Trainer.__init__ class PPOTrainer(SwiftMixin, HFPPOv2Trainer): - ppo_trainer_init = HFPPOv2Trainer.__init__ - del HFPPOv2Trainer.__init__ @staticmethod @contextmanager @@ -36,7 +35,7 @@ def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, * for k, v in kwargs.items() if k in ['train_dataset', 'data_collator', 'reward_model', 'value_model', 'eval_dataset'] } - self.ppo_trainer_init( + ppo_trainer_init( config=kwargs['args'], tokenizer=self.tokenizer, policy=model, ref_policy=ref_model, **new_kwargs) unwrap_model = self.accelerator.unwrap_model(self.model) patch_getattr(unwrap_model, 'policy') From 0486eab8e5068140f1ac385e072cdc795647a19c Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 13:24:48 +0800 Subject: [PATCH 44/47] update --- swift/trainers/rlhf_trainer/ppo_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/trainers/rlhf_trainer/ppo_trainer.py b/swift/trainers/rlhf_trainer/ppo_trainer.py index c31ae5b7eb..1196d5b06c 100644 --- a/swift/trainers/rlhf_trainer/ppo_trainer.py +++ b/swift/trainers/rlhf_trainer/ppo_trainer.py @@ -36,7 +36,7 @@ def __init__(self, model: PreTrainedModel, ref_model: PreTrainedModel, *_args, * if k in ['train_dataset', 'data_collator', 'reward_model', 'value_model', 'eval_dataset'] } ppo_trainer_init( - config=kwargs['args'], tokenizer=self.tokenizer, policy=model, ref_policy=ref_model, **new_kwargs) + self, config=kwargs['args'], tokenizer=self.tokenizer, policy=model, ref_policy=ref_model, **new_kwargs) unwrap_model = self.accelerator.unwrap_model(self.model) patch_getattr(unwrap_model, 'policy') From 2e98d6aedc69c93e3b82b23ecc10ae9b847316ea Mon Sep 17 00:00:00 2001 From: 
Jintao Huang Date: Tue, 7 Jan 2025 14:12:35 +0800 Subject: [PATCH 45/47] fix --- swift/trainers/__init__.py | 10 ++++----- swift/trainers/arguments.py | 37 -------------------------------- swift/trainers/rlhf_arguments.py | 28 ++++++++++++++++++++++++ tests/train/test_ppo.py | 4 ++-- 4 files changed, 34 insertions(+), 45 deletions(-) create mode 100644 swift/trainers/rlhf_arguments.py diff --git a/swift/trainers/__init__.py b/swift/trainers/__init__.py index 2e57e64de2..da7ab951cc 100644 --- a/swift/trainers/__init__.py +++ b/swift/trainers/__init__.py @@ -15,10 +15,10 @@ ShardedDDPOption = None if TYPE_CHECKING: - from .arguments import (Seq2SeqTrainingArguments, TrainingArguments, DPOConfig, CPOConfig, KTOConfig, ORPOConfig, - PPOConfig, RewardConfig) + from .arguments import Seq2SeqTrainingArguments, TrainingArguments from .rlhf_trainer import (CPOTrainer, DPOTrainer, KTOTrainer, ORPOTrainer, RLHFTrainerMixin, PPOTrainer, RewardTrainer) + from .rlhf_arguments import DPOConfig, CPOConfig, KTOConfig, ORPOConfig, PPOConfig, RewardConfig from .trainer_factory import TrainerFactory from .trainers import Seq2SeqTrainer, Trainer from .mixin import SwiftMixin @@ -26,10 +26,8 @@ else: _extra_objects = {k: v for k, v in globals().items() if not k.startswith('_')} _import_structure = { - 'arguments': [ - 'Seq2SeqTrainingArguments', 'TrainingArguments', 'DPOConfig', 'CPOConfig', 'KTOConfig', 'ORPOConfig', - 'PPOConfig', 'RewardConfig' - ], + 'arguments': ['Seq2SeqTrainingArguments', 'TrainingArguments'], + 'rlhf_arguments': ['DPOConfig', 'CPOConfig', 'KTOConfig', 'ORPOConfig', 'PPOConfig', 'RewardConfig'], 'rlhf_trainer': ['CPOTrainer', 'DPOTrainer', 'KTOTrainer', 'ORPOTrainer', 'RLHFTrainerMixin', 'PPOTrainer', 'RewardTrainer'], 'trainer_factory': ['TrainerFactory'], diff --git a/swift/trainers/arguments.py b/swift/trainers/arguments.py index f107b41aff..809d42b85b 100644 --- a/swift/trainers/arguments.py +++ b/swift/trainers/arguments.py @@ -76,40 +76,3 @@ class TrainingArguments(SwiftArgumentsMixin, HfTrainingArguments): @dataclass class Seq2SeqTrainingArguments(SwiftArgumentsMixin, HfSeq2SeqTrainingArguments): pass - - -try: - from trl import (DPOConfig as HfDPOConfig, CPOConfig as HfCPOConfig, ORPOConfig as HfORPOConfig, KTOConfig as - HfKTOConfig, RewardConfig as HfRewardConfig, PPOv2Config as HfPPOv2Config) - - @dataclass - class DPOConfig(SwiftArgumentsMixin, HfDPOConfig): - pass - - @dataclass - class CPOConfig(SwiftArgumentsMixin, HfCPOConfig): - pass - - @dataclass - class ORPOConfig(SwiftArgumentsMixin, HfORPOConfig): - pass - - @dataclass - class KTOConfig(SwiftArgumentsMixin, HfKTOConfig): - pass - - @dataclass - class RewardConfig(SwiftArgumentsMixin, HfRewardConfig): - pass - - @dataclass - class PPOConfig(SwiftArgumentsMixin, HfPPOv2Config): - pass - -except (ImportError, RuntimeError): - DPOConfig = None - CPOConfig = None - ORPOConfig = None - KTOConfig = None - RewardConfig = None - PPOConfig = None diff --git a/swift/trainers/rlhf_arguments.py b/swift/trainers/rlhf_arguments.py new file mode 100644 index 0000000000..a9309bb201 --- /dev/null +++ b/swift/trainers/rlhf_arguments.py @@ -0,0 +1,28 @@ +from trl import (DPOConfig as HfDPOConfig, CPOConfig as HfCPOConfig, ORPOConfig as HfORPOConfig, KTOConfig as + HfKTOConfig, RewardConfig as HfRewardConfig, PPOv2Config as HfPPOv2Config) + +from .arguments import SwiftArgumentsMixin + +@dataclass +class DPOConfig(SwiftArgumentsMixin, HfDPOConfig): + pass + +@dataclass +class CPOConfig(SwiftArgumentsMixin, HfCPOConfig): + pass + 
+@dataclass +class ORPOConfig(SwiftArgumentsMixin, HfORPOConfig): + pass + +@dataclass +class KTOConfig(SwiftArgumentsMixin, HfKTOConfig): + pass + +@dataclass +class RewardConfig(SwiftArgumentsMixin, HfRewardConfig): + pass + +@dataclass +class PPOConfig(SwiftArgumentsMixin, HfPPOv2Config): + pass \ No newline at end of file diff --git a/tests/train/test_ppo.py b/tests/train/test_ppo.py index 0a7c98022e..4ad3180502 100644 --- a/tests/train/test_ppo.py +++ b/tests/train/test_ppo.py @@ -27,8 +27,8 @@ def test_ppo(): result = rlhf_main( RLHFArguments( rlhf_type='ppo', - model='LLM-Research/Meta-Llama-3.1-8B-Instruct', - reward_model='AI-ModelScope/Skywork-Reward-Llama-3.1-8B-v0.2', + model='LLM-Research/Llama-3.2-1B-Instruct', + reward_model='AI-ModelScope/GRM-Llama3.2-3B-rewardmodel-ft', dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'], **kwargs)) last_model_checkpoint = result['last_model_checkpoint'] From 6c2b6826d7b777bc8168a26b90ff08419124c896 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 14:16:52 +0800 Subject: [PATCH 46/47] fix --- swift/trainers/rlhf_arguments.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/swift/trainers/rlhf_arguments.py b/swift/trainers/rlhf_arguments.py index a9309bb201..9db0541522 100644 --- a/swift/trainers/rlhf_arguments.py +++ b/swift/trainers/rlhf_arguments.py @@ -1,28 +1,40 @@ -from trl import (DPOConfig as HfDPOConfig, CPOConfig as HfCPOConfig, ORPOConfig as HfORPOConfig, KTOConfig as - HfKTOConfig, RewardConfig as HfRewardConfig, PPOv2Config as HfPPOv2Config) +from dataclasses import dataclass + +from trl import CPOConfig as HfCPOConfig +from trl import DPOConfig as HfDPOConfig +from trl import KTOConfig as HfKTOConfig +from trl import ORPOConfig as HfORPOConfig +from trl import PPOv2Config as HfPPOv2Config +from trl import RewardConfig as HfRewardConfig from .arguments import SwiftArgumentsMixin + @dataclass class DPOConfig(SwiftArgumentsMixin, HfDPOConfig): pass + @dataclass class CPOConfig(SwiftArgumentsMixin, HfCPOConfig): pass + @dataclass class ORPOConfig(SwiftArgumentsMixin, HfORPOConfig): pass + @dataclass class KTOConfig(SwiftArgumentsMixin, HfKTOConfig): pass + @dataclass class RewardConfig(SwiftArgumentsMixin, HfRewardConfig): pass + @dataclass class PPOConfig(SwiftArgumentsMixin, HfPPOv2Config): - pass \ No newline at end of file + pass From c592ac0871022ef6f4f839ed002fa917fce9a6d4 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 7 Jan 2025 14:43:44 +0800 Subject: [PATCH 47/47] update --- swift/llm/template/template/mplug.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/swift/llm/template/template/mplug.py b/swift/llm/template/template/mplug.py index 4e25652257..9882cd3388 100644 --- a/swift/llm/template/template/mplug.py +++ b/swift/llm/template/template/mplug.py @@ -97,7 +97,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]: if images: image_inputs = processor.image_processor(images, cut_enable=cut_enable, return_tensors='pt') added_tokens_len = 0 - cut_shapes = image_inputs['cut_shape'] or [None] * len(idx_list) + cut_shapes = image_inputs['cut_shape'] or [None] * 2 * len(idx_list) image_token_list = self.processor.encode('<|image|>', add_special_tokens=False) for idx, cut_shape in zip(idx_list, cut_shapes[::2]): if cut_shape: @@ -161,6 +161,8 @@ def _post_encode(self, model: nn.Module, inputs: Dict[str, Any]) -> Dict[str, An if 'pixel_values' in inputs: pixel_values = 
inputs.pop('pixel_values') inputs['image_embeds'] = torch.concat([model.forward_image(pv) for pv in pixel_values]) + else: + inputs['media_offset'] = [None] * inputs['input_ids'].shape[0] return inputs
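
For quick reference, a minimal way to exercise the PPO path introduced in this patch series, mirroring tests/train/test_ppo.py above; the model, reward_model, and dataset identifiers are the illustrative ones used in that test, not requirements, and extra training arguments (train_type, batch sizes, etc.) are omitted here:

```python
# Minimal sketch assuming ms-swift with this patch series applied.
# Mirrors tests/train/test_ppo.py; swap in your own model/reward_model/dataset.
from swift.llm import rlhf_main, RLHFArguments

result = rlhf_main(
    RLHFArguments(
        rlhf_type='ppo',
        model='LLM-Research/Llama-3.2-1B-Instruct',
        reward_model='AI-ModelScope/GRM-Llama3.2-3B-rewardmodel-ft',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100',
                 'AI-ModelScope/alpaca-gpt4-data-en#100'],
    ))
print(result['last_model_checkpoint'])  # path to the final saved checkpoint
```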