diff --git "a/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" index 20da62d448..93cc2d78c4 100644 --- "a/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Customization/\350\207\252\345\256\232\344\271\211\346\225\260\346\215\256\351\233\206.md" @@ -88,7 +88,7 @@ alpaca格式: > 注: RM 额外支持 margin 列,参考[RM文档](../Instruction/人类对齐.md#rm) -当然,你也可以直接使用`rejected_messages`,而不是只提供`rejected_response`/`rejected_images`(需ms-swift>=3.8),这将提供更大的灵活度(例如多模态/agent场景)。在多模态场景下,若使用rejected_messages,你需要额外传入"rejected_images","rejected_audios","rejected_videos"等内容。数据格式例子如下: +当然,你也可以直接使用`rejected_messages`,而不是只提供`rejected_response`/`rejected_images`(需ms-swift>=3.8),这将提供更大的灵活度(例如多模态/agent场景)。若使用rejected_messages,在多模态场景下,你需要额外传入"rejected_images","rejected_audios","rejected_videos"等内容;在Agent场景下,你需要额外传入"rejected_tools"等内容。多模态数据格式例子如下: ```jsonl {"messages": [{"role": "user", "content": "这是什么"}, {"role": "assistant", "content": "这是一只小猫咪。"}], "images": ["cat.png"], "rejected_messages": [{"role": "user", "content": "这是什么"}, {"role": "assistant", "content": "这是一只小狗。"}], "rejected_images": ["cat.png"]} diff --git a/docs/source_en/Customization/Custom-dataset.md b/docs/source_en/Customization/Custom-dataset.md index 198db5ea6b..c20e0e1437 100644 --- a/docs/source_en/Customization/Custom-dataset.md +++ b/docs/source_en/Customization/Custom-dataset.md @@ -88,7 +88,7 @@ The format of multimodal data should follow the specifications in [Multimodal Da > Note: RM additionally supports the margin column. For details, refer to the [RM documentation](../Instruction/RLHF.md#rm). -Sure, you can also directly use `rejected_messages` instead of only providing `rejected_response` / `rejected_images` (requires ms-swift>=3.8), which offers greater flexibility (e.g., for multimodal or agent scenarios). In multimodal cases, if you use `rejected_messages`, you need to additionally provide fields such as `"rejected_images"`, `"rejected_audios"`, `"rejected_videos"`, etc. An example of the data format is as follows: +Sure, you can also directly use `rejected_messages` instead of only providing `rejected_response` / `rejected_images` (requires ms-swift>=3.8), which offers greater flexibility (e.g., for multimodal or agent scenarios). If you use "rejected_messages", then in multimodal scenarios you must also provide "rejected_images", "rejected_audios", "rejected_videos", etc.; in Agent scenarios you must also provide "rejected_tools", etc. 
 
 ```jsonl
 {"messages": [{"role": "user", "content": "What is this?"}, {"role": "assistant", "content": "This is a kitten."}], "images": ["kitten.png"], "rejected_messages": [{"role": "user", "content": "What is this?"}, {"role": "assistant", "content": "This is a puppy."}], "rejected_images": ["kitten.png"]}
diff --git a/swift/llm/model/model/qwen.py b/swift/llm/model/model/qwen.py
index d1352a6a13..1682aecdc8 100644
--- a/swift/llm/model/model/qwen.py
+++ b/swift/llm/model/model/qwen.py
@@ -6,6 +6,7 @@
 from transformers import AutoConfig, AutoTokenizer, BitsAndBytesConfig, PreTrainedTokenizerBase
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.models.auto.tokenization_auto import get_tokenizer_config
+from transformers.utils.versions import require_version
 
 from swift.llm import TemplateType
 from swift.utils import get_device_count, get_dist_setting, get_env_args, get_logger
@@ -714,6 +715,7 @@ def get_model_tokenizer_qwen2_vl(*args, **kwargs):
         patch_get_input_embeddings(base_model.visual, 'patch_embed')
 
     from qwen_vl_utils import vision_process
+    require_version('qwen_vl_utils<0.0.12')
     global_vars = patch_qwen_vl_utils(vision_process)
     tokenizer.global_vars = global_vars  # In order to have different hashes for the template.
     return model, tokenizer
diff --git a/swift/megatron/trainers/dpo_trainer.py b/swift/megatron/trainers/dpo_trainer.py
index daa1847b74..76a5ca9276 100644
--- a/swift/megatron/trainers/dpo_trainer.py
+++ b/swift/megatron/trainers/dpo_trainer.py
@@ -57,6 +57,7 @@ def setup_model_and_optimizer(self, model_provider_func, model_type, *_args, **k
     def _forward_step_helper(model, inputs):
         args = get_args()
         if mpu.is_pipeline_first_stage():
+            assert args.padding_free, 'Currently `rlhf_type="dpo"` only supports padding_free.'
             micro_batch_size = 1  # use qkv_format 'thd'
             seq_length = inputs['input_ids'].shape[1]
             if args.sequence_parallel:
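
Note on the doc change above: the new paragraph names `rejected_tools` for Agent scenarios, but the patch only shows a multimodal sample. A minimal sketch of what an Agent-scenario pair might look like, assuming it mirrors the structure of the multimodal example (the `get_weather` tool, its schema, and the dialogue content are illustrative, not taken from this PR):

```jsonl
{"messages": [{"role": "user", "content": "How is the weather in Beijing?"}, {"role": "assistant", "content": "Let me check the weather for you."}], "tools": [{"name": "get_weather", "description": "Query the current weather for a city"}], "rejected_messages": [{"role": "user", "content": "How is the weather in Beijing?"}, {"role": "assistant", "content": "I am unable to check the weather."}], "rejected_tools": [{"name": "get_weather", "description": "Query the current weather for a city"}]}
```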