2 changes: 1 addition & 1 deletion README.md
@@ -254,7 +254,7 @@ RLHF:
CUDA_VISIBLE_DEVICES=0 swift rlhf \
--rlhf_type dpo \
--model Qwen/Qwen2.5-7B-Instruct \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:en \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
--train_type lora \
--output_dir output \
...
2 changes: 1 addition & 1 deletion README_CN.md
@@ -246,7 +246,7 @@ RLHF:
CUDA_VISIBLE_DEVICES=0 swift rlhf \
--rlhf_type dpo \
--model Qwen/Qwen2.5-7B-Instruct \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
--train_type lora \
--output_dir output \
...
1 change: 1 addition & 0 deletions docs/source/Instruction/命令行参数.md
@@ -67,6 +67,7 @@
- num_beams: Number of beams for beam search, default is 1.
- 🔥stream: Stream output, default is `False`.
- stop_words: Additional stop words, default is `[]`.
- logprobs: Whether to output logprobs, default is `False`.

### Quantization Arguments
The following are the quantization arguments used when loading the model; see the [Quantization](https://huggingface.co/docs/transformers/main/en/main_classes/quantization) documentation for their exact meaning. They do not include the `gptq` and `awq` quantization arguments used by `swift export`.
1 change: 1 addition & 0 deletions docs/source_en/Instruction/Command-line-parameters.md
@@ -68,6 +68,7 @@ Refer to the [generation_config](https://huggingface.co/docs/transformers/main_c
- num_beams: Number of beams for beam search, default is 1.
- 🔥stream: Stream output, default is `False`.
- stop_words: Additional stop words, default is `[]`.
- logprobs: Whether to output logprobs, default is `False`.
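A minimal usage sketch for the new flag, assuming the standard `swift infer` entry point (the exact shape of the returned logprobs depends on the inference backend):

```shell
# Hypothetical invocation: request token log-probabilities at inference time.
# --logprobs is the new flag documented above; the other flags mirror existing examples.
CUDA_VISIBLE_DEVICES=0 swift infer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --stream false \
    --logprobs true \
    --max_new_tokens 64
```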

### Quantization Arguments

@@ -1,15 +1,15 @@
# 4*32GiB
# 4*50GiB
# You can refer to `https://github.com/QwenLM/Qwen2-VL` for the meaning of the `MAX_PIXELS` parameter.
# --rlhf_type cpo/orpo/simpo/rm/kto are also supported
nproc_per_node=4
# --rlhf_type cpo/orpo/simpo are also supported
nproc_per_node=2

CUDA_VISIBLE_DEVICES=0,1,2,3 \
CUDA_VISIBLE_DEVICES=0,1 \
NPROC_PER_NODE=$nproc_per_node \
MAX_PIXELS=1003520 \
swift rlhf \
--rlhf_type dpo \
--model Qwen/Qwen2-VL-7B-Instruct \
--dataset swift/RLAIF-V-Dataset \
--dataset 'swift/RLAIF-V-Dataset#20000' \
--train_type lora \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
@@ -24,7 +24,7 @@ swift rlhf \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 5 \
--deepspeed zero3 \
--deepspeed zero2 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
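Editor's note on two conventions this script relies on, as I read them (neither is introduced by this PR): the `#20000` suffix asks ms-swift to sample 20000 rows from the dataset rather than using it in full, and `MAX_PIXELS` caps the image resolution passed to Qwen2-VL to keep activation memory in check. A hedged sketch of a smaller debugging run using the same knobs, with illustrative values:

```shell
# Illustrative quick run: sample fewer rows and cap image pixels harder.
MAX_PIXELS=602112 \
CUDA_VISIBLE_DEVICES=0 \
swift rlhf \
    --rlhf_type dpo \
    --model Qwen/Qwen2-VL-7B-Instruct \
    --dataset 'swift/RLAIF-V-Dataset#2000' \
    --train_type lora \
    --torch_dtype bfloat16 \
    --output_dir output
```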
31 changes: 31 additions & 0 deletions examples/train/multimodal/rlhf/kto.sh
@@ -0,0 +1,31 @@
# Since there is no open-source multimodal KTO dataset available,
# we use a pure-text KTO dataset as an example here.
nproc_per_node=2

CUDA_VISIBLE_DEVICES=0,1 \
NPROC_PER_NODE=$nproc_per_node \
MAX_PIXELS=1003520 \
swift rlhf \
--rlhf_type kto \
--model Qwen/Qwen2-VL-7B-Instruct \
--dataset 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000' \
--train_type lora \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--learning_rate 1e-4 \
--lora_rank 8 \
--lora_alpha 32 \
--target_modules all-linear \
--freeze_vit true \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 5 \
--deepspeed zero2 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
--warmup_ratio 0.05 \
--dataloader_num_workers 4
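For context, KTO trains on per-response desirability labels rather than chosen/rejected pairs, which is why this script points at a `-kto` variant of ultrafeedback. A rough sketch of what a custom sample could look like if you substituted your own data; the field names (`messages`, `label`) follow my reading of ms-swift's custom-dataset conventions and should be double-checked against the dataset documentation:

```shell
# Hypothetical custom KTO dataset: one JSON object per line, label marks desirability.
cat > my_kto_data.jsonl <<'EOF'
{"messages": [{"role": "user", "content": "What is 2+2?"}, {"role": "assistant", "content": "4"}], "label": true}
{"messages": [{"role": "user", "content": "What is 2+2?"}, {"role": "assistant", "content": "5"}], "label": false}
EOF
# Then point --dataset at the local file instead of the ModelScope id:
#   swift rlhf --rlhf_type kto --dataset my_kto_data.jsonl ...
```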
17 changes: 12 additions & 5 deletions examples/train/rlhf/cpo.sh
@@ -6,15 +6,22 @@ swift rlhf \
--rlhf_type cpo \
--model Qwen/Qwen2.5-7B-Instruct \
--train_type lora \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--weight_decay 0.1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--learning_rate 1e-4 \
--lora_rank 8 \
--lora_alpha 32 \
--target_modules all-linear \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
--gradient_checkpointing_kwargs '{"use_reentrant": false}' \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 5
--save_total_limit 5 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
--warmup_ratio 0.05 \
--dataloader_num_workers 4 \
--deepspeed zero2
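The `--gradient_accumulation_steps $(expr 16 / $nproc_per_node)` pattern used throughout these scripts keeps the effective global batch size fixed at 16 regardless of GPU count: global batch = per-device batch × number of processes × accumulation steps. A quick check of the arithmetic with the values in this script (illustrative only, not part of the PR):

```shell
nproc_per_node=2
per_device_batch=1
accum=$(expr 16 / $nproc_per_node)                                   # 8
global_batch=$(expr $per_device_batch \* $nproc_per_node \* $accum)  # 1 * 2 * 8 = 16
echo "accumulation steps: $accum, effective global batch size: $global_batch"
```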
16 changes: 12 additions & 4 deletions examples/train/rlhf/dpo.sh
@@ -6,14 +6,22 @@ swift rlhf \
--rlhf_type dpo \
--model Qwen/Qwen2.5-7B-Instruct \
--train_type lora \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--learning_rate 1e-4 \
--lora_rank 8 \
--lora_alpha 32 \
--target_modules all-linear \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
--gradient_checkpointing_kwargs '{"use_reentrant": false}' \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 5
--save_total_limit 5 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
--warmup_ratio 0.05 \
--dataloader_num_workers 4 \
--deepspeed zero2
19 changes: 13 additions & 6 deletions examples/train/rlhf/kto.sh
@@ -1,19 +1,26 @@
nproc_per_node=4
nproc_per_node=2

CUDA_VISIBLE_DEVICES=0,1,2,3 \
CUDA_VISIBLE_DEVICES=0,1 \
NPROC_PER_NODE=$nproc_per_node \
swift rlhf \
--rlhf_type kto \
--model Qwen/Qwen2.5-7B-Instruct \
--train_type lora \
--dataset 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000' \
--num_train_epochs 2 \
--num_train_epochs 1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--learning_rate 1e-4 \
--lora_rank 8 \
--lora_alpha 32 \
--target_modules all-linear \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
--gradient_checkpointing_kwargs '{"use_reentrant": false}' \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 5
--save_total_limit 5 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
--warmup_ratio 0.05 \
--dataloader_num_workers 4 \
--deepspeed zero2
16 changes: 12 additions & 4 deletions examples/train/rlhf/orpo.sh
@@ -6,14 +6,22 @@ swift rlhf \
--rlhf_type orpo \
--model Qwen/Qwen2.5-7B-Instruct \
--train_type lora \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--learning_rate 1e-4 \
--lora_rank 8 \
--lora_alpha 32 \
--target_modules all-linear \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
--gradient_checkpointing_kwargs '{"use_reentrant": false}' \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 5
--save_total_limit 5 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
--warmup_ratio 0.05 \
--dataloader_num_workers 4 \
--deepspeed zero2
16 changes: 12 additions & 4 deletions examples/train/rlhf/rm.sh
@@ -6,14 +6,22 @@ swift rlhf \
--rlhf_type rm \
--model Qwen/Qwen2.5-7B-Instruct \
--train_type lora \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--learning_rate 1e-4 \
--lora_rank 8 \
--lora_alpha 32 \
--target_modules all-linear \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
--gradient_checkpointing_kwargs '{"use_reentrant": false}' \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--logging_steps 5
--save_total_limit 5 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
--warmup_ratio 0.05 \
--dataloader_num_workers 4 \
--deepspeed zero2
19 changes: 13 additions & 6 deletions examples/train/rlhf/simpo.sh
@@ -1,18 +1,25 @@
# 2*50GB
nproc_per_node=2

CUDA_VISIBLE_DEVICES=0,1 \
NPROC_PER_NODE=$nproc_per_node \
swift rlhf \
--rlhf_type simpo \
--model Qwen/Qwen2.5-7B-Instruct \
--model Qwen/Qwen2.5-3B-Instruct \
--train_type full \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh \
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
--torch_dtype bfloat16 \
--num_train_epochs 1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--learning_rate 1e-5 \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
--warmup_ratio 0.03 \
--eval_steps 100 \
--save_steps 100 \
--save_total_limit 2 \
--deepspeed zero3 \
--logging_steps 5
--save_total_limit 5 \
--logging_steps 5 \
--max_length 2048 \
--output_dir output \
--warmup_ratio 0.05 \
--dataloader_num_workers 4 \
--deepspeed zero2
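The move to a 3B model with `--train_type full` is consistent with the `# 2*50GB` note: full fine-tuning with AdamW in mixed precision typically costs on the order of 16 bytes per parameter (bf16 weights and gradients plus fp32 master weights and two Adam moments), i.e. roughly 45 GiB for 3B parameters before activations, which ZeRO-2 then eases by sharding optimizer states across the two GPUs. A back-of-the-envelope check using rule-of-thumb numbers, not a guarantee:

```shell
# Rough estimate for full fine-tuning a 3B-parameter model with AdamW in bf16:
# 2 (weights) + 2 (grads) + 4 (fp32 master weights) + 4 + 4 (Adam moments) = 16 bytes/param
params=3000000000
bytes_per_param=16
echo "approx $(expr $params \* $bytes_per_param / 1024 / 1024 / 1024) GiB before activations"  # ~44 GiB (integer division)
```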
2 changes: 1 addition & 1 deletion swift/hub/hub.py
@@ -291,7 +291,7 @@ def load_dataset(cls,
cls.try_login(token)
if revision is None or revision == 'main':
revision = 'master'
# noinspection PyTypeChecker

return MsDataset.load(
dataset_id,
subset_name=subset_name,
29 changes: 12 additions & 17 deletions swift/llm/argument/export_args.py
@@ -3,6 +3,8 @@
from dataclasses import dataclass
from typing import Literal, Optional

import torch

from swift.utils import get_logger
from .base_args import BaseArguments, to_abspath
from .merge_args import MergeArguments
@@ -51,14 +53,6 @@ class ExportArguments(MergeArguments, BaseArguments):
# compat
to_peft_format: bool = False

def _init_quant(self):

if self.quant_bits:
if self.quant_method is None:
raise ValueError('Please specify the quantization method using `--quant_method awq/gptq`.')
if len(self.dataset) == 0 and self.quant_method in {'gptq', 'awq'}:
raise ValueError(f'self.dataset: {self.dataset}, Please input the quant dataset.')

def _init_output_dir(self):
suffix = None
if self.output_dir is None:
@@ -68,7 +62,7 @@ def _init_output_dir(self):
suffix = 'peft'
elif self.merge_lora:
suffix = 'merged'
elif self.quant_bits:
elif self.quant_method:
suffix = f'{self.quant_method}-int{self.quant_bits}'
elif self.to_ollama:
suffix = 'ollama'
@@ -82,13 +76,14 @@ def _init_output_dir(self):
assert not os.path.exists(self.output_dir), f'args.output_dir: {self.output_dir} already exists.'

def __post_init__(self):
if self.quant_bits and self.quant_method is None:
raise ValueError('Please specify the quantization method using `--quant_method awq/gptq/bnb`.')
if self.quant_method and self.quant_bits is None:
raise ValueError('Please specify `--quant_bits`.')
if self.quant_method in {'gptq', 'awq'} and self.torch_dtype is None:
self.torch_dtype = torch.float16

BaseArguments.__post_init__(self)
self._init_output_dir()
if self.quant_bits:
self._init_quant()

def _init_torch_dtype(self) -> None:
if self.quant_bits and self.torch_dtype is None:
self.torch_dtype = 'float16'
logger.info(f'Setting args.torch_dtype: {self.torch_dtype}')
super()._init_torch_dtype()
if self.quant_method in {'gptq', 'awq'} and len(self.dataset) == 0:
raise ValueError(f'self.dataset: {self.dataset}, Please input the quant dataset.')
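With the validation consolidated into `__post_init__`, `swift export` now requires `--quant_method` and `--quant_bits` to be passed together, defaults `torch_dtype` to float16 for gptq/awq, and still demands a calibration dataset for those two methods. A hedged sketch of an invocation that satisfies the new checks; the model and dataset ids are placeholders taken from other examples in the repo:

```shell
# Hypothetical AWQ export: --quant_method and --quant_bits must both be given,
# and gptq/awq additionally need a calibration dataset.
CUDA_VISIBLE_DEVICES=0 swift export \
    --model Qwen/Qwen2.5-7B-Instruct \
    --quant_method awq \
    --quant_bits 4 \
    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#256' \
    --output_dir output/qwen2_5-7b-instruct-awq-int4
```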
6 changes: 6 additions & 0 deletions swift/llm/argument/rlhf_args.py
@@ -44,6 +44,7 @@ class RLHFArguments(TrainArguments):
undesirable_weight: float = 1.0

def __post_init__(self):
self._init_rm()
self._init_simpo()
self._set_default()
super().__post_init__()
@@ -65,6 +66,11 @@ def _init_simpo(self):
if self.beta is None:
self.beta = 2.

def _init_rm(self):
if self.rlhf_type == 'rm':
self.task_type = 'seq_cls'
self.num_labels = 1

def _set_default(self):
if self.beta is None:
self.beta = 0.1
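The new `_init_rm` hook means `--rlhf_type rm` alone switches the model into sequence-classification mode with a single scalar output; users no longer have to set `task_type` or `num_labels` themselves. A minimal CLI sketch of that behavior, essentially a trimmed-down version of the `rm.sh` example above rather than a new interface:

```shell
# task_type=seq_cls and num_labels=1 are now implied by --rlhf_type rm.
CUDA_VISIBLE_DEVICES=0 swift rlhf \
    --rlhf_type rm \
    --model Qwen/Qwen2.5-7B-Instruct \
    --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
    --train_type lora \
    --output_dir output
```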
18 changes: 4 additions & 14 deletions swift/llm/dataset/dataset/llm.py
@@ -537,20 +537,10 @@ def repair_conversations(s: Union[str, Any]) -> Any:
register_dataset(
DatasetMeta(
ms_dataset_id='hjh0119/shareAI-Llama3-DPO-zh-en-emoji',
subsets=[
SubsetDataset(
'zh',
preprocess_func=ResponsePreprocessor(columns_mapping={
'answer_zh': 'response',
'answer_en': 'rejected_response'
})),
SubsetDataset(
'en',
preprocess_func=ResponsePreprocessor(columns_mapping={
'answer_en': 'response',
'answer_zh': 'rejected_response'
}))
],
preprocess_func=ResponsePreprocessor(columns_mapping={
'answer_zh': 'response',
'answer_en': 'rejected_response'
}),
tags=['rlhf', 'dpo']))

register_dataset(
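With the `zh`/`en` subsets removed, this dataset is now always preprocessed with `answer_zh` as the chosen response and `answer_en` as the rejected one, and every `:zh`/`:en` reference elsewhere in this PR (READMEs and example scripts) drops the suffix accordingly. A before/after sketch of the CLI usage:

```shell
# Before this PR (subset-qualified):
#   --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji:zh
# After this PR (no subsets registered):
swift rlhf \
    --rlhf_type dpo \
    --model Qwen/Qwen2.5-7B-Instruct \
    --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
    --train_type lora
```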
2 changes: 1 addition & 1 deletion swift/llm/export/export.py
@@ -21,7 +21,7 @@ def run(self):
args.adapters[0] = swift_to_peft_format(args.adapters[0], args.output_dir)
elif args.merge_lora:
merge_lora(args)
elif args.quant_method is not None:
elif args.quant_method:
quantize_model(args)
elif args.to_ollama:
export_to_ollama(args)
1 change: 0 additions & 1 deletion swift/llm/infer/infer_engine/infer_engine.py
@@ -115,7 +115,6 @@ def _update_metrics(result, metrics: Optional[List[Metric]] = None):
metric.update(response)
return result_origin

@torch.inference_mode()
def infer(self,
infer_requests: List[InferRequest],
request_config: Optional[RequestConfig] = None,