2 changes: 1 addition & 1 deletion docs/source/Instruction/GRPO.md
@@ -20,7 +20,7 @@ pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
from swift.plugin.orm import ORM, orms
class DummyLengthRewardFunction(ORM)
def __call__(completions, **kwargs):
- return [1.0 if len(completion) > 1024 else 0.0 for comletion in comletions]
+ return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]

orms['dummy']= DummyLengthRewardFunction
```
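For reference, a runnable version of the doc's snippet might look like the sketch below; the added class-definition colon, `self` parameter, and type hints are assumed fixes on top of the illustrative example, and only the import path and the `orms['dummy']` registration are taken from the diff itself.

```python
from typing import List

from swift.plugin.orm import ORM, orms


class DummyLengthRewardFunction(ORM):

    def __call__(self, completions: List[str], **kwargs) -> List[float]:
        # Reward completions longer than 1024 characters, as in the doc's example.
        return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]


# Register the reward function under the name 'dummy' so GRPO training can reference it.
orms['dummy'] = DummyLengthRewardFunction
```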
2 changes: 1 addition & 1 deletion docs/source/Instruction/NPU支持.md
@@ -49,7 +49,7 @@ Legend:
SYS = Path traversing PCIe and NUMA nodes. Nodes are connected through SMP, such as QPI, UPI.
PHB = Path traversing PCIe and the PCIe host bridge of a CPU.
PIX = Path traversing a single PCIe switch
- PXB = Path traversing multipul PCIe switches
+ PXB = Path traversing multiple PCIe switches
HCCS = Connection traversing HCCS.
NA = Unknown relationship.
```
2 changes: 1 addition & 1 deletion docs/source/Instruction/命令行参数.md
@@ -148,7 +148,7 @@
- 🔥freeze_aligner: This parameter only takes effect for multimodal models; it can be used for both full-parameter and LoRA training, and its meaning follows `freeze_llm`. Defaults to True
- 🔥target_modules: Specifies the LoRA modules; defaults to `all-linear`. The behavior differs between LLMs and multimodal LLMs: for an LLM, every linear layer except lm_head is found automatically and a tuner is attached; for a multimodal LLM, the tuner is by default attached only to the LLM part, and this behavior can be controlled with `freeze_llm`, `freeze_vit` and `freeze_aligner`. This parameter is not limited to LoRA and can be used with other tuners
- 🔥target_regex: A regex expression specifying the LoRA modules; defaults to `None`. If a value is passed, the target_modules parameter is ignored. This parameter is not limited to LoRA and can be used with other tuners
- - init_weights: The method for initializing weights. For LoRA it can be `true`, `false`, `guassian`, `pissa`, `pissa_niter_[number of iters]`; for Bone it can be `true`, `false`, `bat`. Defaults to `true`
+ - init_weights: The method for initializing weights. For LoRA it can be `true`, `false`, `gaussian`, `pissa`, `pissa_niter_[number of iters]`; for Bone it can be `true`, `false`, `bat`. Defaults to `true`
- 🔥modules_to_save: After a tuner has been attached, additionally specifies some of the original model's modules to take part in training and be saved. Defaults to `[]`. This parameter is not limited to LoRA and can be used with other tuners

#### Full-parameter training
2 changes: 1 addition & 1 deletion examples/train/rft/rft.py
@@ -60,7 +60,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
for device in range(device_count):
sample_cmd = (
f'{conda_prefix} CUDA_VISIBLE_DEVICES={device} swift sample '
- f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the lastest optimzer state # noqa
+ f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the latest optimizer state # noqa
f'--dataset {" ".join(dataset)} '
f'--data_range {device} {device_count} '
f'--max_length 2048 '
2 changes: 1 addition & 1 deletion swift/llm/sampling/base.py
@@ -13,7 +13,7 @@ class Sampler:
def __init__(self, input_args: SamplingArguments):
self.args = input_args
self.template = None
- self.processer = None
+ self.processor = None
self.prm_model = None
self.orm_model = None
self._prepare_model_tokenizer()
2 changes: 1 addition & 1 deletion swift/plugin/tools.py
@@ -70,7 +70,7 @@ def format_toolbench(tool_names, tool_descs):
TOOLBENCH_PROMPT = """You can use many tools(functions) to do the following task.
First I will give you the task description, and your task start.
At each step, you need to give your thought to analyze the status now and what to do next, \
- with a function call to actually excute your step. Your output should follow this format:
+ with a function call to actually execute your step. Your output should follow this format:
Thought:
Action:
Action Input:
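For illustration, a single turn that follows this Thought/Action/Action Input format might look like the lines below; the tool name and arguments are invented for the example and are not part of this diff:

Thought: I need the current weather for the user's city before I can answer.
Action: get_weather
Action Input: {"city": "Beijing"}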
8 changes: 4 additions & 4 deletions swift/trainers/arguments.py
@@ -77,10 +77,10 @@ class GRPOArgumentsMixin:
vllm_enable_prefix_caching: bool = True
# reward function args, see details in swift/plugin/orm.py
# cosine reward, https://arxiv.org/abs/2502.03373
- cosine_min_len_value_wrong: float = 0.0 # r^w_0 in paper, Reward for wrong answers with zero comletion length.
- cosine_max_len_value_wrong: float = -0.5 # r^w_L in paper, Reward for wrong answers with max comletion length.
- cosine_min_len_value_correct: float = 1.0 # r^c_0 in paper, Reward for correct answers with zero comletion length.
- cosine_max_len_value_correct: float = 0.5 # r^c_L in paper, Reward for correct answers with max comletion length.
+ cosine_min_len_value_wrong: float = 0.0 # r^w_0 in paper, Reward for wrong answers with zero completion length.
+ cosine_max_len_value_wrong: float = -0.5 # r^w_L in paper, Reward for wrong answers with max completion length.
+ cosine_min_len_value_correct: float = 1.0 # r^c_0 in paper, Reward for correct answers with zero completion length.
+ cosine_max_len_value_correct: float = 0.5 # r^c_L in paper, Reward for correct answers with max completion length.
cosine_max_len: Optional[int] = None # Lmax in paper, default equal to max_completion_length
# repetition penalty, https://arxiv.org/abs/2502.03373
repetition_n_grams: int = 3
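For intuition, the four `cosine_*` values above are the endpoints of a cosine interpolation over completion length, as described in the cited paper (arXiv 2502.03373). A minimal sketch of that idea, assuming `completion_len` and `max_len` inputs and not claiming to mirror the actual implementation in swift/plugin/orm.py, could be:

```python
import math


def cosine_length_reward(is_correct: bool, completion_len: int, max_len: int,
                         min_len_value_wrong: float = 0.0, max_len_value_wrong: float = -0.5,
                         min_len_value_correct: float = 1.0, max_len_value_correct: float = 0.5) -> float:
    # Pick the endpoints: the "min_len" value applies at length 0, the "max_len" value at max_len.
    if is_correct:
        value_at_zero, value_at_max = min_len_value_correct, max_len_value_correct
    else:
        value_at_zero, value_at_max = min_len_value_wrong, max_len_value_wrong
    progress = min(completion_len / max_len, 1.0)
    # Cosine schedule: yields value_at_zero at progress 0 and value_at_max at progress 1.
    return value_at_max + 0.5 * (value_at_zero - value_at_max) * (1.0 + math.cos(math.pi * progress))
```

With the defaults above, a correct answer is worth 1.0 at zero length and 0.5 at `cosine_max_len`, while a wrong answer moves from 0.0 down to -0.5.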
2 changes: 1 addition & 1 deletion swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -107,7 +107,7 @@ def __init__(self,
vllm_device = self.args.vllm_device
if vllm_device == 'auto':
if get_device_count() == 1:
- vllm_device = get_device() # particular case when training with onyl 1 GPU: share it
+ vllm_device = get_device() # particular case when training with only 1 GPU: share it
else:
local_world_size = get_dist_setting()[3]
vllm_device = get_device(local_world_size) # take the next GPU idx
2 changes: 1 addition & 1 deletion swift/tuners/scetuning/scetuning.py
@@ -29,7 +29,7 @@ class SCETuningConfig(SwiftConfig):
hint_modules(`Union[List[str], str]`): The hint module to be replaced, can a regex string
tuner_mode(`str`): Location of tuner operation.
tuner_op(`str`): Tuner operation.
- down_ratio(`flaot`): The dim down ratio of tuner hidden state.
+ down_ratio(`float`): The dim down ratio of tuner hidden state.
"""

dims: Optional[Union[List[int], int]] = field(
4 changes: 2 additions & 2 deletions swift/utils/torchacc_utils.py
@@ -79,7 +79,7 @@ def pad_and_split_batch(padding_to, input_ids, attention_mask, labels, loss_scal
loss_scale = F.pad(loss_scale, pad_tuple, 'constant', 0.)
labels = F.pad(labels, pad_tuple, 'constant', -100)

- # manully split the batch to different DP rank.
+ # manually split the batch to different DP rank.
batch_size = input_ids.shape[0] // world_size
if batch_size > 0:
start = rank * batch_size
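As an aside, the splitting this comment describes boils down to giving each data-parallel rank an equal, contiguous slice of the padded global batch. A standalone sketch of that pattern (not the file's actual continuation) is:

```python
import torch


def split_batch_for_rank(batch: torch.Tensor, rank: int, world_size: int) -> torch.Tensor:
    # Illustrative only: slice dim 0 into world_size contiguous chunks and return this rank's chunk.
    per_rank = batch.shape[0] // world_size
    start = rank * per_rank
    return batch[start:start + per_rank]
```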
@@ -862,7 +862,7 @@ def clip_grad_norm_(self, parameters, max_norm, norm_type=2):
while isinstance(opt, AcceleratedOptimizer):
opt = opt.optimizer
gradients = xm._fetch_gradients(opt)
- # Use xm.all_reduce to perform an in-place all-reduce. Recusrsive all-reduce each tensor
+ # Use xm.all_reduce to perform an in-place all-reduce. Recursive all-reduce each tensor
# one by one in self.reduce is non-inplace.
xm.all_reduce('sum', gradients, scale=1.0 / self.num_processes)
# Set is_xla_gradients_synced to True to avoid all-reduce twice in the AcceleratedOptimizer step.
4 changes: 2 additions & 2 deletions tests/run.py
@@ -19,7 +19,7 @@
# NOTICE: Tensorflow 1.15 seems not so compatible with pytorch.
# A segmentation fault may be raise by pytorch cpp library
# if 'import tensorflow' in front of 'import torch'.
- # Puting a 'import torch' here can bypass this incompatibility.
+ # Putting a 'import torch' here can bypass this incompatibility.
import torch
import yaml
from model_tag import ModelTag, commit_model_ut_result
@@ -255,7 +255,7 @@ def wait_for_workers(workers):
break

if is_all_completed:
- logger.info('All sub porcess is completed!')
+ logger.info('All sub process is completed!')
break
time.sleep(0.001)
