diff --git a/docs/source/Instruction/GRPO.md b/docs/source/Instruction/GRPO.md
index f75a999dc7..42f85336f3 100644
--- a/docs/source/Instruction/GRPO.md
+++ b/docs/source/Instruction/GRPO.md
@@ -20,7 +20,7 @@ pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
 from swift.plugin.orm import ORM, orms
 class DummyLengthRewardFunction(ORM)
     def __call__(completions, **kwargs):
-        return [1.0 if len(completion) > 1024 else 0.0 for comletion in comletions]
+        return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]
 orms['dummy']= DummyLengthRewardFunction
 ```
diff --git "a/docs/source/Instruction/NPU\346\224\257\346\214\201.md" "b/docs/source/Instruction/NPU\346\224\257\346\214\201.md"
index 8f0a54176c..8e2ad6d1de 100644
--- "a/docs/source/Instruction/NPU\346\224\257\346\214\201.md"
+++ "b/docs/source/Instruction/NPU\346\224\257\346\214\201.md"
@@ -49,7 +49,7 @@ Legend:
 SYS = Path traversing PCIe and NUMA nodes. Nodes are connected through SMP, such as QPI, UPI.
 PHB = Path traversing PCIe and the PCIe host bridge of a CPU.
 PIX = Path traversing a single PCIe switch
-PXB = Path traversing multipul PCIe switches
+PXB = Path traversing multiple PCIe switches
 HCCS = Connection traversing HCCS.
 NA = Unknown relationship.
 ```
diff --git "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md" "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md"
index f2622ce21e..d65847efd4 100644
--- "a/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md"
+++ "b/docs/source/Instruction/\345\221\275\344\273\244\350\241\214\345\217\202\346\225\260.md"
@@ -148,7 +148,7 @@
 - 🔥freeze_aligner: This parameter only takes effect for multimodal models; it can be used for both full-parameter and LoRA training, and its meaning follows `freeze_llm`. Defaults to True
 - 🔥target_modules: Specifies the LoRA modules, defaults to `all-linear`. Its behavior differs between LLMs and multimodal LLMs: for an LLM, all linear layers except lm_head are found automatically and a tuner is attached; for a multimodal LLM, the tuner is attached only to the LLM by default, and this behavior can be controlled by `freeze_llm`, `freeze_vit`, and `freeze_aligner`. This parameter is not limited to LoRA and can be used with other tuners
 - 🔥target_regex: A regex expression specifying the LoRA modules, defaults to `None`. If this value is provided, the target_modules parameter is ignored. This parameter is not limited to LoRA and can be used with other tuners
-- init_weights: The method for initializing weights. For LoRA it can be set to `true`, `false`, `guassian`, `pissa`, `pissa_niter_[number of iters]`; for Bone it can be set to `true`, `false`, `bat`. Default value `true`
+- init_weights: The method for initializing weights. For LoRA it can be set to `true`, `false`, `gaussian`, `pissa`, `pissa_niter_[number of iters]`; for Bone it can be set to `true`, `false`, `bat`. Default value `true`
 - 🔥modules_to_save: After a tuner has been attached, additionally specifies some of the original model modules to participate in training and be saved. Defaults to `[]`. This parameter is not limited to LoRA and can be used with other tuners
 
 #### Full-parameter
diff --git a/examples/train/rft/rft.py b/examples/train/rft/rft.py
index 8e24d54d6c..dc29866023 100644
--- a/examples/train/rft/rft.py
+++ b/examples/train/rft/rft.py
@@ -60,7 +60,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
     for device in range(device_count):
         sample_cmd = (
             f'{conda_prefix} CUDA_VISIBLE_DEVICES={device} swift sample '
-            f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the lastest optimzer state # noqa
+            f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the latest optimizer state # noqa
             f'--dataset {" ".join(dataset)} '
             f'--data_range {device} {device_count} '
             f'--max_length 2048 '
diff --git a/swift/llm/sampling/base.py b/swift/llm/sampling/base.py
index 302bc79fd4..ca3c818d6f 100644
--- a/swift/llm/sampling/base.py
+++ b/swift/llm/sampling/base.py
@@ -13,7 +13,7 @@ class Sampler:
 
     def __init__(self, input_args: SamplingArguments):
         self.args = input_args
         self.template = None
-        self.processer = None
+        self.processor = None
         self.prm_model = None
         self.orm_model = None
         self._prepare_model_tokenizer()
diff --git a/swift/plugin/tools.py b/swift/plugin/tools.py
index a4efafc39d..2de0371637 100644
--- a/swift/plugin/tools.py
+++ b/swift/plugin/tools.py
@@ -70,7 +70,7 @@ def format_toolbench(tool_names, tool_descs):
 TOOLBENCH_PROMPT = """You can use many tools(functions) to do the following task.
 First I will give you the task description, and your task start.
 At each step, you need to give your thought to analyze the status now and what to do next, \
-with a function call to actually excute your step. Your output should follow this format:
+with a function call to actually execute your step. Your output should follow this format:
 Thought:
 Action:
 Action Input:
diff --git a/swift/trainers/arguments.py b/swift/trainers/arguments.py
index 2a300b6293..96cc8283a3 100644
--- a/swift/trainers/arguments.py
+++ b/swift/trainers/arguments.py
@@ -77,10 +77,10 @@ class GRPOArgumentsMixin:
     vllm_enable_prefix_caching: bool = True
     # reward function args, see details in swift/plugin/orm.py
     # cosine reward, https://arxiv.org/abs/2502.03373
-    cosine_min_len_value_wrong: float = 0.0 # r^w_0 in paper, Reward for wrong answers with zero comletion length.
-    cosine_max_len_value_wrong: float = -0.5 # r^w_L in paper, Reward for wrong answers with max comletion length.
-    cosine_min_len_value_correct: float = 1.0 # r^c_0 in paper, Reward for correct answers with zero comletion length.
-    cosine_max_len_value_correct: float = 0.5 # r^c_L in paper, Reward for correct answers with max comletion length.
+    cosine_min_len_value_wrong: float = 0.0 # r^w_0 in paper, Reward for wrong answers with zero completion length.
+    cosine_max_len_value_wrong: float = -0.5 # r^w_L in paper, Reward for wrong answers with max completion length.
+    cosine_min_len_value_correct: float = 1.0 # r^c_0 in paper, Reward for correct answers with zero completion length.
+    cosine_max_len_value_correct: float = 0.5 # r^c_L in paper, Reward for correct answers with max completion length.
     cosine_max_len: Optional[int] = None # Lmax in paper, default equal to max_completion_length
     # repetition penalty, https://arxiv.org/abs/2502.03373
     repetition_n_grams: int = 3
diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py
index 60620e8969..bf837e21eb 100644
--- a/swift/trainers/rlhf_trainer/grpo_trainer.py
+++ b/swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -107,7 +107,7 @@ def __init__(self,
         vllm_device = self.args.vllm_device
         if vllm_device == 'auto':
             if get_device_count() == 1:
-                vllm_device = get_device() # particular case when training with onyl 1 GPU: share it
+                vllm_device = get_device() # particular case when training with only 1 GPU: share it
             else:
                 local_world_size = get_dist_setting()[3]
                 vllm_device = get_device(local_world_size) # take the next GPU idx
diff --git a/swift/tuners/scetuning/scetuning.py b/swift/tuners/scetuning/scetuning.py
index a560b9d855..e5468c6a5e 100644
--- a/swift/tuners/scetuning/scetuning.py
+++ b/swift/tuners/scetuning/scetuning.py
@@ -29,7 +29,7 @@ class SCETuningConfig(SwiftConfig):
         hint_modules(`Union[List[str], str]`): The hint module to be replaced, can a regex string
         tuner_mode(`str`): Location of tuner operation.
         tuner_op(`str`): Tuner operation.
-        down_ratio(`flaot`): The dim down ratio of tuner hidden state.
+        down_ratio(`float`): The dim down ratio of tuner hidden state.
     """
 
     dims: Optional[Union[List[int], int]] = field(
diff --git a/swift/utils/torchacc_utils.py b/swift/utils/torchacc_utils.py
index cd21f4b7bc..665085fb22 100644
--- a/swift/utils/torchacc_utils.py
+++ b/swift/utils/torchacc_utils.py
@@ -79,7 +79,7 @@ def pad_and_split_batch(padding_to, input_ids, attention_mask, labels, loss_scal
             loss_scale = F.pad(loss_scale, pad_tuple, 'constant', 0.)
         labels = F.pad(labels, pad_tuple, 'constant', -100)
 
-    # manully split the batch to different DP rank.
+    # manually split the batch to different DP rank.
     batch_size = input_ids.shape[0] // world_size
     if batch_size > 0:
         start = rank * batch_size
@@ -862,7 +862,7 @@ def clip_grad_norm_(self, parameters, max_norm, norm_type=2):
         while isinstance(opt, AcceleratedOptimizer):
             opt = opt.optimizer
         gradients = xm._fetch_gradients(opt)
-        # Use xm.all_reduce to perform an in-place all-reduce. Recusrsive all-reduce each tensor
+        # Use xm.all_reduce to perform an in-place all-reduce. Recursive all-reduce each tensor
         # one by one in self.reduce is non-inplace.
         xm.all_reduce('sum', gradients, scale=1.0 / self.num_processes)
         # Set is_xla_gradients_synced to True to avoid all-reduce twice in the AcceleratedOptimizer step.
diff --git a/tests/run.py b/tests/run.py
index 419c194a2a..ed349136da 100644
--- a/tests/run.py
+++ b/tests/run.py
@@ -19,7 +19,7 @@
 # NOTICE: Tensorflow 1.15 seems not so compatible with pytorch.
 # A segmentation fault may be raise by pytorch cpp library
 # if 'import tensorflow' in front of 'import torch'.
-# Puting a 'import torch' here can bypass this incompatibility.
+# Putting a 'import torch' here can bypass this incompatibility.
 import torch
 import yaml
 from model_tag import ModelTag, commit_model_ut_result
@@ -255,7 +255,7 @@ def wait_for_workers(workers):
             break
 
         if is_all_completed:
-            logger.info('All sub porcess is completed!')
+            logger.info('All sub process is completed!')
             break
         time.sleep(0.001)
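
For reference, this is how the custom reward-function example touched by the first hunk (docs/source/Instruction/GRPO.md) reads once the `completions` typo is fixed. It is a minimal runnable sketch rather than the authoritative docs snippet: the class-definition colon and the `self` parameter are added here so the code executes, and the registration under the name `dummy` follows the snippet shown above.

```python
from swift.plugin.orm import ORM, orms


class DummyLengthRewardFunction(ORM):

    def __call__(self, completions, **kwargs):
        # Reward 1.0 for completions longer than 1024 characters, otherwise 0.0.
        return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]


# Register the reward function so GRPO training can refer to it by the name 'dummy'.
orms['dummy'] = DummyLengthRewardFunction
```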
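
The four `cosine_*_len_value_*` arguments in swift/trainers/arguments.py parameterize the cosine length-scaled reward of arXiv 2502.03373: the reward is interpolated between a zero-length value and a max-length value as the completion grows. The sketch below only illustrates that interpolation with the default values shown in the hunk; it is not the implementation in swift/plugin/orm.py, and the helper name `cosine_scaled_reward` is hypothetical.

```python
import math


def cosine_scaled_reward(gen_len: int,
                         max_len: int,
                         is_correct: bool,
                         min_len_value_wrong: float = 0.0,
                         max_len_value_wrong: float = -0.5,
                         min_len_value_correct: float = 1.0,
                         max_len_value_correct: float = 0.5) -> float:
    """Cosine interpolation between the zero-length and max-length reward values."""
    if is_correct:
        r_at_zero, r_at_max = min_len_value_correct, max_len_value_correct  # r^c_0, r^c_L
    else:
        r_at_zero, r_at_max = min_len_value_wrong, max_len_value_wrong  # r^w_0, r^w_L
    progress = min(gen_len / max_len, 1.0)
    # cos(0) = 1 yields r_at_zero; cos(pi) = -1 yields r_at_max.
    return r_at_max + 0.5 * (r_at_zero - r_at_max) * (1.0 + math.cos(math.pi * progress))


# Short correct answers earn close to 1.0 and decay toward 0.5 at max length,
# while wrong answers are penalized more the longer they run (0.0 down to -0.5).
print(cosine_scaled_reward(gen_len=64, max_len=4096, is_correct=True))     # ~1.0
print(cosine_scaled_reward(gen_len=4096, max_len=4096, is_correct=False))  # -0.5
```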