2 changes: 1 addition & 1 deletion docs/source/Instruction/GRPO.md
@@ -20,7 +20,7 @@ pip install git+https://github.com/huggingface/trl.git # trl>=0.15.0.dev0
from swift.plugin.orm import ORM, orms
class DummyLengthRewardFunction(ORM)
def __call__(completions, **kwargs):
- return [1.0 if len(completion) > 1024 else 0.0 for comletion in comletions]
+ return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]

orms['dummy']= DummyLengthRewardFunction
```
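For reference, a runnable version of the doc's snippet might look like the sketch below; the added class-definition colon, `self` parameter, and type hints are assumed fixes on top of the illustrative example, and only the import path and the `orms['dummy']` registration are taken from the diff itself.

```python
from typing import List

from swift.plugin.orm import ORM, orms


class DummyLengthRewardFunction(ORM):

    def __call__(self, completions: List[str], **kwargs) -> List[float]:
        # Reward completions longer than 1024 characters, as in the doc's example.
        return [1.0 if len(completion) > 1024 else 0.0 for completion in completions]


# Register the reward function under the name 'dummy' so GRPO training can reference it.
orms['dummy'] = DummyLengthRewardFunction
```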
2 changes: 1 addition & 1 deletion docs/source/Instruction/NPU支持.md
@@ -49,7 +49,7 @@ Legend:
SYS = Path traversing PCIe and NUMA nodes. Nodes are connected through SMP, such as QPI, UPI.
PHB = Path traversing PCIe and the PCIe host bridge of a CPU.
PIX = Path traversing a single PCIe switch
- PXB = Path traversing multipul PCIe switches
+ PXB = Path traversing multiple PCIe switches
HCCS = Connection traversing HCCS.
NA = Unknown relationship.
```
2 changes: 1 addition & 1 deletion docs/source/Instruction/命令行参数.md
@@ -148,7 +148,7 @@
- 🔥freeze_aligner: This parameter only takes effect for multimodal models; it can be used for both full-parameter and LoRA training, and its meaning follows `freeze_llm`. Defaults to True
- 🔥target_modules: Specifies the LoRA modules; defaults to `all-linear`. The behavior differs between LLMs and multimodal LLMs: for an LLM, every linear layer except lm_head is found automatically and a tuner is attached; for a multimodal LLM, the tuner is by default attached only to the LLM part, and this behavior can be controlled with `freeze_llm`, `freeze_vit` and `freeze_aligner`. This parameter is not limited to LoRA and can be used with other tuners
- 🔥target_regex: A regex expression specifying the LoRA modules; defaults to `None`. If a value is passed, the target_modules parameter is ignored. This parameter is not limited to LoRA and can be used with other tuners
- - init_weights: The method for initializing weights. For LoRA it can be `true`, `false`, `guassian`, `pissa`, `pissa_niter_[number of iters]`; for Bone it can be `true`, `false`, `bat`. Defaults to `true`
+ - init_weights: The method for initializing weights. For LoRA it can be `true`, `false`, `gaussian`, `pissa`, `pissa_niter_[number of iters]`; for Bone it can be `true`, `false`, `bat`. Defaults to `true`
- 🔥modules_to_save: After a tuner has been attached, additionally specifies some of the original model's modules to take part in training and be saved. Defaults to `[]`. This parameter is not limited to LoRA and can be used with other tuners

#### Full-parameter training
2 changes: 1 addition & 1 deletion examples/train/rft/rft.py
@@ -60,7 +60,7 @@ def do_sample(model: str, model_type: str, dataset: List[str], iter: int):
for device in range(device_count):
sample_cmd = (
f'{conda_prefix} CUDA_VISIBLE_DEVICES={device} swift sample '
- f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the lastest optimzer state # noqa
+ f'--model {model} --model_type {model_type} ' # change to --resume_from_checkpoint to use the latest optimizer state # noqa
f'--dataset {" ".join(dataset)} '
f'--data_range {device} {device_count} '
f'--max_length 2048 '
2 changes: 1 addition & 1 deletion swift/llm/sampling/base.py
@@ -13,7 +13,7 @@ class Sampler:
def __init__(self, input_args: SamplingArguments):
self.args = input_args
self.template = None
- self.processer = None
+ self.processor = None
self.prm_model = None
self.orm_model = None
self._prepare_model_tokenizer()
2 changes: 1 addition & 1 deletion swift/plugin/tools.py
@@ -70,7 +70,7 @@ def format_toolbench(tool_names, tool_descs):
TOOLBENCH_PROMPT = """You can use many tools(functions) to do the following task.
First I will give you the task description, and your task start.
At each step, you need to give your thought to analyze the status now and what to do next, \
- with a function call to actually excute your step. Your output should follow this format:
+ with a function call to actually execute your step. Your output should follow this format:
Thought:
Action:
Action Input:
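For illustration, a single turn that follows this Thought/Action/Action Input format might look like the lines below; the tool name and arguments are invented for the example and are not part of this diff:

Thought: I need the current weather for the user's city before I can answer.
Action: get_weather
Action Input: {"city": "Beijing"}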
8 changes: 4 additions & 4 deletions swift/trainers/arguments.py
@@ -77,10 +77,10 @@ class GRPOArgumentsMixin:
vllm_enable_prefix_caching: bool = True
# reward function args, see details in swift/plugin/orm.py
# cosine reward, https://arxiv.org/abs/2502.03373
- cosine_min_len_value_wrong: float = 0.0 # r^w_0 in paper, Reward for wrong answers with zero comletion length.
- cosine_max_len_value_wrong: float = -0.5 # r^w_L in paper, Reward for wrong answers with max comletion length.
- cosine_min_len_value_correct: float = 1.0 # r^c_0 in paper, Reward for correct answers with zero comletion length.
- cosine_max_len_value_correct: float = 0.5 # r^c_L in paper, Reward for correct answers with max comletion length.
+ cosine_min_len_value_wrong: float = 0.0 # r^w_0 in paper, Reward for wrong answers with zero completion length.
+ cosine_max_len_value_wrong: float = -0.5 # r^w_L in paper, Reward for wrong answers with max completion length.
+ cosine_min_len_value_correct: float = 1.0 # r^c_0 in paper, Reward for correct answers with zero completion length.
+ cosine_max_len_value_correct: float = 0.5 # r^c_L in paper, Reward for correct answers with max completion length.
cosine_max_len: Optional[int] = None # Lmax in paper, default equal to max_completion_length
# repetition penalty, https://arxiv.org/abs/2502.03373
repetition_n_grams: int = 3
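For intuition, the four `cosine_*` values above are the endpoints of a cosine interpolation over completion length, as described in the cited paper (arXiv 2502.03373). A minimal sketch of that idea, assuming `completion_len` and `max_len` inputs and not claiming to mirror the actual implementation in swift/plugin/orm.py, could be:

```python
import math


def cosine_length_reward(is_correct: bool, completion_len: int, max_len: int,
                         min_len_value_wrong: float = 0.0, max_len_value_wrong: float = -0.5,
                         min_len_value_correct: float = 1.0, max_len_value_correct: float = 0.5) -> float:
    # Pick the endpoints: the "min_len" value applies at length 0, the "max_len" value at max_len.
    if is_correct:
        value_at_zero, value_at_max = min_len_value_correct, max_len_value_correct
    else:
        value_at_zero, value_at_max = min_len_value_wrong, max_len_value_wrong
    progress = min(completion_len / max_len, 1.0)
    # Cosine schedule: yields value_at_zero at progress 0 and value_at_max at progress 1.
    return value_at_max + 0.5 * (value_at_zero - value_at_max) * (1.0 + math.cos(math.pi * progress))
```

With the defaults above, a correct answer is worth 1.0 at zero length and 0.5 at `cosine_max_len`, while a wrong answer moves from 0.0 down to -0.5.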
2 changes: 1 addition & 1 deletion swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -107,7 +107,7 @@ def __init__(self,
vllm_device = self.args.vllm_device
if vllm_device == 'auto':
if get_device_count() == 1:
- vllm_device = get_device() # particular case when training with onyl 1 GPU: share it
+ vllm_device = get_device() # particular case when training with only 1 GPU: share it
else:
local_world_size = get_dist_setting()[3]
vllm_device = get_device(local_world_size) # take the next GPU idx
2 changes: 1 addition & 1 deletion swift/tuners/scetuning/scetuning.py
@@ -29,7 +29,7 @@ class SCETuningConfig(SwiftConfig):
hint_modules(`Union[List[str], str]`): The hint module to be replaced, can a regex string
tuner_mode(`str`): Location of tuner operation.
tuner_op(`str`): Tuner operation.
- down_ratio(`flaot`): The dim down ratio of tuner hidden state.
+ down_ratio(`float`): The dim down ratio of tuner hidden state.
"""

dims: Optional[Union[List[int], int]] = field(
4 changes: 2 additions & 2 deletions swift/utils/torchacc_utils.py
@@ -79,7 +79,7 @@ def pad_and_split_batch(padding_to, input_ids, attention_mask, labels, loss_scal
loss_scale = F.pad(loss_scale, pad_tuple, 'constant', 0.)
labels = F.pad(labels, pad_tuple, 'constant', -100)

- # manully split the batch to different DP rank.
+ # manually split the batch to different DP rank.
batch_size = input_ids.shape[0] // world_size
if batch_size > 0:
start = rank * batch_size
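As an aside, the splitting this comment describes boils down to giving each data-parallel rank an equal, contiguous slice of the padded global batch. A standalone sketch of that pattern (not the file's actual continuation) is:

```python
import torch


def split_batch_for_rank(batch: torch.Tensor, rank: int, world_size: int) -> torch.Tensor:
    # Illustrative only: slice dim 0 into world_size contiguous chunks and return this rank's chunk.
    per_rank = batch.shape[0] // world_size
    start = rank * per_rank
    return batch[start:start + per_rank]
```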
@@ -862,7 +862,7 @@ def clip_grad_norm_(self, parameters, max_norm, norm_type=2):
while isinstance(opt, AcceleratedOptimizer):
opt = opt.optimizer
gradients = xm._fetch_gradients(opt)
- # Use xm.all_reduce to perform an in-place all-reduce. Recusrsive all-reduce each tensor
+ # Use xm.all_reduce to perform an in-place all-reduce. Recursive all-reduce each tensor
# one by one in self.reduce is non-inplace.
xm.all_reduce('sum', gradients, scale=1.0 / self.num_processes)
# Set is_xla_gradients_synced to True to avoid all-reduce twice in the AcceleratedOptimizer step.
4 changes: 2 additions & 2 deletions tests/run.py
@@ -19,7 +19,7 @@
# NOTICE: Tensorflow 1.15 seems not so compatible with pytorch.
# A segmentation fault may be raise by pytorch cpp library
# if 'import tensorflow' in front of 'import torch'.
- # Puting a 'import torch' here can bypass this incompatibility.
+ # Putting a 'import torch' here can bypass this incompatibility.
import torch
import yaml
from model_tag import ModelTag, commit_model_ut_result
@@ -255,7 +255,7 @@ def wait_for_workers(workers):
break

if is_all_completed:
- logger.info('All sub porcess is completed!')
+ logger.info('All sub process is completed!')
break
time.sleep(0.001)
