From 64a461eb55a7a867410d35bd4ca69a07caca7d44 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Mon, 10 Nov 2025 12:37:28 +0800 Subject: [PATCH 1/2] fix docs --- docs/source/Instruction/Command-line-parameters.md | 2 +- docs/source_en/Instruction/Command-line-parameters.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/Instruction/Command-line-parameters.md b/docs/source/Instruction/Command-line-parameters.md index 4e731404b8..05601f728a 100644 --- a/docs/source/Instruction/Command-line-parameters.md +++ b/docs/source/Instruction/Command-line-parameters.md @@ -843,4 +843,4 @@ qwen2_5_omni除了包含qwen2_5_vl和qwen2_audio的模型特定参数外,还 - VLLM_USE_V1: 用于切换vLLM使用V0/V1版本。 - SWIFT_TIMEOUT: (ms-swift>=3.10) 若多模态数据集中存在图像URL,该参数用于控制获取图片的timeout,默认为20s。 - ROOT_IMAGE_DIR: (ms-swift>=3.8) 图像(多模态)资源的根目录。通过设置该参数,可以在数据集中使用相对于 `ROOT_IMAGE_DIR` 的相对路径。默认情况下,是相对于运行目录的相对路径。 -- SWIFT_SINGLE_DEVICE_MODE: (ms-swift>=3.10) 单设备模式,在此模式下,每个进程只能看到一个设备,目前用于兼容PPU设备 +- SWIFT_SINGLE_DEVICE_MODE: (ms-swift>=3.10) 单设备模式,可选值为"0"(默认值)/"1",在此模式下,每个进程只能看到一个设备,目前用于兼容PPU设备 diff --git a/docs/source_en/Instruction/Command-line-parameters.md b/docs/source_en/Instruction/Command-line-parameters.md index fd3ab0686b..9a3aa6f22f 100644 --- a/docs/source_en/Instruction/Command-line-parameters.md +++ b/docs/source_en/Instruction/Command-line-parameters.md @@ -868,4 +868,4 @@ The meanings of the following parameters can be found in the example code [here] - VLLM_USE_V1: Used to switch between V0 and V1 versions of vLLM. - SWIFT_TIMEOUT: (ms-swift >= 3.10) If the multimodal dataset contains image URLs, this parameter controls the timeout for fetching images, defaulting to 20 seconds. - ROOT_IMAGE_DIR: (ms-swift>=3.8) The root directory for image (multimodal) resources. By setting this parameter, relative paths in the dataset can be interpreted relative to `ROOT_IMAGE_DIR`. By default, paths are relative to the current working directory. 
-- SWIFT_SINGLE_DEVICE_MODE: (ms-swift>=3.10) Single device mode. In this mode, each process can only see one device. Currently used for compatibility with PPU devices. +- SWIFT_SINGLE_DEVICE_MODE: (ms-swift>=3.10) Single device mode; valid values are "0" (default) and "1". In this mode, each process can only see one device. Currently used for compatibility with PPU devices. From cffed2f71ab63733bbb2b641510a42c56622dd30 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Fri, 14 Nov 2025 00:18:24 +0800 Subject: [PATCH 2/2] fix loss --- swift/plugin/loss.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/swift/plugin/loss.py b/swift/plugin/loss.py index 0dc72882eb..61b3eb4cda 100755 --- a/swift/plugin/loss.py +++ b/swift/plugin/loss.py @@ -446,7 +446,7 @@ def infonce_loss(outputs, labels, loss_scale=None, num_items_in_batch=None, **kw similarity_matrix = torch.cat(logits_list, dim=1) # temperature scaling and CE similarity_matrix = similarity_matrix / temperature - loss = nn.CrossEntropyLoss()(similarity_matrix, labels) / world_size # avoid duplicate + loss = nn.CrossEntropyLoss()(similarity_matrix, labels) else: all_tensors = [] for tensor in split_tensors: @@ -499,7 +499,6 @@ def infonce_loss(outputs, labels, loss_scale=None, num_items_in_batch=None, **kw # next positive is neg+1 length += tensor.size(0) - 1 loss /= len(split_tensors) - loss /= world_size # avoid duplicate return loss