Merged
2 changes: 1 addition & 1 deletion docs/source/Instruction/命令行参数.md
@@ -37,7 +37,7 @@
- new_special_tokens: The special tokens to add. Default is `[]`. See the example [here](https://github.com/modelscope/ms-swift/tree/main/examples/train/new_special_tokens).
  - Note: You can also pass a file path ending in `.txt`, with one special token per line.
- num_labels: Required for classification models (i.e. `--task_type seq_cls`). Represents the number of labels; default is None.
-- problem_type: Required for classification models (i.e. `--task_type seq_cls`). Options: 'regression', 'single_label_classification', 'multi_label_classification'. Default is 'single_label_classification'.
+- problem_type: Required for classification models (i.e. `--task_type seq_cls`). Options: 'regression', 'single_label_classification', 'multi_label_classification'. Defaults to None; if the model is a reward_model or num_labels is 1, it is set to 'regression', otherwise to 'single_label_classification'.
- rope_scaling: RoPE type. Supports `linear`, `dynamic`, and `yarn`, or a JSON string passed directly: `"{\"factor\":2.0,\"type\":\"yarn\"}"`. Use together with `max_model_len`. Default is None.
- max_model_len: If `rope_scaling` is used, `max_model_len` can be set and is used to compute the RoPE `factor` multiplier. The final `max_position_embeddings` is set to `factor` times the original value. If `rope_scaling` is a JSON string, this value has no effect.
- device_map: The device_map configuration used by the model, e.g. 'auto', 'cpu', a JSON string, or a JSON file path. Default is None; it is set automatically based on the device and distributed-training setup.
2 changes: 1 addition & 1 deletion docs/source/Megatron-SWIFT/命令行参数.md
@@ -268,7 +268,7 @@ Megatron training parameters inherit from Megatron parameters and basic parameters (dataclass shared with ms-swift)
- enable_channel_loss: Enable channel loss. Default is `False`. You need to prepare a "channel" field in the dataset; ms-swift will group and aggregate the loss by this field. For the dataset format, see [channel loss](../Customization/自定义数据集.md#channel-loss).
- 🔥task_type: Default is 'causal_lm'. Options: 'causal_lm', 'seq_cls'.
- num_labels: Required for classification models (i.e. `--task_type seq_cls`). Represents the number of labels; default is None.
-- problem_type: Required for classification models (i.e. `--task_type seq_cls`). Options: 'regression', 'single_label_classification', 'multi_label_classification'. Default is 'single_label_classification'.
+- problem_type: Required for classification models (i.e. `--task_type seq_cls`). Options: 'regression', 'single_label_classification', 'multi_label_classification'. Defaults to None; if the model is a reward_model or num_labels is 1, it is set to 'regression', otherwise to 'single_label_classification'.


## RLHF Parameters
2 changes: 1 addition & 1 deletion docs/source_en/Instruction/Command-line-parameters.md
@@ -38,7 +38,7 @@ Hints:
- new_special_tokens: The special tokens to be added. Default is `[]`. See the example [here](https://github.com/modelscope/ms-swift/tree/main/examples/train/new_special_tokens).
- Note: You can also pass a file path ending with `.txt`, where each line represents a special token.
- num_labels: This parameter is required for classification models (i.e., `--task_type seq_cls`). It represents the number of labels, with a default value of None.
-- problem_type: This parameter is required for classification models (i.e., `--task_type seq_cls`). The options are 'regression', 'single_label_classification', and 'multi_label_classification'. The default value is 'single_label_classification'.
+- problem_type: This parameter is required for classification models (i.e., `--task_type seq_cls`). The options are 'regression', 'single_label_classification', and 'multi_label_classification'. Defaults to None. If the model is a reward_model or num_labels equals 1, this parameter is 'regression'; otherwise it is 'single_label_classification'.
- rope_scaling: RoPE type, supports `linear`, `dynamic`, and `yarn`, or you can directly pass in a JSON string: `"{\"factor\":2.0,\"type\":\"yarn\"}"`. Please use in conjunction with `max_model_len`. Default is None.
- max_model_len: If using `rope_scaling`, you can set `max_model_len`. This parameter can be used to calculate the RoPE `factor` multiplier. The final `max_position_embeddings` will be set to the original value multiplied by the `factor`. If `rope_scaling` is a JSON string, this value will not take effect.
- device_map: Device map configuration used by the model, such as 'auto', 'cpu', JSON string, or the path of a JSON file. The default is None, automatically set based on the device and distributed training conditions.
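The defaulting rule for `problem_type` can be sketched as a small helper. `resolve_problem_type` is a hypothetical name for illustration only; ms-swift applies this logic internally at model-load time rather than exposing such a function:

```python
def resolve_problem_type(problem_type=None, num_labels=None, is_reward_model=False):
    """Sketch of the documented default: an explicit value wins; otherwise
    reward models and single-label heads fall back to regression."""
    if problem_type is not None:
        return problem_type
    if is_reward_model or num_labels == 1:
        return 'regression'
    return 'single_label_classification'

print(resolve_problem_type(num_labels=1))   # regression
print(resolve_problem_type(num_labels=3))   # single_label_classification
```

An explicitly passed `--problem_type` (e.g. `multi_label_classification`) always takes precedence over the inferred default.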
2 changes: 1 addition & 1 deletion docs/source_en/Megatron-SWIFT/Command-line-parameters.md
@@ -283,7 +283,7 @@ Megatron training parameters are inherited from Megatron parameters and basic pa
- enable_channel_loss: Enable channel loss, default is `false`. You need to prepare a "channel" field in your dataset; ms-swift will compute and aggregate the loss grouped by this field. For dataset format, please refer to [channel loss](../Customization/Custom-dataset.md#channel-loss).
- 🔥task_type: Defaults to "causal_lm". Options: "causal_lm", "seq_cls".
- num_labels: Required for classification models (i.e., `--task_type seq_cls`). Represents the number of labels; default is None.
-- problem_type: Required for classification models (i.e., `--task_type seq_cls`). Options: "regression", "single_label_classification", "multi_label_classification". Default is "single_label_classification".
+- problem_type: Required for classification models (i.e., `--task_type seq_cls`). Options: "regression", "single_label_classification", "multi_label_classification". Defaults to None. If the model is a reward_model or num_labels equals 1, this parameter is 'regression'; otherwise it is 'single_label_classification'.


## RLHF Parameters
3 changes: 1 addition & 2 deletions swift/llm/argument/base_args/model_args.py
@@ -46,8 +46,7 @@ class ModelArguments:
     new_special_tokens: List[str] = field(default_factory=list)
 
     num_labels: Optional[int] = None
-    problem_type: Literal['regression', 'single_label_classification',
-                          'multi_label_classification'] = 'single_label_classification'
+    problem_type: Literal['regression', 'single_label_classification', 'multi_label_classification'] = None
     rope_scaling: Optional[str] = None
     device_map: Optional[Union[dict, str]] = None
     max_memory: Optional[Union[dict, str]] = None
17 changes: 10 additions & 7 deletions swift/llm/model/register.py
@@ -232,6 +232,7 @@ def get_model_tokenizer_from_local(model_dir: str,
     rope_scaling = kwargs.get('rope_scaling')
     max_model_len = kwargs.get('max_model_len')
     return_dummy_model = kwargs.get('return_dummy_model')
+    model_meta = kwargs.get('model_meta')
     if rope_scaling:
         HfConfigFactory.set_config_attr(model_config, 'rope_scaling', rope_scaling)
     if max_model_len:
@@ -245,6 +246,15 @@ def get_model_tokenizer_from_local(model_dir: str,
         model_info.num_labels = num_labels
         model_config.num_labels = num_labels
 
+    if model_info.task_type == 'seq_cls':
+        problem_type = kwargs.get('problem_type')
+        if problem_type is None:
+            if model_info.num_labels == 1 or model_meta.is_reward:
+                problem_type = 'regression'
+            else:
+                problem_type = 'single_label_classification'
+        model_config.problem_type = problem_type
+
     if model_info.quant_method == 'fp8':
         torch_dtype = 'auto'
         model = None
@@ -260,7 +270,6 @@ def get_model_tokenizer_from_local(model_dir: str,
         model = None
 
     automodel_class = automodel_class or AutoModelForCausalLM
-    model_meta = kwargs['model_meta']
     context_kwargs = {
         'model_info': model_info,
         'model_meta': model_meta,
@@ -715,12 +724,6 @@ def get_model_tokenizer(
         # fix transformers==4.52.4 qwen2.5-vl
         HfConfigFactory.set_config_attr(llm_model.config, 'vocab_size', vocab_size)
 
-    if task_type == 'seq_cls':
-        problem_type = kwargs.get('problem_type')
-        if problem_type is None and model_info.num_labels == 1:
-            problem_type = 'regression'
-        if problem_type is not None:
-            model_info.config.problem_type = problem_type
     tokenizer.model_info = model_info
     tokenizer.model_meta = model_meta
 
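For illustration, the branch moved into `get_model_tokenizer_from_local` behaves roughly as below. `SimpleNamespace` objects stand in for the real `model_info`/`model_meta`/`model_config` instances; this is a sketch of the control flow, not ms-swift's actual objects:

```python
from types import SimpleNamespace

# Stand-ins for the objects the loader receives.
model_info = SimpleNamespace(task_type='seq_cls', num_labels=1)
model_meta = SimpleNamespace(is_reward=False)
model_config = SimpleNamespace(problem_type=None)

problem_type = None  # i.e. kwargs.get('problem_type') returned None
if model_info.task_type == 'seq_cls':
    if problem_type is None:
        # Reward models and single-label heads default to regression.
        if model_info.num_labels == 1 or model_meta.is_reward:
            problem_type = 'regression'
        else:
            problem_type = 'single_label_classification'
    model_config.problem_type = problem_type

print(model_config.problem_type)  # regression
```

Compared with the removed block in `get_model_tokenizer`, the new code also covers reward models via `model_meta.is_reward` and always writes the resolved value onto the config.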
3 changes: 1 addition & 2 deletions swift/megatron/argument/megatron_args.py
@@ -102,8 +102,7 @@ class ExtraMegatronArguments(RLHFMegatronArgumentsMixin, MegatronTunerMixin):
     enable_channel_loss: bool = False
     task_type: Literal['causal_lm', 'seq_cls'] = None
     num_labels: Optional[int] = None
-    problem_type: Literal['regression', 'single_label_classification',
-                          'multi_label_classification'] = 'single_label_classification'
+    problem_type: Literal['regression', 'single_label_classification', 'multi_label_classification'] = None
 
     original_max_position_embeddings: Optional[int] = None
     partial_rotary_factor: Optional[float] = None