From ce1147f27572cd716db97fc582868bb0b73e975c Mon Sep 17 00:00:00 2001 From: lvyufeng Date: Thu, 10 Jul 2025 15:23:10 +0800 Subject: [PATCH] fix all bert ut and skip useless ut --- mindnlp/core/_tensor.py | 4 ++ mindnlp/core/dispatcher.py | 48 ------------------- .../core/distributed/c10d/process_group.py | 3 -- mindnlp/core/executor.py | 41 ---------------- mindnlp/core/nn/functional.py | 41 +++++----------- tests/run_test.py | 10 +++- 6 files changed, 25 insertions(+), 122 deletions(-) delete mode 100644 mindnlp/core/dispatcher.py delete mode 100644 mindnlp/core/executor.py diff --git a/mindnlp/core/_tensor.py b/mindnlp/core/_tensor.py index f9c116e63..1d7f3a7b0 100644 --- a/mindnlp/core/_tensor.py +++ b/mindnlp/core/_tensor.py @@ -335,6 +335,10 @@ def data(self, new_value): Tensor.data = data StubTensor.data = data + Tensor.narrow = ops.narrow + StubTensor.narrow = ops.narrow + + def _rebuild_from_type_v2(func, new_type, args, state): ret = func(*args) return ret \ No newline at end of file diff --git a/mindnlp/core/dispatcher.py b/mindnlp/core/dispatcher.py deleted file mode 100644 index 165dd5617..000000000 --- a/mindnlp/core/dispatcher.py +++ /dev/null @@ -1,48 +0,0 @@ -from mindnlp import core -from mindnlp.core.types import device as device_ -from mindnlp.core._prims import ascend, cpu - -device_map = { - 'cpu': 'CPU', - 'npu': 'Ascend', - 'cuda': 'GPU' -} - -class SingletonMeta(type): - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - instance = super().__call__(*args, **kwargs) - cls._instances[cls] = instance - return cls._instances[cls] - -class Dispatcher(metaclass=SingletonMeta): - def __init__(self): - self._registry = { - 'cpu': {}, - 'npu': {}, - 'gpu': {} - } - - def register(self, func_name, device, func): - self._registry[device][func_name] = func - - def dispatch(self, func_name, *args, **kwargs): - device = kwargs.pop('device', None) - if isinstance(device, str): - device = device_(device) - - if device is None: - device = args[0].device - - func = self._registry[device.type].get(func_name, None) - if func is None: - raise RuntimeError(f"No implementation for function: {func_name} on {device.type}.") - return func(*args), device - -dispatcher = Dispatcher() -for func_name in ascend.__all__: - dispatcher.register(func_name.replace('_npu', ''), 'npu', getattr(ascend, func_name)) -for func_name in cpu.__all__: - dispatcher.register(func_name.replace('_cpu', ''), 'cpu', getattr(cpu, func_name)) diff --git a/mindnlp/core/distributed/c10d/process_group.py b/mindnlp/core/distributed/c10d/process_group.py index 047a37b4d..b21c6c336 100644 --- a/mindnlp/core/distributed/c10d/process_group.py +++ b/mindnlp/core/distributed/c10d/process_group.py @@ -3,9 +3,6 @@ from typing import List, Optional, Dict, Any from enum import Enum -from mindnlp.core.executor import execute - - class BackendType(Enum): UNDEFINED = 0 GLOO = 1 diff --git a/mindnlp/core/executor.py b/mindnlp/core/executor.py deleted file mode 100644 index c353c69fd..000000000 --- a/mindnlp/core/executor.py +++ /dev/null @@ -1,41 +0,0 @@ -import mindspore -from mindspore._c_expression import TensorNode, SequenceNode, NoneTypeNode, AnyTypeNode, Tensor as MSTensor -import mindspore.common._stub_tensor -from mindspore.common.api import _pynative_executor -from mindspore.common._stub_tensor import _convert_python_data - -from mindnlp import core -from ._tensor import Tensor -from .dispatcher import dispatcher - -def _convert_stub(stub, device): - "convert stub to StubNode or Value" - if isinstance(stub, (MSTensor, TensorNode)): - return Tensor(stub, device=device) - if isinstance(stub, tuple): - return tuple(_convert_stub(e, device) for e in stub) - if isinstance(stub, SequenceNode): - elements = stub.get_elements() - return tuple(_convert_stub(e, device) for e in elements) - if isinstance(stub, NoneTypeNode): - val = stub.get_real_value() - return _convert_python_data(val) - if isinstance(stub, AnyTypeNode): - val = stub.get_real_node() - return _convert_stub(val, device) - return _convert_python_data(stub) - - -def execute(func_name, *args, **kwargs): - requires_grad = kwargs.pop('requires_grad', False) - user_created = kwargs.pop('user_created', False) - out, device = dispatcher.dispatch(func_name, *args, **kwargs) - out_tensor = _convert_stub(out, device=device) - if requires_grad: - out_tensor._requires_grad = True - if user_created: - out_tensor._user_created = True - out_tensor.attach_grad() - - return out_tensor - diff --git a/mindnlp/core/nn/functional.py b/mindnlp/core/nn/functional.py index 241df33ae..e09ba5c74 100644 --- a/mindnlp/core/nn/functional.py +++ b/mindnlp/core/nn/functional.py @@ -4,12 +4,9 @@ from typing import Optional, Tuple, List import numpy as np from mindspore import ops, mint -from mindspore.ops.auto_generate.gen_arg_handler import dtype_to_type_id -from mindspore.common.generator import default_generator from mindspore.ops._primitive_cache import _get_cache_prim from mindnlp import core -from mindnlp.core.executor import execute from ..configs import DEVICE_TARGET, ON_ORANGE_PI, use_pyboost, ON_A1 generator_step_ = 12 @@ -237,28 +234,6 @@ def apply_rotary_pos_emb(query, key, cos, sin, position_ids, cos_format=0): query, key, cos, sin, position_ids, cos_format ) -def _reflection_pad(input, pad): - """reflection pad""" - out = input - if len(pad) == 2: - out = execute('reflection_pad_1d', input, pad) - elif len(pad) == 4: - out = execute('reflection_pad_2d', input, pad) - else: - out = execute('reflection_pad_3d', input, pad) - return out - -def _replication_pad(input, pad): - """replication pad""" - out = input - if len(pad) == 2: - out = execute('replication_pad_1d', input, pad) - elif len(pad) == 4: - out = execute('replication_pad_2d', input, pad) - else: - out = execute('replication_pad_3d', input, pad) - return out - def pad(input, pad, mode='constant', value=0.0): if sum(pad) == 0: return input @@ -268,7 +243,16 @@ def pad(input, pad, mode='constant', value=0.0): return mint.nn.functional.pad(input, pad, mode, value) if mode in ['reflect', 'circular']: return ops.pad(input, pad, mode) - return ops.pad(input, pad, mode, value) + new_pad = () + for idx, pad_v in enumerate(pad): + if pad_v < 0: + dim = idx // 2 + input = input.narrow(dim, 0, input.shape[dim] + pad_v) + pad_v = 0 + new_pad += (pad_v,) + if sum(new_pad) == 0: + return input + return ops.pad(input, new_pad, mode, value) def nll_loss(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0): return _inner_nll_loss(input, target, weight, ignore_index, reduction, label_smoothing) @@ -656,8 +640,9 @@ def scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_cau query = query / scaling_factor if is_causal: - L = query.shape[-2], S = key.shape[-2] - attn_mask = ops.ones((L, S), mindspore.bool_).tril() + L = query.shape[-2] + S = key.shape[-2] + attn_mask = ops.ones((L, S), core.bool_).tril() attn = ops.matmul(query, key.swapaxes(-2, -1) / scaling_factor) if attn_mask is not None: diff --git a/tests/run_test.py b/tests/run_test.py index 6e2ae52f3..d92826b3e 100644 --- a/tests/run_test.py +++ b/tests/run_test.py @@ -17,11 +17,17 @@ def run_tests(): """ # 获取命令行参数(排除脚本名本身) pytest_args = sys.argv[1:] - + skip_ut = "not sdpa " \ + "and not headmasking " \ + "and not gradient_checkpointing " \ + "and not retain_grad " \ + "and not data_parallel" + + pytest_args.extend(['-k', skip_ut]) if not pytest_args: print("未提供参数,默认运行当前目录下所有测试") print("使用示例: python run_test.py -v tests/") - + # 执行测试并获取退出码 exit_code = pytest.main(pytest_args)