diff --git a/mindnlp/core/npu/amp/__init__.py b/mindnlp/core/npu/amp/__init__.py
index de8bea8a6..34b9bfbaa 100644
--- a/mindnlp/core/npu/amp/__init__.py
+++ b/mindnlp/core/npu/amp/__init__.py
@@ -1,8 +1,9 @@
 from .autocast_mode import autocast, custom_bwd, custom_fwd
-
+from .grad_scaler import GradScaler
 
 __all__ = [
     "autocast",
     "custom_bwd",
     "custom_fwd",
+    "GradScaler"
 ]
\ No newline at end of file
diff --git a/mindnlp/core/npu/amp/grad_scaler.py b/mindnlp/core/npu/amp/grad_scaler.py
new file mode 100644
index 000000000..07adf1524
--- /dev/null
+++ b/mindnlp/core/npu/amp/grad_scaler.py
@@ -0,0 +1,38 @@
+from typing_extensions import deprecated
+
+from mindnlp import core
+
+# We need to keep this unused import for BC reasons
+from ...amp.grad_scaler import OptState  # noqa: F401
+
+
+__all__ = ["GradScaler"]
+
+
+class GradScaler(core.amp.GradScaler):
+    r"""
+    See :class:`torch.amp.GradScaler`.
+    ``torch.npu.amp.GradScaler(args...)`` is deprecated. Please use ``torch.amp.GradScaler("npu", args...)`` instead.
+    """
+
+    @deprecated(
+        "`torch.npu.amp.GradScaler(args...)` is deprecated. "
+        "Please use `torch.amp.GradScaler('npu', args...)` instead.",
+        category=FutureWarning,
+    )
+    def __init__(
+        self,
+        init_scale: float = 2.0**16,
+        growth_factor: float = 2.0,
+        backoff_factor: float = 0.5,
+        growth_interval: int = 2000,
+        enabled: bool = True,
+    ) -> None:
+        super().__init__(
+            "npu",
+            init_scale=init_scale,
+            growth_factor=growth_factor,
+            backoff_factor=backoff_factor,
+            growth_interval=growth_interval,
+            enabled=enabled,
+        )
\ No newline at end of file
diff --git a/mindnlp/core/ops/creation.py b/mindnlp/core/ops/creation.py
index 91e07f539..f5f89ae59 100644
--- a/mindnlp/core/ops/creation.py
+++ b/mindnlp/core/ops/creation.py
@@ -189,11 +189,13 @@ def empty(*size, dtype=None, device=None, requires_grad=False, pin_memory=False,
         device = 'CPU'
     elif device.lower() == 'npu':
         device = 'Ascend'
-    else:
+    elif device.lower() == 'cuda':
         device = 'GPU'
+    else:
+        device = 'meta'
 
     # To avoid the problem in irecv and recv of using empty.
-    if has_empty and use_pyboost():
+    if device != 'meta':
         out = mindspore.mint.empty(size, dtype=dtype, device=device)
     else:
         out = CTensor(dtype=dtype, shape=size)
diff --git a/mindnlp/utils/torch_proxy.py b/mindnlp/utils/torch_proxy.py
index 81950036e..ae1fccae8 100644
--- a/mindnlp/utils/torch_proxy.py
+++ b/mindnlp/utils/torch_proxy.py
@@ -6,6 +6,8 @@
 import importlib.machinery
 from types import ModuleType
 
+from mindnlp.core.configs import DEVICE_TARGET
+
 TORCH_VERSION = '2.7.1+dev'
 
 class RedirectFinder(importlib.abc.MetaPathFinder):
@@ -19,6 +21,8 @@ def find_spec(self, fullname, path, target=None):
         if fullname == proxy_prefix or fullname.startswith(proxy_prefix + "."):
             # 计算实际模块名
             target_name = fullname.replace(proxy_prefix, target_prefix, 1)
+            if DEVICE_TARGET == 'Ascend':
+                target_name = target_name.replace('cuda', 'npu')
             try:
                 importlib.import_module(target_name)
             except Exception as e: