Add missing torch.distributed.ReduceOp.AVG in type stubs #101534

Closed · wants to merge 2 commits
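
For context: `ReduceOp.AVG` is available at runtime on the NCCL backend, but it was missing from the `_distributed_c10d.pyi` stubs, so type checkers rejected otherwise-valid calls. A minimal sketch of the kind of user code this unblocks (assumes an initialized NCCL process group; the tensor values are illustrative):

```python
import torch
import torch.distributed as dist

# Assumes dist.init_process_group("nccl") has already run on each rank.
t = torch.ones(4, device="cuda")

# Before this change, mypy flagged ReduceOp.AVG as a missing attribute,
# even though the op works at runtime on the NCCL backend.
dist.all_reduce(t, op=dist.ReduceOp.AVG)  # t now holds the mean across ranks
```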
19 changes: 10 additions & 9 deletions torch/_C/_distributed_c10d.pyi
@@ -96,15 +96,16 @@ class DebugLevel(Enum):
 class ReduceOp:
     def __init__(self, op: "RedOpType"): ...

-    SUM = ...
-    PRODUCT = ...
-    MIN = ...
-    MAX = ...
-    BAND = ...
-    BOR = ...
-    BXOR = ...
-    PREMUL_SUM = ...
-    UNUSED = ...
+    SUM: "RedOpType" = ...
+    AVG: "RedOpType" = ...
+    PRODUCT: "RedOpType" = ...
+    MIN: "RedOpType" = ...
+    MAX: "RedOpType" = ...
+    BAND: "RedOpType" = ...
+    BOR: "RedOpType" = ...
+    BXOR: "RedOpType" = ...
+    PREMUL_SUM: "RedOpType" = ...
+    UNUSED: "RedOpType" = ...

     class RedOpType(Enum): ...
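
With the members annotated as `RedOpType`, downstream annotations can name `ReduceOp.RedOpType` directly instead of suppressing assignment errors. A small illustration (the `pick_op` helper is hypothetical, shown only to demonstrate what the stubs now let a type checker verify):

```python
from torch.distributed import ReduceOp

# Hypothetical helper: both branches return RedOpType members, which mypy
# can now confirm because SUM/AVG/... are typed as "RedOpType" in the stubs.
def pick_op(use_avg: bool) -> "ReduceOp.RedOpType":
    return ReduceOp.AVG if use_avg else ReduceOp.SUM
```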
8 changes: 4 additions & 4 deletions torch/distributed/_tensor/device_mesh.py
@@ -390,7 +390,7 @@ def all_gather(
     def all_reduce(
         self,
         tensor: torch.Tensor,
-        op: ReduceOp = ReduceOp.SUM,  # type: ignore[assignment]
+        op: ReduceOp.RedOpType = ReduceOp.SUM,
         mesh_dim: int = 0,
         async_op: bool = False,
     ) -> torch.Tensor:
@@ -409,7 +409,7 @@ def all_reduce(
             A :class:`torch.Tensor` object
         """
         dim_group = self._dim_groups[mesh_dim]
-        op_name: str = op.name  # type: ignore[attr-defined]
+        op_name: str = op.name
         return funcol.all_reduce(
             tensor,
             reduceOp=op_name,
@@ -422,7 +422,7 @@ def all_reduce(
     def reduce_scatter(
         self,
         input: torch.Tensor,
-        op: ReduceOp = ReduceOp.SUM,  # type: ignore[assignment]
+        op: ReduceOp.RedOpType = ReduceOp.SUM,
         mesh_dim: int = 0,
         scatter_dim: int = 0,
     ) -> torch.Tensor:
@@ -441,7 +441,7 @@ def reduce_scatter(
         Returns:
             A :class:`torch.Tensor` object
         """
-        op_name: str = op.name  # type: ignore[attr-defined]
+        op_name: str = op.name
         if self._backend == "nccl" or self._backend == "threaded":
             dim_group = self._dim_groups[mesh_dim]
             scatter_tensor = funcol.reduce_scatter_tensor(
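
With `op` typed as `ReduceOp.RedOpType`, callers can pass enum members without `# type: ignore`. A sketch under the assumption of a 1-D mesh over four ranks (this is the internal `_tensor` `DeviceMesh` as of this PR, subject to change):

```python
import torch
from torch.distributed import ReduceOp
from torch.distributed._tensor.device_mesh import DeviceMesh

# Assumes the default process group is initialized with world_size == 4.
mesh = DeviceMesh("cuda", list(range(4)))
local = torch.rand(8, device="cuda")

# op is annotated as ReduceOp.RedOpType, so ReduceOp.AVG type-checks cleanly.
averaged = mesh.all_reduce(local, op=ReduceOp.AVG, mesh_dim=0)
```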
2 changes: 1 addition & 1 deletion torch/distributed/_tensor/ops/math_ops.py
@@ -101,7 +101,7 @@ def mean_rule(op_schema: OpSchema) -> OutputSharding:
     for placement in output_sharding.output_spec.placements:
         if placement.is_partial():
             partial_placement = cast(_Partial, placement)
-            partial_placement.reduce_op = c10d.ReduceOp.AVG  # type: ignore[attr-defined]
+            partial_placement.reduce_op = c10d.ReduceOp.AVG

     return output_sharding
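
The `mean_rule` assignment is what makes a sharded `mean` reduce correctly: per-rank partial results are combined with `ReduceOp.AVG` rather than summed. A rough sketch of the scenario it governs (internal DTensor API of this era; assumes four ranks with an initialized NCCL process group):

```python
import torch
from torch.distributed._tensor import DTensor, DeviceMesh, Shard

mesh = DeviceMesh("cuda", [0, 1, 2, 3])
local = torch.randn(4, 8, device="cuda")

# Each rank contributes one row-shard of a 16 x 8 global tensor.
dx = DTensor.from_local(local, mesh, [Shard(0)])

# Reducing over the sharded dim leaves a _Partial placement whose
# reduce_op is now c10d.ReduceOp.AVG, so replication averages the
# per-rank partial means instead of summing them.
m = dx.mean(dim=0)
```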
14 changes: 5 additions & 9 deletions torch/distributed/_tensor/placement_types.py
@@ -175,7 +175,7 @@ def _reduce_shard_tensor(
     def _reduce_shard_tensor(
         self,
         tensor: torch.Tensor,
         mesh: DeviceMesh,
-        reduce_op: c10d.ReduceOp,
+        reduce_op: c10d.ReduceOp.RedOpType,
         mesh_dim: int,
     ) -> torch.Tensor:
         """
@@ -328,15 +328,13 @@ class _Partial(Placement):
     # We can implement custom reductions as needed by subclassing this
     # class and override those contracts.

-    def __init__(self, reduce_op: c10d.ReduceOp = c10d.ReduceOp.SUM):  # type: ignore[assignment]
-        self.reduce_op: c10d.ReduceOp = reduce_op
+    def __init__(self, reduce_op: c10d.ReduceOp.RedOpType = c10d.ReduceOp.SUM):
+        self.reduce_op: c10d.ReduceOp.RedOpType = reduce_op

     def _to_replicate(
         self, tensor: torch.Tensor, mesh: DeviceMesh, mesh_dim: int
     ) -> torch.Tensor:
-        return mesh.all_reduce(
-            tensor, self.reduce_op, mesh_dim=mesh_dim  # type: ignore[call-arg]
-        )
+        return mesh.all_reduce(tensor, self.reduce_op, mesh_dim=mesh_dim)

     def _to_shard(
         self,
@@ -347,9 +345,7 @@ def _to_shard(
     ) -> torch.Tensor:
         # by default call reduce_shard_tensor of the shard_spec.
         shard_spec = cast(Shard, shard_spec)
-        return shard_spec._reduce_shard_tensor(
-            tensor, mesh, self.reduce_op, mesh_dim  # type: ignore[call-arg]
-        )
+        return shard_spec._reduce_shard_tensor(tensor, mesh, self.reduce_op, mesh_dim)

     def __eq__(self, other: object) -> bool:
         if not isinstance(other, _Partial):
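
After this change, `_Partial` carries a `RedOpType` member end to end, so constructing one no longer needs `type: ignore`. A minimal sketch (`_Partial` is a private class; shown only to illustrate the new annotation):

```python
from torch.distributed import ReduceOp
from torch.distributed._tensor.placement_types import _Partial

# reduce_op is now annotated as c10d.ReduceOp.RedOpType, which matches
# what ReduceOp.SUM / ReduceOp.AVG actually are at runtime.
p_sum = _Partial()                        # defaults to ReduceOp.SUM
p_avg = _Partial(reduce_op=ReduceOp.AVG)

print(p_sum.reduce_op)  # RedOpType.SUM
print(p_avg.reduce_op)  # RedOpType.AVG
```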