pytorch · razarmehr · Dec 8, 2022 · Jan 4, 2023 · Jan 4, 2023
diff --git a/aten/src/ATen/native/mps/operations/TensorCompare.mm b/aten/src/ATen/native/mps/operations/TensorCompare.mm
@@ -2,7 +2,7 @@
 
 #include <ATen/native/mps/OperationUtils.h>
 #include <ATen/native/TensorCompare.h>
-#include <ATen/TensorUtils.h>
+#include <ATen/native/Resize.h>
 
 namespace at {
 namespace native {
@@ -416,5 +416,125 @@ Tensor where_mps(const Tensor& condition,
 
 }
 
+// Out-variant of nan_to_num for the MPS backend: writes into `result` a copy of `self` in which
+// NaN, +inf and -inf elements are substituted by the given replacement values.
+//
+//   self    - input tensor.
+//   nan     - replacement for NaN; 0 when not provided.
+//   pos_inf - replacement for +inf; numeric_limits<dtype>::max() when not provided.
+//   neg_inf - replacement for -inf; numeric_limits<dtype>::lowest() when not provided.
+//   result  - output tensor; must have the same dtype as `self` and is resized to its shape.
+//
+// Returns `result`. Integral and bool inputs are copied through unchanged.
+Tensor& nan_to_num_out_mps(const Tensor& self,
+                           c10::optional<double> nan,
+                           c10::optional<double> pos_inf,
+                           c10::optional<double> neg_inf,
+                           Tensor& result) {
+  TORCH_CHECK(self.scalar_type() == result.scalar_type(), "nan_to_num: dtype of out: ",
+              result.scalar_type(), " should be same as input: ", self.scalar_type());
+  // Resize up front, for every dtype: an `out` tensor with a different shape (including an
+  // empty one paired with a non-empty input) must take the input's shape before the empty
+  // early-return below and before the graph writes into it.
+  at::native::resize_output(result, self.sizes());
+  if (result.numel() == 0) {
+    return result;
+  }
+  if (c10::isIntegralType(self.scalar_type(), /*includeBool=*/true)) {
+    result.copy_(self);
+    return result;
+  }
+  using namespace mps;
+  struct CachedGraph : public MPSCachedGraph {
+    CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
+    MPSGraphTensor* selfTensor = nil;
+    MPSGraphTensor* outputTensor = nil;
+    MPSGraphTensor* nanReplacementTensor = nil;
+    MPSGraphTensor* posInfReplacementTensor = nil;
+    MPSGraphTensor* negInfReplacementTensor = nil;
+  };
+  MPSGraphCache* cache_ = MPSGraphCache::getInstance();
+
+  @autoreleasepool {
+    // The replacement values are fed as graph inputs, so the cache key is built from `self` only.
+    string key = "nan_to_num" + getTensorsStringKey({self});
+    MPSDataType self_dtype = getMPSScalarType(self.scalar_type());
+
+    CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
+    if (!cachedGraph) {
+      cachedGraph = cache_->CreateCachedGraphAs<CachedGraph>(key, ^ MPSCachedGraph * () {
+        CachedGraph *newCachedGraph = nil;
+        @autoreleasepool {
+          MPSGraph* mpsGraph = make_mps_graph();
+          newCachedGraph = new CachedGraph(mpsGraph);
+
+          newCachedGraph->selfTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
+          newCachedGraph->nanReplacementTensor    = mpsGraphRankedPlaceHolder(mpsGraph, self_dtype, @[@1]);
+          newCachedGraph->posInfReplacementTensor = mpsGraphRankedPlaceHolder(mpsGraph, self_dtype, @[@1]);
+          newCachedGraph->negInfReplacementTensor = mpsGraphRankedPlaceHolder(mpsGraph, self_dtype, @[@1]);
+
+          // Derive every predicate from the original input. Chaining them through partially
+          // replaced tensors would re-classify a non-finite replacement value (e.g. nan=inf or
+          // neginf=-inf) and substitute it a second time.
+          MPSGraphTensor* inputTensor = newCachedGraph->selfTensor;
+          MPSGraphTensor* isNaNTensor = [mpsGraph isNaNWithTensor: inputTensor name:nil];
+          MPSGraphTensor* isInfTensor = [mpsGraph isInfiniteWithTensor: inputTensor name:nil];
+          MPSGraphTensor* subZeroTensor = [mpsGraph lessThanWithPrimaryTensor: inputTensor
+                                                              secondaryTensor: [mpsGraph constantWithScalar: 0.0 dataType: self_dtype]
+                                                                         name: nil];
+          // the cast is a workaround for the issue #103149520 (crash when bool and fp16 passed to binary ops)
+          MPSGraphTensor* isNegInfTensor = [mpsGraph logicalANDWithPrimaryTensor: [mpsGraph castTensor: subZeroTensor toType: self_dtype name: @"castTensor"]
+                                                                 secondaryTensor: isInfTensor
+                                                                            name: nil];
+          // Substitute every infinity with the +inf replacement first; -inf positions are overwritten next.
+          MPSGraphTensor* posInfFreeTensor = [mpsGraph selectWithPredicateTensor: isInfTensor
+                                                             truePredicateTensor: newCachedGraph->posInfReplacementTensor
+                                                            falsePredicateTensor: inputTensor
+                                                                            name: nil];
+          MPSGraphTensor* infFreeTensor = [mpsGraph selectWithPredicateTensor: isNegInfTensor
+                                                          truePredicateTensor: newCachedGraph->negInfReplacementTensor
+                                                         falsePredicateTensor: posInfFreeTensor
+                                                                         name: nil];
+          newCachedGraph->outputTensor = [mpsGraph selectWithPredicateTensor: isNaNTensor
+                                                         truePredicateTensor: newCachedGraph->nanReplacementTensor
+                                                        falsePredicateTensor: infFreeTensor
+                                                                        name: nil];
+        }
+        return newCachedGraph;
+      });
+    }
+    MPSScalar nanReplacementScalar, posInfReplacementScalar, negInfReplacementScalar;
+    AT_DISPATCH_FLOATING_TYPES_AND(kHalf, self.scalar_type(), "nan_to_num_mps", [&]() {
+        scalar_t nan_replacement = static_cast<scalar_t>(nan.value_or(0.));
+        scalar_t pos_inf_replacement = pos_inf.has_value() ?
+                                       static_cast<scalar_t>(pos_inf.value()) :
+                                       std::numeric_limits<scalar_t>::max();
+        scalar_t neg_inf_replacement = neg_inf.has_value() ?
+                                       static_cast<scalar_t>(neg_inf.value()) :
+                                       std::numeric_limits<scalar_t>::lowest();
+
+        nanReplacementScalar    = getMPSScalar(nan_replacement, self.scalar_type());
+        posInfReplacementScalar = getMPSScalar(pos_inf_replacement, self.scalar_type());
+        negInfReplacementScalar = getMPSScalar(neg_inf_replacement, self.scalar_type());
+    });
+
+    MPSStream* stream = getCurrentMPSStream();
+    Placeholder selfPlaceholder = Placeholder(cachedGraph->selfTensor, self);
+    Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, result);
+
+    NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
+      selfPlaceholder.getMPSGraphTensor()  : selfPlaceholder.getMPSGraphTensorData(),
+      cachedGraph->nanReplacementTensor    : getMPSGraphTensorFromScalar(stream, nanReplacementScalar),
+      cachedGraph->posInfReplacementTensor : getMPSGraphTensorFromScalar(stream, posInfReplacementScalar),
+      cachedGraph->negInfReplacementTensor : getMPSGraphTensorFromScalar(stream, negInfReplacementScalar),
+    };
+    NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
+      outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
+    };
+    runMPSGraph(stream, cachedGraph->graph(), feeds, results);
+  }
+  return result;
+}
+
 } // namespace native
 } // namespace at
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
@@ -3099,6 +3099,7 @@
 - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: nan_to_num_out
+    MPS: nan_to_num_out_mps
     SparseCPU, SparseCUDA: nan_to_num_sparse_out
   tags: pointwise
 

diff --git a/test/test_mps.py b/test/test_mps.py
@@ -5288,6 +5288,13 @@ def helper(shape, alpha):
         helper((2, 8, 3, 5), 0.1)
         helper((2, 8, 3, 5), 0.2)
 
+    def test_nan_to_num(self):
+        # nan_to_num on MPS must agree with CPU for NaN, +inf, -inf and an ordinary finite value
+        cpu_in = torch.tensor([float('nan'), float('inf'), -float('inf'), 3.14])
+        mps_in = cpu_in.detach().clone().to('mps').requires_grad_()
+        out_cpu, out_mps = (torch.nan_to_num(x, nan=2.0, posinf=1.0, neginf=-1.0) for x in (cpu_in, mps_in))
+        self.assertEqual(out_mps, out_cpu)
+
     # Test where
     def test_where(self):
         def helper(shape, x_shape, y_shape, cond_dtype=torch.bool, x_dtype=torch.float):
@@ -8030,6 +8037,7 @@ class TestConsistency(TestCase):
         'matmul': ['f32'],
         'mm': ['f32'],
         'mv': ['f32'],
+        'nan_to_num': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64', 'u8'],
         'neg': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64'],
         'nn.functional.adaptive_max_pool1d': ['f32'],
         'nn.functional.adaptive_max_pool2d': ['f32'],