From 000bdb060827f262e9f056a02ac6bb36a56af7ac Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Thu, 4 Sep 2025 11:07:40 -0400 Subject: [PATCH] Fix crash in op_upsample_bilinear2d_aa Fixes: https://github.com/pytorch/executorch/issues/13553 --- .../cpu/op_upsample_bilinear2d_aa.cpp | 20 +++++++ .../test/op_upsample_bilinear2d_aa_test.py | 52 +++++++++++++++++++ .../test/op_upsample_bilinear2d_aa_test.cpp | 46 ++++++++++++++++ 3 files changed, 118 insertions(+) diff --git a/kernels/portable/cpu/op_upsample_bilinear2d_aa.cpp b/kernels/portable/cpu/op_upsample_bilinear2d_aa.cpp index 728122e8e14..1553471e79a 100644 --- a/kernels/portable/cpu/op_upsample_bilinear2d_aa.cpp +++ b/kernels/portable/cpu/op_upsample_bilinear2d_aa.cpp @@ -61,6 +61,20 @@ void compute_aa_weights_for_pixel( *num_contributors = std::min(xmax - xmin, static_cast<int64_t>(4)); + // Ensure we have at least one contributor + if (*num_contributors <= 0) { + *num_contributors = 1; + indices[0] = std::max( + static_cast<int64_t>(0), + std::min(static_cast<int64_t>(center), input_size - 1)); + weights[0] = static_cast<T>(1.0); + // Clear unused weight slots + for (int64_t j = 1; j < 4; ++j) { + weights[j] = static_cast<T>(0.0); + } + return; + } + // PyTorch's weight computation T total_weight = static_cast<T>(0.0); const T invscale = (scale >= static_cast<T>(1.0)) @@ -84,6 +98,12 @@ for (int64_t j = 0; j < *num_contributors; ++j) { weights[j] /= total_weight; } + } else { + // Fallback: if total weight is 0, set equal weights + T equal_weight = static_cast<T>(1.0) / static_cast<T>(*num_contributors); + for (int64_t j = 0; j < *num_contributors; ++j) { + weights[j] = equal_weight; + } } // Clear unused weight slots diff --git a/kernels/portable/test/op_upsample_bilinear2d_aa_test.py b/kernels/portable/test/op_upsample_bilinear2d_aa_test.py index 4f63766801b..f86aa35465c 100644 --- a/kernels/portable/test/op_upsample_bilinear2d_aa_test.py +++ b/kernels/portable/test/op_upsample_bilinear2d_aa_test.py @@ -289,6 
+289,58 @@ def test_upsample_bilinear2d_aa_scale_factors_vs_output_size(self): # Skip this test if et_test namespace setup issues persist print(f"Skipping scale factors test due to: {e}") + def test_upsample_bilinear2d_aa_extreme_scale_factors(self): + """Test the specific case that exposed the segfault bug with extreme scale factors.""" + # Create input tensor with same data as C++ test to ensure consistency + input_tensor = torch.zeros(8, 2, 7, 1, dtype=torch.float32) + for i in range(8 * 2 * 7 * 1): + input_tensor.view(-1)[i] = i * 0.1 + + # Test the specific case that caused segfault before the fix + self.run_upsample_aa_test( + input_tensor, + output_size=[7, 2], + align_corners=False, + scale_factors=None, # Use explicit scale factors via direct call + atol=1e-2, # Relaxed tolerance for extreme scale factors + ) + + # Also test with direct ExecuTorch call using the extreme scale factors + try: + et_result = torch.zeros(8, 2, 7, 2, dtype=torch.float32) + et_result = torch.ops.et_test._upsample_bilinear2d_aa( + input_tensor, + [7, 2], # output_size + False, # align_corners + 0.010000000000000002, # scales_h (very small) + 10.0, # scales_w (very large) + out=et_result, + ) + + # Verify no NaN or Inf values (the bug would cause these) + self.assertFalse( + torch.isnan(et_result).any().item(), + "Output should not contain NaN values after bug fix", + ) + self.assertFalse( + torch.isinf(et_result).any().item(), + "Output should not contain Inf values after bug fix", + ) + + # Verify reasonable output values + self.assertTrue( + et_result.min().item() >= -100.0, + "Output values should be reasonable (not extremely negative)", + ) + self.assertTrue( + et_result.max().item() <= 100.0, + "Output values should be reasonable (not extremely positive)", + ) + + except RuntimeError as e: + # Skip the direct test if et_test namespace setup issues persist + print(f"Skipping direct extreme scale factors test due to: {e}") + if __name__ == "__main__": unittest.main() diff --git 
a/kernels/test/op_upsample_bilinear2d_aa_test.cpp b/kernels/test/op_upsample_bilinear2d_aa_test.cpp index b6a9e6c5bdb..c0cc69f35d9 100644 --- a/kernels/test/op_upsample_bilinear2d_aa_test.cpp +++ b/kernels/test/op_upsample_bilinear2d_aa_test.cpp @@ -625,3 +625,49 @@ TEST_F(OpUpsampleBilinear2dAAOutTest, TestPrecisionConsistency) { EXPECT_EQ(out1_data[i], out2_data[i]); } } + +TEST_F(OpUpsampleBilinear2dAAOutTest, TestSpecificInputCase) { + TensorFactory<ScalarType::Float> tf; + + // Test case with specific inputs: + // Input shape: [8, 2, 7, 1] + // Output size: [7, 2] + // align_corners: false + // scales_h: 0.010000000000000002 + // scales_w: 10.0 + Tensor input = tf.zeros({8, 2, 7, 1}); + auto in_data = input.mutable_data_ptr<float>(); + + // Fill with some test data + for (int i = 0; i < 8 * 2 * 7 * 1; i++) { + in_data[i] = static_cast<float>(i) * 0.1f; + } + + // Output shape will be [8, 2, 7, 2] + Tensor out = tf.zeros({8, 2, 7, 2}); + + int64_t output_size_data[2] = {7, 2}; + ArrayRef<int64_t> output_size(output_size_data, 2); + + op_upsample_bilinear2d_aa_out( + input, + output_size, + /*align_corners=*/false, + 0.010000000000000002, + 10.0, + out); + + // Verify output dimensions + EXPECT_EQ(out.size(0), 8); + EXPECT_EQ(out.size(1), 2); + EXPECT_EQ(out.size(2), 7); + EXPECT_EQ(out.size(3), 2); + + // Verify that output has reasonable values + auto out_data = out.const_data_ptr<float>(); + for (int i = 0; i < 8 * 2 * 7 * 2; i++) { + // Check for NaN or Inf + EXPECT_FALSE(std::isnan(out_data[i])); + EXPECT_FALSE(std::isinf(out_data[i])); + } +}