Skip to content

Commit

Permalink
add a unique function for guarding index and remove the unused function
Browse files Browse the repository at this point in the history
  • Loading branch information
CaoE committed Feb 8, 2023
1 parent b410189 commit a7b9eae
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 35 deletions.
29 changes: 19 additions & 10 deletions aten/src/ATen/native/UpSample.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,18 @@ static inline scalar_t cubic_interp1d(
return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3];
}

// When `real_input_index` becomes larger than the range the floating point
// type can accurately represent, the type casting to `int64_t` might exceed
// `input_size - 1`, causing overflow. So we guard the index with `std::min`
// below, and clamp the interpolation weight to [0, 1] so that a clamped
// index can never be paired with a weight outside the valid range.
//
// Outputs (by reference):
//   input_index - floor(real_input_index), clamped to at most input_size - 1
//   lambda      - fractional interpolation weight, clamped to [0, 1]
template<typename scalar_t, typename opmath_t>
static inline void guard_index_and_lambda(const opmath_t& real_input_index, const int64_t& input_size, int64_t& input_index, scalar_t& lambda) {
  // Use std::floor, not floorf: floorf converts a double `real_input_index`
  // to float first, losing precision for large indices — the very case this
  // guard exists for. std::floor picks the overload matching opmath_t.
  input_index = std::min(static_cast<int64_t>(std::floor(real_input_index)), input_size - 1);
  lambda = std::min(
      std::max(real_input_index - input_index, static_cast<opmath_t>(0)),
      static_cast<opmath_t>(1)
    );
}

template<typename scalar_t, typename opmath_t>
static inline void compute_source_index_and_lambda(
int64_t& input_index0,
Expand All @@ -449,23 +461,20 @@ static inline void compute_source_index_and_lambda(
const auto real_input_index =
area_pixel_compute_source_index<opmath_t>(
ratio, output_index, align_corners, /*cubic=*/false);
// when `real_input_index` becomes larger than the range the floating point
// type can accurately represent, the type casting to `int64_t` might exceed
// `input_size - 1`, causing overflow. So we guard it with `std::min` below.
input_index0 = std::min(static_cast<int64_t>(real_input_index), input_size - 1);
guard_index_and_lambda(real_input_index, input_size, input_index0, lambda1);
int64_t offset = (input_index0 < input_size - 1) ? 1 : 0;
input_index1 = input_index0 + offset;
lambda1 = std::min(
std::max(real_input_index - input_index0, static_cast<opmath_t>(0)),
static_cast<opmath_t>(1)
);
lambda0 = static_cast<scalar_t>(1.) - lambda1;
}
}

// For compilation, and it will not be used by data types other than BFloat16.
// It will not be used by data types other than BFloat16.
template <typename scalar_in, typename scalar_out>
void inline apply_grad_input(scalar_out* buffer_ptr, scalar_in* gin, int64_t size) {
void inline apply_grad_input(scalar_in* buffer_ptr, scalar_out* gin, int64_t size) {
TORCH_CHECK((std::is_same<scalar_out, BFloat16>::value),
"Upsample backward only support BFloat16 in the lower percision data types on CPU.")
TORCH_CHECK((std::is_same<scalar_in, float>::value),
"Upsample backward should use float as acc buffer for BFloat16 grad input on CPU.")
return;
}

Expand Down
19 changes: 6 additions & 13 deletions aten/src/ATen/native/UpSampleBicubic2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,21 +142,14 @@ static void upsample_bicubic2d_backward_out_frame(
for (const auto output_x : c10::irange(output_width)) {

const opmath_t real_x = area_pixel_compute_source_index(width_scale, output_x, align_corners, /*cubic=*/true);
// when `real_x` becomes larger than the range the floating point
// type can accurately represent, the type casting to `int64_t` might exceed
// `input_width - 1`. So we guard it with `std::min` below.
int64_t input_x = std::min(static_cast<int64_t>(floorf(real_x)), input_width - 1);
opmath_t t_x = std::min(
std::max(real_x - input_x, static_cast<opmath_t>(0)),
static_cast<opmath_t>(1)
);
int64_t input_x;
opmath_t t_x;
guard_index_and_lambda(real_x, input_width, input_x, t_x);

const opmath_t real_y = area_pixel_compute_source_index(height_scale, output_y, align_corners, /*cubic=*/true);
int64_t input_y = std::min(static_cast<int64_t>(floorf(real_y)), input_height - 1);
opmath_t t_y = std::min(
std::max(real_y - input_y, static_cast<opmath_t>(0)),
static_cast<opmath_t>(1)
);
int64_t input_y;
opmath_t t_y;
guard_index_and_lambda(real_y, input_height, input_y, t_y);

// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
opmath_t x_coeffs[4];
Expand Down
10 changes: 2 additions & 8 deletions aten/src/ATen/native/cpu/UpSampleKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -980,14 +980,8 @@ struct HelperInterpCubic : public HelperInterpBase {
const auto real_input_index =
area_pixel_compute_source_index<opmath_t>(
scale, i, align_corners, /*cubic=*/true);
// when `real_input_index` becomes larger than the range the floating point
// type can accurately represent, the type casting to `int64_t` might exceed
// `input_size - 1`. So we guard it with `std::min` below.
input_index = std::min(static_cast<int64_t>(floorf(real_input_index)), input_size - 1);
auto lambda = std::min(
std::max(real_input_index - input_index, static_cast<opmath_t>(0)),
static_cast<opmath_t>(1)
);
opmath_t lambda;
guard_index_and_lambda(real_input_index, input_size, input_index, lambda);
get_cubic_upsample_coefficients<opmath_t>(coeffs, lambda);

for (const auto j : c10::irange(interp_size)) {
Expand Down
12 changes: 8 additions & 4 deletions aten/src/ATen/native/cpu/UpSampleMoreKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ namespace {

using scale_t = std::vector<c10::optional<double>>;

template <typename scalar_in, typename scalar_out>
void inline nearest_channels_last_acc(scalar_in* gin, scalar_out* gout, int64_t size) {
template <typename acc_t, typename scalar_t>
void inline nearest_channels_last_acc(acc_t* gin, scalar_t* gout, int64_t size) {
TORCH_CHECK((std::is_same<acc_t, scalar_t>::value),
"acc data type of Upsample backward should be same as scalar_t for float or double on CPU.")
using Vec = vec::Vectorized<scalar_in>;
int64_t d = 0;
for (; d < size - (size % Vec::size()); d += Vec::size()) {
Expand Down Expand Up @@ -46,8 +48,10 @@ void inline nearest_channels_last_acc(float* gin, BFloat16* gout, int64_t size)
}
}

template <typename scalar_in, typename scalar_out>
void inline linear_channels_last_acc(scalar_in* gin, scalar_out* gout, scalar_in w, int64_t size) {
template <typename acc_t, typename scalar_t>
void inline linear_channels_last_acc(acc_t* gin, scalar_t* gout, acc_t w, int64_t size) {
TORCH_CHECK((std::is_same<acc_t, scalar_t>::value),
"acc data type of Upsample backward should be same as scalar_t for float or double on CPU.")
using Vec = vec::Vectorized<scalar_in>;
int64_t d = 0;
for (; d < size - (size % Vec::size()); d += Vec::size()) {
Expand Down

0 comments on commit a7b9eae

Please sign in to comment.