pytorch · manuelcandales · Apr 5, 2024
diff --git a/kernels/quantized/cpu/op_dequantize.cpp b/kernels/quantized/cpu/op_dequantize.cpp
@@ -33,7 +33,6 @@ void check_dequantize_per_tensor_args(
     int64_t quant_min,
     int64_t quant_max,
     ScalarType dtype,
-    exec_aten::optional<ScalarType>& out_dtype,
     Tensor& out) {
   ET_CHECK_MSG(
       input.scalar_type() == ScalarType::Byte ||
@@ -48,11 +47,10 @@ void check_dequantize_per_tensor_args(
       "input.scalar_type() %" PRId8 " is not matching dtype argumenta:",
       static_cast<int8_t>(input.scalar_type()));
 
-  if (out_dtype.has_value()) {
-    ET_CHECK_MSG(
-        out.scalar_type() == out_dtype.value(),
-        "output_dtype must match the dtype of the out tensor");
-  }
+  ET_CHECK_MSG(
+      out.scalar_type() == ScalarType::Float,
+      "out.scalar_type() %" PRId8 " is not supported:",
+      static_cast<int8_t>(out.scalar_type()));
 
   ET_CHECK_MSG(
       quant_min <= quant_max,
@@ -79,15 +77,13 @@ Tensor& dequantize_per_tensor_out(
     int64_t quant_min,
     int64_t quant_max,
     ScalarType dtype,
-    exec_aten::optional<ScalarType> out_dtype,
     Tensor& out) {
   torch::executor::Error err = resize_tensor(out, input.sizes());
   ET_CHECK_MSG(
       err == torch::executor::Error::Ok,
       "Failed to resize out Tensor in dequantize_per_tensor_out");
 
-  check_dequantize_per_tensor_args(
-      input, quant_min, quant_max, dtype, out_dtype, out);
+  check_dequantize_per_tensor_args(input, quant_min, quant_max, dtype, out);
 
   // calculate the dequantized output, cast scale to float to match fbgemm
   // behavior
@@ -132,7 +128,6 @@ Tensor& dequantize_per_tensor_tensor_args_out(
     int64_t quant_min,
     int64_t quant_max,
     ScalarType dtype,
-    exec_aten::optional<ScalarType> out_dtype,
     Tensor& out) {
   ET_CHECK_MSG(
       scale.scalar_type() == ScalarType::Double,
@@ -158,20 +153,18 @@ Tensor& dequantize_per_tensor_tensor_args_out(
       quant_min,
       quant_max,
       dtype,
-      out_dtype,
       out);
   return out;
 }
 
 Tensor& dequantize_per_channel_out(
     const Tensor& input,
     const Tensor& scale,
-    const optional<Tensor>& opt_zero_points,
+    const Tensor& zero_point,
     int64_t axis,
     int64_t quant_min,
     int64_t quant_max,
     ScalarType dtype,
-    exec_aten::optional<ScalarType> out_dtype,
     Tensor& out) {
   torch::executor::Error err = resize_tensor(out, input.sizes());
 
@@ -201,22 +194,18 @@ Tensor& dequantize_per_channel_out(
       ssize_t(scale.numel()),
       ssize_t(input.size(axis)));
 
-  if (opt_zero_points.has_value()) {
-    auto zero_point = opt_zero_points.value();
-    ET_CHECK_MSG(
-        zero_point.scalar_type() == ScalarType::Long,
-        "zero_point.scalar_type() %" PRId8 " is not integer type",
-        static_cast<int8_t>(zero_point.scalar_type()));
-
-    ET_CHECK_MSG(
-        zero_point.numel() == input.size(axis),
-        "zero_point.numel() %zd != input.size(axis) %zd",
-        ssize_t(zero_point.numel()),
-        ssize_t(input.size(axis)));
-  }
+  ET_CHECK_MSG(
+      zero_point.scalar_type() == ScalarType::Long,
+      "zero_point.scalar_type() %" PRId8 " is not integer type",
+      static_cast<int8_t>(zero_point.scalar_type()));
 
-  check_dequantize_per_tensor_args(
-      input, quant_min, quant_max, dtype, out_dtype, out);
+  ET_CHECK_MSG(
+      zero_point.numel() == input.size(axis),
+      "zero_point.numel() %zd != input.size(axis) %zd",
+      ssize_t(zero_point.numel()),
+      ssize_t(input.size(axis)));
+
+  check_dequantize_per_tensor_args(input, quant_min, quant_max, dtype, out);
 
   // a list contains all dimensions except axis
   int64_t dims[input.dim() - 1];
@@ -228,12 +217,7 @@ Tensor& dequantize_per_channel_out(
     }
   }
   const double* scale_data = scale.const_data_ptr<double>();
-  const int64_t* zero_point_data;
-  if (opt_zero_points.has_value()) {
-    zero_point_data = opt_zero_points.value().const_data_ptr<int64_t>();
-  } else {
-    zero_point_data = nullptr;
-  }
+  const int64_t* zero_point_data = zero_point.const_data_ptr<int64_t>();
 
   exec_aten::optional<exec_aten::ArrayRef<int64_t>> optional_dim_list{
       exec_aten::ArrayRef<int64_t>{dims, size_t(input.dim() - 1)}};
@@ -250,10 +234,7 @@ Tensor& dequantize_per_channel_out(
   case ScalarType::out_dtype:                                                  \
     for (size_t channel_ix = 0; channel_ix < input.size(axis); ++channel_ix) { \
       double _scale = scale_data[channel_ix];                                  \
-      int64_t _zero_point = 0;                                                 \
-      if (zero_point_data != nullptr) {                                        \
-        _zero_point = zero_point_data[channel_ix];                             \
-      }                                                                        \
+      int64_t _zero_point = zero_point_data[channel_ix];                       \
       apply_over_dim_list(                                                     \
           [input, out, _scale, _zero_point](size_t in_ix) {                    \
             out.mutable_data_ptr<CTYPE_OUT>()[in_ix] = static_cast<CTYPE_OUT>( \
@@ -295,24 +276,15 @@ Tensor& dequantize_per_channel_out(
     RuntimeContext& context,
     const Tensor& input,
     const Tensor& scale,
-    const optional<Tensor>& opt_zero_points,
+    const Tensor& zero_point,
     int64_t axis,
     int64_t quant_min,
     int64_t quant_max,
     ScalarType dtype,
-    exec_aten::optional<ScalarType> out_dtype,
     Tensor& out) {
   (void)context;
   return dequantize_per_channel_out(
-      input,
-      scale,
-      opt_zero_points,
-      axis,
-      quant_min,
-      quant_max,
-      dtype,
-      out_dtype,
-      out);
+      input, scale, zero_point, axis, quant_min, quant_max, dtype, out);
 }
 
 Tensor& dequantize_per_tensor_out(
@@ -323,13 +295,12 @@ Tensor& dequantize_per_tensor_out(
     int64_t quant_min,
     int64_t quant_max,
     ScalarType dtype,
-    exec_aten::optional<ScalarType> out_dtype,
     Tensor& out) {
   // TODO(larryliu): Add a context arg to the real op function and remove this
   // wrapper
   (void)context;
   return dequantize_per_tensor_out(
-      input, scale, zero_point, quant_min, quant_max, dtype, out_dtype, out);
+      input, scale, zero_point, quant_min, quant_max, dtype, out);
 }
 
 Tensor& dequantize_per_tensor_tensor_args_out(
@@ -340,13 +311,12 @@ Tensor& dequantize_per_tensor_tensor_args_out(
     int64_t quant_min,
     int64_t quant_max,
     ScalarType dtype,
-    exec_aten::optional<ScalarType> out_dtype,
     Tensor& out) {
   // TODO(larryliu): Add a context arg to the real op function and remove this
   // wrapper
   (void)context;
   return dequantize_per_tensor_tensor_args_out(
-      input, scale, zero_point, quant_min, quant_max, dtype, out_dtype, out);
+      input, scale, zero_point, quant_min, quant_max, dtype, out);
 }
 
 } // namespace native

diff --git a/kernels/quantized/quantized.yaml b/kernels/quantized/quantized.yaml
@@ -10,13 +10,13 @@
     - arg_meta: null
       kernel_name: torch::executor::choose_qparams_tensor_out
 
-- func: quantized_decomposed::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None, Tensor(a!) out) -> Tensor(a!)
+- func: quantized_decomposed::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
       kernel_name: torch::executor::dequantize_per_tensor_out
 
-- func: quantized_decomposed::dequantize_per_tensor.Tensor_out(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None, Tensor(a!) out) -> Tensor(a!)
+- func: quantized_decomposed::dequantize_per_tensor.Tensor_out(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
@@ -28,7 +28,7 @@
     - arg_meta: null
       kernel_name: torch::executor::quantize_per_channel_out
 
-- func: quantized_decomposed::dequantize_per_channel.out(Tensor input, Tensor scales, Tensor? zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None, Tensor(a!) out) -> Tensor(a!)
+- func: quantized_decomposed::dequantize_per_channel.out(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null

diff --git a/kernels/quantized/test/op_add_test.cpp b/kernels/quantized/test/op_add_test.cpp
@@ -20,7 +20,6 @@
 
 using namespace ::testing;
 using exec_aten::ArrayRef;
-using exec_aten::optional;
 using exec_aten::RuntimeContext;
 using exec_aten::Scalar;
 using exec_aten::ScalarType;
@@ -191,8 +190,6 @@ TEST(OpQuantizeAddTest, ConsitencyWithReferencePattern) {
   Tensor qinput2 = tfo.zeros({3, 5});
   Tensor qoutput = tfo.zeros({3, 5});
 
-  optional<ScalarType> out_dtype = optional<ScalarType>();
-
   RuntimeContext context{};
   // q -> qadd -> dq
   // 3.5 / 0.5 + 1 = 8
@@ -238,7 +235,6 @@ TEST(OpQuantizeAddTest, ConsitencyWithReferencePattern) {
       quant_min,
       quant_max,
       ScalarType::Byte,
-      out_dtype,
       reference_op_output);
 
   // now get results for q -> dq -> fp add -> q -> dq
@@ -249,7 +245,6 @@ TEST(OpQuantizeAddTest, ConsitencyWithReferencePattern) {
       quant_min,
       quant_max,
       ScalarType::Byte,
-      out_dtype,
       dq_input1);
 
   dequantize_per_tensor_out(
@@ -259,7 +254,6 @@ TEST(OpQuantizeAddTest, ConsitencyWithReferencePattern) {
       quant_min,
       quant_max,
       ScalarType::Byte,
-      out_dtype,
       dq_input2);
 
   add_out(context, dq_input1, dq_input2, 1.0, fp_output);
@@ -280,7 +274,6 @@ TEST(OpQuantizeAddTest, ConsitencyWithReferencePattern) {
       quant_min,
       quant_max,
       ScalarType::Byte,
-      out_dtype,
       reference_pattern_output);
 
   Tensor expected = tf.full({3, 5}, 7.0);

diff --git a/kernels/quantized/test/op_dequantize_test.cpp b/kernels/quantized/test/op_dequantize_test.cpp
@@ -18,7 +18,6 @@
 
 using namespace ::testing;
 using exec_aten::ArrayRef;
-using exec_aten::optional;
 using exec_aten::Scalar;
 using exec_aten::ScalarType;
 using exec_aten::Tensor;
@@ -44,14 +43,7 @@ void test_dtype() {
   // (100 - 30) * 0.5
   Tensor expected = tfo.full({3, 5}, 35);
   dequantize_per_tensor_out(
-      input,
-      scale,
-      zero_point,
-      quant_min,
-      quant_max,
-      DTYPE,
-      optional<ScalarType>(),
-      out);
+      input, scale, zero_point, quant_min, quant_max, DTYPE, out);
 
   EXPECT_TENSOR_EQ(out, expected);
 }
@@ -74,14 +66,7 @@ TEST(OpDequantizeOutTest, NonWholeNumbers) {
   // (100 - 30) * 0.5
   Tensor expected = tfo.full({3, 5}, 31.5);
   dequantize_per_tensor_out(
-      input,
-      scale,
-      zero_point,
-      quant_min,
-      quant_max,
-      ScalarType::Byte,
-      optional<ScalarType>(),
-      out);
+      input, scale, zero_point, quant_min, quant_max, ScalarType::Byte, out);
 
   EXPECT_TENSOR_EQ(out, expected);
 }
@@ -102,14 +87,7 @@ TEST(OpDequantizeOutTest, TensorArgOverload) {
   // (100 - 30) * 0.5
   Tensor expected = tfo.full({3, 5}, 31.5);
   dequantize_per_tensor_tensor_args_out(
-      input,
-      scale,
-      zero_point,
-      quant_min,
-      quant_max,
-      ScalarType::Byte,
-      optional<ScalarType>(),
-      out);
+      input, scale, zero_point, quant_min, quant_max, ScalarType::Byte, out);
 
   EXPECT_TENSOR_EQ(out, expected);
 }
@@ -138,7 +116,6 @@ TEST(OpDequantizeOutTest, DequantizePerChannel) {
       quant_min,
       quant_max,
       ScalarType::Byte,
-      optional<ScalarType>(),
       out);
 
   EXPECT_TENSOR_EQ(out, expected);
@@ -159,7 +136,6 @@ TEST(OpDequantizeOutTest, DequantizePerChannel) {
       quant_min,
       quant_max,
       ScalarType::Byte,
-      optional<ScalarType>(),
       out);
 
   EXPECT_TENSOR_EQ(out, expected);

diff --git a/kernels/quantized/test/op_embedding_test.cpp b/kernels/quantized/test/op_embedding_test.cpp
@@ -20,7 +20,6 @@
 
 using namespace ::testing;
 using exec_aten::ArrayRef;
-using exec_aten::optional;
 using exec_aten::RuntimeContext;
 using exec_aten::Scalar;
 using exec_aten::ScalarType;
@@ -150,7 +149,6 @@ TEST(OpQuantizedEmbeddingTest, ConsitencyWithReferencePattern) {
       quant_min,
       quant_max,
       ScalarType::Byte,
-      optional<ScalarType>(),
       weight);
 
   embedding_out(