pytorch · chunyuan-w · Oct 30, 2023 · Oct 30, 2023 · desertfire · Oct 31, 2023
diff --git a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
@@ -316,13 +316,13 @@ namespace onednn_utils {
 
 static ideep::attr_t create_attr_by_post_op(
     const std::string& post_op_name,
-    const torch::List<double>& post_op_args) {
+    const torch::List<c10::optional<at::Scalar>>& post_op_args) {
   using ideep::tensor;
   PostOps post_op = POST_OP_TABLE[post_op_name];
   if (post_op == Relu) {
     return ideep::attr_t::fuse_relu();
   } else if (post_op == LeakyRelu) {
-    return ideep::attr_t::fuse_relu_v2(/*alpha=*/post_op_args[0]);
+    return ideep::attr_t::fuse_relu_v2(/*alpha=*/post_op_args[0].value().to<float>());
   } else if (post_op == Tanh) {
     return ideep::attr_t::fuse_tanh();
   }

diff --git a/aten/src/ATen/native/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
@@ -913,7 +913,7 @@ static at::Tensor linear_int8_with_onednn_weight(
     int64_t output_zero_point,
     bool fp32_output,
     std::string& post_op_name, // e.g. "none", "relu"
-    torch::List<double>& post_op_args,
+    torch::List<c10::optional<at::Scalar>>& post_op_args,
     std::string& post_op_algorithm) {
   using ideep::tensor;
   const int64_t dim = input.dim();
@@ -1119,7 +1119,7 @@ class QLinearOnednn final {
       int64_t output_zero_point,
       bool fp32_output,
       std::string post_op_name,
-      torch::List<double> post_op_args,
+      torch::List<c10::optional<at::Scalar>> post_op_args,
       std::string post_op_algorithm) {
 #if AT_MKLDNN_ENABLED()
     return linear_int8_with_onednn_weight(

diff --git a/aten/src/ATen/native/quantized/library.cpp b/aten/src/ATen/native/quantized/library.cpp
@@ -269,6 +269,6 @@ TORCH_LIBRARY(onednn, m) {
   m.def(TORCH_SELECTIVE_SCHEMA("onednn::qlinear_prepack(Tensor weight, int[]? x_shape) -> Tensor"));
 
   // Linear with unary postop
-  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qlinear_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, float output_scale, int output_zero_point, bool fp32_output, str post_op_name, float[] post_op_args, str post_op_algorithm) -> Tensor"));
+  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qlinear_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, float output_scale, int output_zero_point, bool fp32_output, str post_op_name, Scalar?[] post_op_args, str post_op_algorithm) -> Tensor"));
 
 }
diff --git a/test/forward_backward_compatibility/check_forward_backward_compatibility.py b/test/forward_backward_compatibility/check_forward_backward_compatibility.py
@@ -312,6 +312,7 @@
     ("aten::batch_norm_backward_elemt.out", datetime.date(2023, 12, 31)),
     ("aten::batch_norm_backward_elemt", datetime.date(2023, 12, 31)),
     ("aten::sym_constrain_range", datetime.date(2023, 12, 31)),
+    ("onednn::qlinear_pointwise", datetime.date(2023, 12, 31)),
 ]
 
 ALLOW_LIST_COMPILED = [

diff --git a/test/inductor/test_cpp_wrapper.py b/test/inductor/test_cpp_wrapper.py
@@ -223,6 +223,24 @@ class BaseTest(NamedTuple):
             test_mkldnn_pattern_matcher.TestPatternMatcher(),
             condition=torch.backends.mkldnn.is_available(),
         ),
+        BaseTest(
+            "test_qlinear",
+            "cpu",
+            test_mkldnn_pattern_matcher.TestPatternMatcher(),
+            condition=torch.backends.mkldnn.is_available(),
+        ),
+        BaseTest(
+            "test_qlinear_relu",
+            "cpu",
+            test_mkldnn_pattern_matcher.TestPatternMatcher(),
+            condition=torch.backends.mkldnn.is_available(),
+        ),
+        BaseTest(
+            "test_qlinear_dequant_promotion",
+            "cpu",
+            test_mkldnn_pattern_matcher.TestPatternMatcher(),
+            condition=torch.backends.mkldnn.is_available(),
+        ),
         BaseTest("test_randint"),
         BaseTest("test_randn_with_dtype_and_device"),
         BaseTest("test_reduction1"),  # Reduction

diff --git a/test/inductor/test_mkldnn_pattern_matcher.py b/test/inductor/test_mkldnn_pattern_matcher.py
@@ -692,7 +692,7 @@ def forward(self, x):
     @skipIfNoDynamoSupport
     @skipIfNoONEDNN
     @skipIfRocm
-    def test_qlinear(self):
+    def test_qlinear_cpu(self):
         r"""
         This testcase will quantize a single Linear Moduel.
         """
@@ -731,7 +731,7 @@ def forward(self, x):
     @skipIfNoDynamoSupport
     @skipIfNoONEDNN
     @skipIfRocm
-    def test_qlinear_relu(self):
+    def test_qlinear_relu_cpu(self):
         r"""
         This testcase will quantize a Linear->ReLU pattern.
         """
@@ -771,7 +771,7 @@ def forward(self, x):
     @skipIfNoDynamoSupport
     @skipIfNoONEDNN
     @skipIfRocm
-    def test_qlinear_dequant_promotion(self):
+    def test_qlinear_dequant_promotion_cpu(self):
         r"""
         This testcase test if dequant node before linear is promoted correctly:
                   X

diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py
@@ -5761,7 +5761,30 @@ def __init__(
               fp32_output, unary_attr, unary_scalars, unary_algorithm]
         """
         self.has_bias = len(inputs) == 5
-        super().__init__(layout, inputs, constant_args)
+        super().__init__(
+            layout,
+            inputs,
+            constant_args,
+            None,
+            kernel="torch.ops.onednn.qlinear_pointwise",
+            cpp_kernel="onednn::qlinear_pointwise",
+        )
+        self.cpp_kernel_key = "qlinear_pointwise"
+        self.cpp_op_schema = """
+            at::Tensor(
+                at::Tensor act,
+                double act_scale,
+                int64_t act_zero_point,
+                at::Tensor weight,
+                at::Tensor weight_scales,
+                at::Tensor weight_zero_points,
+                c10::optional<at::Tensor> bias,
+                double inv_output_scale,
+                int64_t output_zero_point,
+                bool fp32_output,
+                std::string post_op_name,
+                torch::List<c10::optional<at::Scalar>> post_op_args,
+                std::string post_op_algorithm)"""
 
     def codegen(self, wrapper):
         # Parser the inputs and constant
@@ -5784,23 +5807,28 @@ def codegen(self, wrapper):
             unary_algorithm,
         ) = const_args[-8:]
 
-        self.kernel = "torch.ops.onednn.qlinear_pointwise"
         codegen_args = (
-            f"{x}"
-            + f", {x_scale}"
-            + f", {x_zp}"
-            + f", {packed_weight}"
-            + f", {w_scale}"
-            + f", {w_zp}"
-            + f", {bias}"
-            + f", {o_inv_scale}"
-            + f", {o_zp}"
-            + f", {fp32_output}"
-            + f", {unary_attr}"
-            + f", {unary_scalars}"
-            + f", {unary_algorithm}"
-        )
-        wrapper.writeline(f"{self.get_name()} = {self.kernel}({codegen_args})")
+            x,
+            x_scale,
+            x_zp,
+            packed_weight,
+            w_scale,
+            w_zp,
+            bias,
+            o_inv_scale,
+            o_zp,
+            fp32_output,
+            unary_attr,
+            unary_scalars,
+            unary_algorithm,
+        )
+        wrapper.generate_extern_kernel_alloc_and_find_schema_if_needed(
+            self.get_name(),
+            self.kernel,
+            codegen_args,
+            self.cpp_op_schema,
+            self.cpp_kernel_key,
+        )
         if isinstance(self.layout, Layout):
             self.codegen_size_asserts(wrapper)
 
@@ -5838,7 +5866,7 @@ def create(
             output_zero_point,
             fp32_output,
             unary_attr,
-            unary_scalars,
+            may_convert_to_optional(unary_scalars),
             unary_algorithm,
         ]