Enable qconv for quantization 2.0 export

ghstack-source-id: 3b60dda2f7fdcf0fbda54936efbb615dab5e212f
Pull Request resolved: pytorch#104580
leslie-fang-intel · Jul 6, 2023 · dc45230 · dc45230
1 parent 6426889
commit dc45230
Show file tree

Hide file tree

Showing 5 changed files with 1,141 additions and 7 deletions.
diff --git a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
@@ -8,7 +8,18 @@
 #include <cpuinfo.h>
 
 #include <c10/util/CallOnce.h>
+#endif // #if AT_MKLDNN_ENABLED()
 
+enum PostOps { // Post-op selector, declared outside the AT_MKLDNN_ENABLED() guard; used as the template parameter of _quantized_convolution_pt2e
+  NoPostOp, // presumably "nothing fused after the primary op" - TODO confirm against users
+  Relu,
+  LeakyRelu,
+  Tanh,
+  Add, // presumably pairs with the `accum` argument of _quantized_convolution_pt2e - TODO confirm
+  AddRelu, // presumably Add followed by Relu - TODO confirm
+};
+
+#if AT_MKLDNN_ENABLED()
 using PrimitiveCacheKey = std::tuple<
     double, // input_scale
     int64_t, // input_zero_point
@@ -107,13 +118,6 @@ struct DeconvPrimitiveCache : PrimitiveCache {
   }
 };
 
-enum PostOps {
-  NoPostOp,
-  Relu,
-  LeakyRelu,
-  Tanh,
-};
-
 struct PackedLinearWeightsOnednn : public LinearPackedParamsBase {
   PackedLinearWeightsOnednn(
       std::unique_ptr<ideep::tensor> weight,
@@ -379,4 +383,37 @@ static bool should_use_onednn_quant(
 
 } // onednn_utils
 
+at::Tensor _qconv_prepack_pt2e( // Declaration only; presumably returns the prepacked conv weight - TODO confirm against the definition
+    at::Tensor weight, // a plain CPU-backend tensor, not a QuantizedCPU tensor
+    at::Tensor weight_scales, // no zero-point argument is taken: weight zero points must be 0 for onednn
+    torch::List<int64_t> input_shape, // NOTE(review): unclear from here whether an empty list is accepted - confirm
+    double input_scale, // presumably the activation quantization scale - TODO confirm
+    int64_t input_zero_point, // presumably the activation quantization zero point - TODO confirm
+    torch::List<int64_t> stride, // convolution stride (length/rank expectations not visible here)
+    torch::List<int64_t> padding, // convolution padding (length/rank expectations not visible here)
+    torch::List<int64_t> dilation, // convolution dilation (length/rank expectations not visible here)
+    int64_t groups); // convolution group count
+
+template <PostOps postOpFused> // the post-op is chosen at compile time through the PostOps enum
+static at::Tensor _quantized_convolution_pt2e( // NOTE(review): `static` in a header gives each including TU its own internal-linkage entity - confirm the definition is visible wherever this is instantiated
+    at::Tensor act, // holds quantized values but is not a QTensor
+    double act_scale, // passed separately because `act` is not a QTensor
+    int64_t act_zero_point, // passed separately because `act` is not a QTensor
+    at::Tensor weight, // MKLDNN tensor with quantized values
+    at::Tensor weight_scales, // presumably per-tensor or per-channel scales - TODO confirm
+    at::Tensor weight_zero_points, // NOTE(review): _qconv_prepack_pt2e states weight zero points must be 0 for onednn - confirm how this is used
+    c10::optional<at::Tensor> bias, // Bias is packed if not None
+    torch::List<int64_t> stride, // convolution stride
+    torch::List<int64_t> padding, // convolution padding
+    torch::List<int64_t> dilation, // convolution dilation
+    bool transposed, // presumably selects transposed convolution (deconv) - TODO confirm
+    int64_t groups, // convolution group count
+    double output_scale, // presumably the requantization scale of the result - TODO confirm
+    int64_t output_zero_point, // presumably the requantization zero point of the result - TODO confirm
+    c10::optional<at::Tensor> accum=c10::nullopt, // accumulation tensor to be fused with the conv via add; defaults to none
+    double accum_scale=1.0, // presumably the quantization scale of `accum` - TODO confirm
+    int64_t accum_zero_point=0, // presumably the quantization zero point of `accum` - TODO confirm
+    bool fp32_output=false, // presumably requests a float32 (non-quantized) result when true - TODO confirm
+    const c10::optional<c10::ArrayRef<c10::IValue>>& post_op_args=c10::nullopt); // presumably extra arguments for the selected post-op - TODO confirm
+
 #endif // #if AT_MKLDNN_ENABLED()