From 41cff0f8502183072d1bdabff2a87386f2a384d5 Mon Sep 17 00:00:00 2001 From: leslie-fang-intel Date: Tue, 16 Apr 2024 19:12:57 -0700 Subject: [PATCH] [Inductor][Quant] Change the QConv output scale name [ghstack-poisoned] --- .../ATen/native/quantized/cpu/OnednnUtils.h | 2 +- aten/src/ATen/native/quantized/cpu/qconv.cpp | 20 +++++++++---------- aten/src/ATen/native/quantized/library.cpp | 8 ++++---- torch/_inductor/fx_passes/quantization.py | 4 ++-- torch/_inductor/ir.py | 6 +++--- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h index 8744ea9a2bab..8887bb83deb9 100644 --- a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h +++ b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h @@ -485,7 +485,7 @@ static at::Tensor _quantized_convolution_onednn( torch::List dilation, bool transposed, int64_t groups, - double inv_output_scale, + double output_scale, int64_t output_zero_point, c10::optional accum=c10::nullopt, // accum to fused with conv add double accum_scale=1.0, diff --git a/aten/src/ATen/native/quantized/cpu/qconv.cpp b/aten/src/ATen/native/quantized/cpu/qconv.cpp index 47a1bb59f177..d9545747e0fb 100644 --- a/aten/src/ATen/native/quantized/cpu/qconv.cpp +++ b/aten/src/ATen/native/quantized/cpu/qconv.cpp @@ -1397,7 +1397,7 @@ static at::Tensor _quantized_convolution_onednn( torch::List dilation, bool transposed, int64_t groups, - double inv_output_scale, // inv_output_scale is the reciprocal of scale in fake quant + double output_scale, int64_t output_zero_point, c10::optional accum, // accum to fused with conv add double accum_scale, @@ -1420,10 +1420,10 @@ static at::Tensor _quantized_convolution_onednn( bool bfloat16_output = output_dtype.has_value() && (output_dtype.value() == c10::kBFloat16); if (fp32_output || bfloat16_output) { // When fp32 or bf16 output, oneDNN expects op_attr doesn't set_scales and set_zero_points. - // So, we will use default inv_output_scale as 1.0 and output_zero_point as 0, since - // when inv_output_scale is 1.0, we will skip invoking of op_attr.set_scales in ideep; + // So, we will use default output_scale as 1.0 and output_zero_point as 0, since + // when output_scale is 1.0, we will skip invoking of op_attr.set_scales in ideep; // when output_zero_point is 0, we will skip invoking of op_attr.set_zero_points in ideep. - TORCH_CHECK(inv_output_scale == 1.0, " (ONEDNN): fp32 or bf16 output, inv_output_scale must be 1.0."); + TORCH_CHECK(output_scale == 1.0, " (ONEDNN): fp32 or bf16 output, output_scale must be 1.0."); TORCH_CHECK(output_zero_point == 0, " (ONEDNN): fp32 or bf16 output, output_zero_point must be 0"); } @@ -1649,7 +1649,7 @@ static at::Tensor _quantized_convolution_onednn( const ideep::scale_t accum_ideep_scale = ideep::scale_t(1, 1.0/accum_scale); const ideep::zero_point_t accum_ideep_zero_points = ideep::zero_point_t(1, accum_zero_point); // Set the dst scale and zero point with the value of accum. - // The true scale and zero point is stored in ideep::scale_t(scale_size, inv_output_scale) and dst_zero_points. + // The true scale and zero point is stored in ideep::scale_t(scale_size, output_scale) and dst_zero_points. dst.set_scale(accum_ideep_scale); dst.set_zero_point(accum_ideep_zero_points); } @@ -1659,7 +1659,7 @@ static at::Tensor _quantized_convolution_onednn( ideep::convolution_forward::prepare( params, src, packed_weight, expected_bias, dst_dims, dst, stride.vec(), dilation.vec(), padding.vec(), padding.vec(), groups, - src_scales, weights_scales, ideep::scale_t(1, 1.0f / inv_output_scale), + src_scales, weights_scales, ideep::scale_t(1, 1.0f / output_scale), src_zero_points, dst_zero_points, op_attr, dnnl::algorithm::convolution_direct, dnnl::prop_kind::forward_inference, @@ -1823,7 +1823,7 @@ class QConvoneDNN final { torch::List padding, torch::List dilation, int64_t groups, - double inv_output_scale, // inv_output_scale is the reciprocal of scale in fake quant + double output_scale, int64_t output_zero_point, c10::optional output_dtype, c10::string_view attr, @@ -1851,7 +1851,7 @@ class QConvoneDNN final { act, act_scale, act_zero_point, weight, weight_scales, weight_zero_points, bias, stride, padding, dilation, /*transposed*/false, - groups, inv_output_scale, output_zero_point, + groups, output_scale, output_zero_point, /*accum*/c10::nullopt, /*accum_scale*/0.0, /*accum_zero_point*/0, /*output_dtype*/output_dtype, /*binary_attr*/c10::nullopt, /*binary_alpha*/c10::nullopt, /*unary_attr*/attr, /*unary_scalars*/scalars, /*unary_algorithm*/algorithm @@ -1875,7 +1875,7 @@ class QConvoneDNN final { torch::List padding, torch::List dilation, int64_t groups, - double inv_output_scale, // inv_output_scale is the reciprocal of scale in fake quant + double output_scale, int64_t output_zero_point, c10::optional output_dtype, c10::string_view binary_attr, @@ -1903,7 +1903,7 @@ class QConvoneDNN final { act, act_scale, act_zero_point, weight, weight_scales, weight_zero_points, bias, stride, padding, dilation, /*transposed*/false, - groups, inv_output_scale, output_zero_point, + groups, output_scale, output_zero_point, accum, accum_scale, accum_zero_point, /*output_dtype*/output_dtype, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm diff --git a/aten/src/ATen/native/quantized/library.cpp b/aten/src/ATen/native/quantized/library.cpp index 1e7ff27efec9..97de2cfbf078 100644 --- a/aten/src/ATen/native/quantized/library.cpp +++ b/aten/src/ATen/native/quantized/library.cpp @@ -257,12 +257,12 @@ TORCH_LIBRARY(onednn, m) { m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv_prepack(Tensor weight, Tensor w_scales, float x_scale, int x_zp, int[] stride, int[] padding, int[] dilation, int groups, int[]? x_shape=None) -> Tensor")); // Conv1D/2D/3D with unary postop - m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv1d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor")); - m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor")); - m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv3d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor")); + m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv1d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor")); + m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor")); + m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv3d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor")); // Conv2D with binary postop - m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise.binary(Tensor qx, float x_scale, int x_zero_point, Tensor qaccum, float accum_scale, int accum_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str binary_attr, Scalar? alpha, str? unary_attr, Scalar?[] unary_scalars, str? unary_algorithm) -> Tensor")); + m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise.binary(Tensor qx, float x_scale, int x_zero_point, Tensor qaccum, float accum_scale, int accum_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str binary_attr, Scalar? alpha, str? unary_attr, Scalar?[] unary_scalars, str? unary_algorithm) -> Tensor")); // Linear prepack m.def(TORCH_SELECTIVE_SCHEMA("onednn::qlinear_prepack(Tensor weight, int[]? x_shape) -> Tensor")); diff --git a/torch/_inductor/fx_passes/quantization.py b/torch/_inductor/fx_passes/quantization.py index 14c98d3366ac..08c0f982f9aa 100644 --- a/torch/_inductor/fx_passes/quantization.py +++ b/torch/_inductor/fx_passes/quantization.py @@ -161,7 +161,7 @@ def get_dequantize_qconv_pt2e_pattern(users=1): KeywordArg("padding"), KeywordArg("dilation"), KeywordArg("groups"), - KeywordArg("inv_output_scale"), # inv_output_scale = 1.0 + KeywordArg("output_scale"), # output_scale = 1.0 KeywordArg("output_zero_point"), # output_zero_point = 0 KeywordArg("output_dtype"), # output_dtype = None KeywordArg("attr"), # attr = "none" @@ -1496,7 +1496,7 @@ def qconv_weight_prepack(match: Match, *args, **kwargs): padding, dilation, groups, - 1.0, # inv_output_scale + 1.0, # output_scale 0, # output_zero_point dtype, # output_dtype "none", # attr diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index 860b71c5459e..eb36065cfe0c 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -6424,7 +6424,7 @@ def __init__( torch::List padding, torch::List dilation, int64_t groups, - double inv_output_scale, + double output_scale, int64_t output_zero_point, c10::optional output_dtype, c10::string_view attr, @@ -6600,7 +6600,7 @@ def __init__( torch::List padding, torch::List dilation, int64_t groups, - double inv_output_scale, + double output_scale, int64_t output_zero_point, c10::optional output_dtype, c10::string_view binary_attr, @@ -6816,7 +6816,7 @@ def __init__( at::Tensor weight_scales, at::Tensor weight_zero_points, c10::optional bias, - double inv_output_scale, + double output_scale, int64_t output_zero_point, c10::optional output_dtype, c10::string_view post_op_name,