diff --git a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
index 8744ea9a2bab6..8887bb83deb91 100644
--- a/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
+++ b/aten/src/ATen/native/quantized/cpu/OnednnUtils.h
@@ -485,7 +485,7 @@ static at::Tensor _quantized_convolution_onednn(
     torch::List<int64_t> dilation,
     bool transposed,
     int64_t groups,
-    double inv_output_scale,
+    double output_scale,
     int64_t output_zero_point,
     c10::optional<at::Tensor> accum=c10::nullopt, // accum to fused with conv add
     double accum_scale=1.0,
diff --git a/aten/src/ATen/native/quantized/cpu/qconv.cpp b/aten/src/ATen/native/quantized/cpu/qconv.cpp
index 374d1f62de615..f915c014af143 100644
--- a/aten/src/ATen/native/quantized/cpu/qconv.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qconv.cpp
@@ -1397,7 +1397,7 @@ static at::Tensor _quantized_convolution_onednn(
     torch::List<int64_t> dilation,
     bool transposed,
     int64_t groups,
-    double inv_output_scale, // inv_output_scale is the reciprocal of scale in fake quant
+    double output_scale,
     int64_t output_zero_point,
     c10::optional<at::Tensor> accum, // accum to fused with conv add
     double accum_scale,
@@ -1420,10 +1420,10 @@ static at::Tensor _quantized_convolution_onednn(
   bool bfloat16_output = output_dtype.has_value() && (output_dtype.value() == c10::kBFloat16);
   if (fp32_output || bfloat16_output) {
     // When fp32 or bf16 output, oneDNN expects op_attr doesn't set_scales and set_zero_points.
-    // So, we will use default inv_output_scale as 1.0 and output_zero_point as 0, since
-    // when inv_output_scale is 1.0, we will skip invoking of op_attr.set_scales in ideep;
+    // So, we will use default output_scale as 1.0 and output_zero_point as 0, since
+    // when output_scale is 1.0, we will skip invoking of op_attr.set_scales in ideep;
     // when output_zero_point is 0, we will skip invoking of op_attr.set_zero_points in ideep.
-    TORCH_CHECK(inv_output_scale == 1.0, " (ONEDNN): fp32 or bf16 output, inv_output_scale must be 1.0.");
+    TORCH_CHECK(output_scale == 1.0, " (ONEDNN): fp32 or bf16 output, output_scale must be 1.0.");
     TORCH_CHECK(output_zero_point == 0, " (ONEDNN): fp32 or bf16 output, output_zero_point must be 0");
   }
 
@@ -1634,7 +1634,7 @@ static at::Tensor _quantized_convolution_onednn(
   int oc_per_group = packed_weight.get_dim(0) / groups;
   int wei_scale_mask = ideep::utils::conv_weight_scale_mask(weight_scales.numel(), oc_per_group, groups, false);
   op_attr.set_scales_mask(DNNL_ARG_WEIGHTS, wei_scale_mask);
-  if (inv_output_scale != 1.0f) {
+  if (output_scale != 1.0f) {
     op_attr.set_scales_mask(DNNL_ARG_DST, 0);
   }
   if (output_zero_point != 0) {
@@ -1671,13 +1671,13 @@ static at::Tensor _quantized_convolution_onednn(
     }
     tensor src_scales_t = tensor(ideep::scale_t(1, act_scale));
     tensor wei_scales_t = tensor(weights_scales);
-    tensor dst_scales_t = tensor(ideep::scale_t(1, inv_output_scale));
+    tensor dst_scales_t = tensor(ideep::scale_t(1, output_scale));
     tensor src_zp_t = tensor(ideep::zero_point_t(1, act_zero_point));
     tensor dst_zp_t = tensor(ideep::zero_point_t(1, output_zero_point));
     if (act_scale != 1.0f) {
       args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC, src_scales_t});
     }
-    if (inv_output_scale != 1.0f) {
+    if (output_scale != 1.0f) {
       args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST, dst_scales_t});
     }
     args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, wei_scales_t});
@@ -1697,7 +1697,7 @@ static at::Tensor _quantized_convolution_onednn(
     const ideep::scale_t accum_ideep_scale = ideep::scale_t(1, 1.0/accum_scale);
     const ideep::zero_point_t accum_ideep_zero_points = ideep::zero_point_t(1, accum_zero_point);
     // Set the dst scale and zero point with the value of accum.
-    // The true scale and zero point is stored in ideep::scale_t(scale_size, inv_output_scale) and dst_zero_points.
+    // The true scale and zero point is stored in ideep::scale_t(scale_size, output_scale) and dst_zero_points.
     dst.set_scale(accum_ideep_scale);
     dst.set_zero_point(accum_ideep_zero_points);
   }
@@ -1707,7 +1707,7 @@ static at::Tensor _quantized_convolution_onednn(
     ideep::convolution_forward::prepare(
         params, src, packed_weight, expected_bias, dst_dims, dst,
         stride.vec(), dilation.vec(), padding.vec(), padding.vec(), groups,
-        src_scales, weights_scales, ideep::scale_t(1, 1.0f / inv_output_scale),
+        src_scales, weights_scales, ideep::scale_t(1, 1.0f / output_scale),
         src_zero_points, dst_zero_points, op_attr,
         dnnl::algorithm::convolution_direct,
         dnnl::prop_kind::forward_inference,
@@ -1872,7 +1872,7 @@ class QConvoneDNN final {
       torch::List<int64_t> padding,
       torch::List<int64_t> dilation,
       int64_t groups,
-      double inv_output_scale, // inv_output_scale is the reciprocal of scale in fake quant
+      double output_scale,
       int64_t output_zero_point,
       c10::optional<c10::ScalarType> output_dtype,
       c10::string_view attr,
@@ -1900,7 +1900,7 @@ class QConvoneDNN final {
         act, act_scale, act_zero_point,
         weight, weight_scales, weight_zero_points,
         bias, stride, padding, dilation, /*transposed*/false,
-        groups, inv_output_scale, output_zero_point,
+        groups, output_scale, output_zero_point,
         /*accum*/c10::nullopt, /*accum_scale*/0.0, /*accum_zero_point*/0,
         /*output_dtype*/output_dtype, /*binary_attr*/c10::nullopt, /*binary_alpha*/c10::nullopt,
         /*unary_attr*/attr, /*unary_scalars*/scalars, /*unary_algorithm*/algorithm
@@ -1924,7 +1924,7 @@ class QConvoneDNN final {
       torch::List<int64_t> padding,
       torch::List<int64_t> dilation,
       int64_t groups,
-      double inv_output_scale, // inv_output_scale is the reciprocal of scale in fake quant
+      double output_scale,
       int64_t output_zero_point,
       c10::optional<c10::ScalarType> output_dtype,
       c10::string_view binary_attr,
@@ -1952,7 +1952,7 @@ class QConvoneDNN final {
         act, act_scale, act_zero_point,
         weight, weight_scales, weight_zero_points,
         bias, stride, padding, dilation, /*transposed*/false,
-        groups, inv_output_scale, output_zero_point,
+        groups, output_scale, output_zero_point,
         accum, accum_scale, accum_zero_point,
         /*output_dtype*/output_dtype, binary_attr, alpha,
         unary_attr, unary_scalars, unary_algorithm
diff --git a/aten/src/ATen/native/quantized/library.cpp b/aten/src/ATen/native/quantized/library.cpp
index 1e7ff27efec98..97de2cfbf078a 100644
--- a/aten/src/ATen/native/quantized/library.cpp
+++ b/aten/src/ATen/native/quantized/library.cpp
@@ -257,12 +257,12 @@ TORCH_LIBRARY(onednn, m) {
   m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv_prepack(Tensor weight, Tensor w_scales, float x_scale, int x_zp, int[] stride, int[] padding, int[] dilation, int groups, int[]? x_shape=None) -> Tensor"));
 
   // Conv1D/2D/3D with unary postop
-  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv1d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor"));
-  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor"));
-  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv3d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor"));
+  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv1d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor"));
+  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor"));
+  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv3d_pointwise(Tensor qx, float x_scale, int x_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str attr, Scalar?[] scalars, str? algorithm) -> Tensor"));
 
   // Conv2D with binary postop
-  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise.binary(Tensor qx, float x_scale, int x_zero_point, Tensor qaccum, float accum_scale, int accum_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float inv_output_scale, int output_zero_point, ScalarType? output_dtype, str binary_attr, Scalar? alpha, str? unary_attr, Scalar?[] unary_scalars, str? unary_algorithm) -> Tensor"));
+  m.def(TORCH_SELECTIVE_SCHEMA("onednn::qconv2d_pointwise.binary(Tensor qx, float x_scale, int x_zero_point, Tensor qaccum, float accum_scale, int accum_zero_point, Tensor qw, Tensor w_scale, Tensor w_zero_point, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups, float output_scale, int output_zero_point, ScalarType? output_dtype, str binary_attr, Scalar? alpha, str? unary_attr, Scalar?[] unary_scalars, str? unary_algorithm) -> Tensor"));
 
   // Linear prepack
   m.def(TORCH_SELECTIVE_SCHEMA("onednn::qlinear_prepack(Tensor weight, int[]? x_shape) -> Tensor"));
diff --git a/torch/_inductor/fx_passes/quantization.py b/torch/_inductor/fx_passes/quantization.py
index ff319cdaa5c79..0f6e4b5a59780 100644
--- a/torch/_inductor/fx_passes/quantization.py
+++ b/torch/_inductor/fx_passes/quantization.py
@@ -174,7 +174,7 @@ def get_dequantize_qconv_pt2e_pattern(users=1):
             KeywordArg("padding"),
             KeywordArg("dilation"),
             KeywordArg("groups"),
-            KeywordArg("inv_output_scale"),  # inv_output_scale = 1.0
+            KeywordArg("output_scale"),  # output_scale = 1.0
             KeywordArg("output_zero_point"),  # output_zero_point = 0
             KeywordArg("output_dtype"),  # output_dtype = None
             KeywordArg("attr"),  # attr = "none"
@@ -1509,7 +1509,7 @@ def qconv_weight_prepack(match: Match, *args, **kwargs):
                 padding,
                 dilation,
                 groups,
-                1.0,  # inv_output_scale
+                1.0,  # output_scale
                 0,  # output_zero_point
                 dtype,  # output_dtype
                 "none",  # attr
diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py
index 64a83e31e199e..aa059955971ee 100644
--- a/torch/_inductor/ir.py
+++ b/torch/_inductor/ir.py
@@ -6634,7 +6634,7 @@ def __init__(
                 torch::List<int64_t> padding,
                 torch::List<int64_t> dilation,
                 int64_t groups,
-                double inv_output_scale,
+                double output_scale,
                 int64_t output_zero_point,
                 c10::optional<c10::ScalarType> output_dtype,
                 c10::string_view attr,
@@ -6810,7 +6810,7 @@ def __init__(
                 torch::List<int64_t> padding,
                 torch::List<int64_t> dilation,
                 int64_t groups,
-                double inv_output_scale,
+                double output_scale,
                 int64_t output_zero_point,
                 c10::optional<c10::ScalarType> output_dtype,
                 c10::string_view binary_attr,
@@ -7026,7 +7026,7 @@ def __init__(
                 at::Tensor weight_scales,
                 at::Tensor weight_zero_points,
                 c10::optional<at::Tensor> bias,
-                double inv_output_scale,
+                double output_scale,
                 int64_t output_zero_point,
                 c10::optional<c10::ScalarType> output_dtype,
                 c10::string_view post_op_name,