Enabled per channel quantized static linear/conv #37622

Closed. Wants to merge 25 commits.

Commits (changes from 17 of 25 commits shown):
979fb96  Enabled per channel quantized static linear/conv (kimishpatel, Apr 30, 2020)
b257fe6  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 1, 2020)
c69e8e6  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 1, 2020)
1233eee  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 1, 2020)
270a9d2  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 4, 2020)
90127d8  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 5, 2020)
a6b5c5f  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 5, 2020)
4adaa3e  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 5, 2020)
7811cc0  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 6, 2020)
cabc5db  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 7, 2020)
f78692d  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 7, 2020)
b264c8c  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 7, 2020)
65237d1  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 9, 2020)
3df561f  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 11, 2020)
a7831ae  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 12, 2020)
d6e5509  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 12, 2020)
4310888  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 12, 2020)
776f69c  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 14, 2020)
64609df  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 15, 2020)
1236628  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 15, 2020)
786e626  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 15, 2020)
0c3b806  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 19, 2020)
25acda6  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 19, 2020)
7ff077c  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 20, 2020)
8725f83  Update on "Enabled per channel quantized static linear/conv" (kimishpatel, May 20, 2020)
aten/src/ATen/native/quantized/cpu/qconv.cpp (16 changes: 14 additions & 2 deletions)
@@ -537,9 +537,21 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl(
for (int i = 0; i < wt_numel; ++i) {
qnnp_w_data[i] = static_cast<c10::quint8>(w_data[i] + 128);
}
+ at::Tensor qbias;
// Original bias was float, so we requantize it here.
- auto qbias = at::quantize_per_tensor(
-     bias_fp32, weight_scales_data[0] * act_input_scale, 0, c10::kQInt32);
+ if (is_per_channel) {
+   at::Tensor bias_quant_scales =
+       weight_contig.q_per_channel_scales() * act_input_scale;
+   at::Tensor bias_zp = at::zeros(bias_quant_scales.sizes(), c10::kInt);
+   qbias = at::native::quantize_per_channel_cpu(
+       bias_fp32, bias_quant_scales, bias_zp, 0, c10::kQInt32);
+ } else {
+   qbias = at::native::quantize_per_tensor(
+       bias_fp32,
+       weight_contig.q_scale() * act_input_scale,
+       0,
+       c10::kQInt32);
+ }

conv_p = qnnpack::conv_param_t(
{kernel_w, kernel_h},
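The hunk above requantizes the float bias with one scale per output channel (weight_scale[i] * activation_scale, zero point 0, int32) when the weight is per-channel quantized, instead of a single per-tensor scale. Below is a minimal Python sketch of the same arithmetic using the public quantization API; the shapes, scales, and the act_input_scale value are illustrative assumptions, not values from the PR.

import torch

out_ch, in_ch = 8, 4
weight = torch.randn(out_ch, in_ch)
w_scales = torch.rand(out_ch) * 0.1 + 0.01          # one scale per output channel
w_zps = torch.zeros(out_ch, dtype=torch.int64)
w_q = torch.quantize_per_channel(weight, w_scales, w_zps, axis=0, dtype=torch.qint8)

bias_fp32 = torch.randn(out_ch)
act_input_scale = 0.05                               # assumed activation input scale

# Per-channel path: bias scale_i = weight_scale_i * activation scale, zero point 0, qint32.
bias_scales = w_q.q_per_channel_scales() * act_input_scale
bias_zps = torch.zeros(out_ch, dtype=torch.int64)
qbias_per_channel = torch.quantize_per_channel(
    bias_fp32, bias_scales, bias_zps, axis=0, dtype=torch.qint32)

# Per-tensor path (the previous behavior): one scale for the whole bias.
qbias_per_tensor = torch.quantize_per_tensor(
    bias_fp32, w_q.q_per_channel_scales()[0].item() * act_input_scale, 0, torch.qint32)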
aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp (4 changes: 0 additions & 4 deletions)
@@ -168,10 +168,6 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeightsQnnp<kSpa
weight.ndimension() == 4,
"quantized::conv2d_prepack (qnnpack): Weights are expected to have 4 "
"dimensions");
- TORCH_CHECK(
-     weight.qscheme() == c10::kPerTensorAffine,
-     "quantized::conv2d_prepack (qnnpack): only supports Per Tensor "
-     "Quantization Scheme")
TORCH_CHECK(
stride.size() == 2,
"quantized::conv2d_prepack (qnnpack): 2D convolution only");
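With the per-tensor-only check removed, conv2d_prepack under the QNNPACK engine should accept a per-channel quantized weight. A hedged sketch of that call path; the shapes, scales, and conv parameters below are assumptions made for illustration.

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'

    out_ch, in_ch, kh, kw = 4, 3, 3, 3
    weight = torch.randn(out_ch, in_ch, kh, kw)
    scales = torch.rand(out_ch) * 0.1 + 0.01
    zero_points = torch.zeros(out_ch, dtype=torch.int64)
    w_q = torch.quantize_per_channel(weight, scales, zero_points, axis=0, dtype=torch.qint8)
    bias = torch.randn(out_ch)

    # Arguments: weight, bias, stride, padding, dilation, groups.
    packed = torch.ops.quantized.conv2d_prepack(w_q, bias, [1, 1], [1, 1], [1, 1], 1)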
aten/src/ATen/native/quantized/cpu/qlinear.cpp (19 changes: 15 additions & 4 deletions)
@@ -266,17 +266,28 @@ at::Tensor PackedLinearWeightsQnnp::apply_impl(
qnnp_w_data[i] = static_cast<c10::quint8>(w_data[i] + 128);
}
// Original bias was float, so we requantize it here.
- auto qbias = at::quantize_per_tensor(
-     bias_fp32, weight_scales_data[0] * input_scale, 0, c10::kQInt32);
+ const bool is_per_channel = orig_weight.qscheme() == at::kPerChannelAffine;
+ at::Tensor qbias;
+ // Original bias was float, so we requantize it here.
+ if (is_per_channel) {
+   at::Tensor bias_quant_scales =
+       weight_contig.q_per_channel_scales() * input_scale;
+   at::Tensor bias_zp = at::zeros(bias_quant_scales.sizes(), c10::kInt);
+   qbias = at::native::quantize_per_channel_cpu(
+       bias_fp32, bias_quant_scales, bias_zp, 0, c10::kQInt32);
+ } else {
+   qbias = at::native::quantize_per_tensor(
+       bias_fp32, weight_contig.q_scale() * input_scale, 0, c10::kQInt32);
+ }

// Update the input scale to not pack again.
this->input_scale = input_scale;
w.reset();
w = std::make_unique<qnnpack::PackBMatrix>(
cols_w /* input_channels */,
rows_w /* output_channels */,
- weight_zp_data[0],
- requantization_scale.data()[0],
+ weight_zp_data,
+ requantization_scale.data(),
reinterpret_cast<uint8_t*>(qnnp_w_data),
reinterpret_cast<int32_t*>(qbias.data_ptr<c10::qint32>()));
packB = w.get();
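For context, this is roughly what the qlinear change enables at the op level: quantized::linear running with a per-channel quantized weight while the QNNPACK engine is selected. The shapes, activation scale, and output quantization parameters below are assumptions for illustration, not values taken from the PR or its tests.

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'

    out_ch, in_ch = 8, 16
    weight = torch.randn(out_ch, in_ch)
    w_scales = torch.rand(out_ch) * 0.1 + 0.01
    w_zps = torch.zeros(out_ch, dtype=torch.int64)
    w_q = torch.quantize_per_channel(weight, w_scales, w_zps, axis=0, dtype=torch.qint8)
    bias = torch.randn(out_ch)

    packed = torch.ops.quantized.linear_prepack(w_q, bias)

    x = torch.randn(2, in_ch)
    x_q = torch.quantize_per_tensor(x, scale=0.05, zero_point=128, dtype=torch.quint8)

    # Output scale and zero point are arbitrary choices here.
    y_q = torch.ops.quantized.linear(x_q, packed, 0.1, 64)
    y = y_q.dequantize()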
aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp (5 changes: 5 additions & 0 deletions)
@@ -280,6 +280,11 @@ at::Tensor PackedLinearWeightsQnnp::apply_dynamic_impl(at::Tensor input) {
(uint8_t*)qnnp_w_data,
nullptr);
packB = w.get();
+ // Need to move the check here since we are releasing the weights.
+ TORCH_CHECK(
+     orig_weight.qscheme() == at::kPerTensorAffine,
+     "quantized::linear_dynamic (qnnpack) only supports "
+     "Per Tensor Quantization Scheme");
Comment on lines +281 to +285

Contributor:

I don't get this. Can the scheme change between when you call prepack and here? Also, I think I understand "move" in the context of this diff, but I don't think someone looking at the new version of the code in the future will understand what's moving here.

Contributor Author:
This will be removed by a later PR in the stack. I just split the PRs for ease of review and to make any future failures easier to dissect. I understand, though, that the comment is cryptic.
Regarding the qscheme changing: I don't think that is possible. Do you have a reason to believe it can?

if (at::globalContext().releaseWeightsWhenPrepacking()) {
// On mobile, we release the original weight by resetting the intrusive_ptr.
// Calling unpack after this will throw an assertion.
aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp (3 changes: 0 additions & 3 deletions)
@@ -119,9 +119,6 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeightsQnnp::prepack(
TORCH_CHECK(
weight.dim() == 2,
"quantized::linear_prepack (qnnpack): Weight tensor rank should be == 2");
- TORCH_CHECK(
-     weight.qscheme() == c10::kPerTensorAffine,
-     "quantized::linear_prepack (qnnpack) only supports Per Tensor Quantization Scheme")

int64_t rows_w = weight.size(0);
at::Tensor bias_fp32;
test/quantization/test_quantized_functional.py (1 change: 0 additions & 1 deletion)
@@ -101,7 +101,6 @@ def test_conv2d_api(
if qengine == 'qnnpack':
if IS_PPC or TEST_WITH_UBSAN:
return
- use_channelwise = False

input_feature_map_size = (H, W)
kernel_size = (kernel_h, kernel_w)
test/quantization/test_quantized_module.py (2 changes: 0 additions & 2 deletions)
@@ -378,8 +378,6 @@ def test_conv2d_api(
stride = (stride_h, stride_w)
padding = (pad_h, pad_w)
dilation = (dilation, dilation)
- if torch.backends.quantized.engine == 'qnnpack':
-     use_channelwise = False
if use_fused:
module_name = "QuantizedConvReLU2d"
qconv_module = nnq_fused.ConvReLU2d(
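The module-level test above now keeps use_channelwise enabled for QNNPACK. As a rough end-to-end illustration (not the test's code), here is eager-mode static quantization with a per-channel weight observer under the QNNPACK engine; the toy model and observer choices are assumptions.

import torch
import torch.nn as nn
import torch.quantization as tq

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'

    model = nn.Sequential(tq.QuantStub(), nn.Conv2d(3, 8, 3, padding=1), tq.DeQuantStub()).eval()
    model.qconfig = tq.QConfig(
        activation=tq.default_observer,
        weight=tq.default_per_channel_weight_observer)  # per-channel weight quantization
    tq.prepare(model, inplace=True)
    model(torch.randn(1, 3, 16, 16))   # calibration pass
    tq.convert(model, inplace=True)
    out = model(torch.randn(1, 3, 16, 16))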
test/quantization/test_quantized_op.py (8 changes: 0 additions & 8 deletions)
@@ -2064,7 +2064,6 @@ def test_qlinear(self, batch_size, input_channels, output_channels, use_bias,
use_relu, use_multi_dim_input, use_channelwise):
decimal_val = 4
if torch.backends.quantized.engine == 'qnnpack':
- use_channelwise = False
use_multi_dim_input = False
# QNNPACK supports uint8 in the kernels. In the op we shift the int8
# weight values to uint8 to be on par with fbgemm. However, this causes
@@ -2180,8 +2179,6 @@ def test_qlinear(self, batch_size, input_channels, output_channels, use_bias,
use_channelwise=st.booleans())
@override_qengines
def test_qlinear_unpack(self, W, use_channelwise):
- if torch.backends.quantized.engine == 'qnnpack':
-     use_channelwise = False

W, (W_scale, W_zp, torch_type) = W
if use_channelwise:
@@ -2435,9 +2432,6 @@ def test_qconv2d(
use_relu,
use_channelwise,
):
- if torch.backends.quantized.engine == 'qnnpack':
-     use_channelwise = False

input_channels = input_channels_per_group * groups
output_channels = output_channels_per_group * groups
kernels = (kernel_h, kernel_w)
@@ -2488,8 +2482,6 @@ def test_qconv_unpack(
def test_qconv_unpack(
self, inputs, stride_h, stride_w, pad_h, pad_w, channelwise
):
- if torch.backends.quantized.engine == 'qnnpack':
-     channelwise = False

qconv_prepack = torch.ops.quantized.conv2d_prepack
qconv_unpack = torch.ops.quantized.conv2d_unpack
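These tests also cover unpacking. A small sketch of the round trip they exercise: prepack a per-channel quantized conv weight, unpack it, and check that the per-channel scales survive. All shapes and values below are made up for illustration, and passing bias=None is an assumption about the prepack schema.

import torch

if 'qnnpack' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'qnnpack'

    weight = torch.randn(4, 3, 3, 3)
    scales = torch.rand(4) * 0.1 + 0.01
    zero_points = torch.zeros(4, dtype=torch.int64)
    w_q = torch.quantize_per_channel(weight, scales, zero_points, axis=0, dtype=torch.qint8)

    packed = torch.ops.quantized.conv2d_prepack(w_q, None, [1, 1], [0, 0], [1, 1], 1)
    w_unpacked, bias_unpacked = torch.ops.quantized.conv2d_unpack(packed)

    # The unpacked weight should carry the same per-channel scales as the input weight.
    assert torch.allclose(w_unpacked.q_per_channel_scales(), w_q.q_per_channel_scales())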