[quant] Add quantized::sigmoid that takes output_scale/output_zero_point as input #45882

16 changes: 2 additions & 14 deletions aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp
@@ -486,27 +486,15 @@ static void leaky_qrelu_out_kernel(Tensor& out, const Tensor& qx,
});
}

void qsigmoid_kernel(const Tensor& qx, Tensor& qy) {
void qsigmoid_kernel(
const Tensor& qx, Tensor& qy, double output_scale, int64_t output_zero_point ) {
int64_t zero_point = qx.q_zero_point();
float scale = qx.q_scale();
auto scale_vec = Vec256<float>(scale);
auto zero_point_vec = Vec256<float>((float)zero_point);
auto scale_neg_zp_premul_vec = scale_vec * zero_point_vec.neg();

AT_DISPATCH_QINT_TYPES(qx.scalar_type(), "qsigmoid", [&]() {
// Naive implemenentation: uses dequantize/execute/quantize routine
// - Output scale is set to 1.0 / 2^(BIT_NUM)
// - For signed types output zero point is set to 0
// - For unsigned types output zero point is set to (qmax + qmin) / 2.0
// See https://stackoverflow.com/a/34448562/3606192 for potential
// optimizations
float output_scale = 0.00390625; // 1.0 / 2^8
int64_t output_zero_point = 0;
if (SCALAR_TYPE == at::kQInt32) {
output_scale = 2.3283064365386963e-10; // 1.0 / 2^32
} else if (SCALAR_TYPE == at::kQInt8) {
output_zero_point = -128;
}
float inv_output_scale = 1.0 / output_scale;

qy = at::_empty_affine_quantized(
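For reference, here is a minimal Python sketch of the dequantize → execute → quantize routine the kernel implements, now parameterized on the caller-supplied output qparams instead of hard-coded defaults. This is not the ATen kernel itself, and the helper name `qsigmoid_reference` is invented for illustration.

```python
import torch

def qsigmoid_reference(qx, output_scale, output_zero_point):
    # Dequantize with the input tensor's own qparams, run the float op,
    # then requantize with the qparams requested by the caller.
    y = torch.sigmoid(qx.dequantize())
    return torch.quantize_per_tensor(
        y, scale=output_scale, zero_point=output_zero_point, dtype=qx.dtype)
```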
55 changes: 47 additions & 8 deletions aten/src/ATen/native/quantized/cpu/qsigmoid.cpp
@@ -17,15 +17,11 @@ namespace native {
DEFINE_DISPATCH(qsigmoid_stub);

#ifdef USE_PYTORCH_QNNPACK
// This ALWAYS outputs scale=1.0/256, dtype=quint8
// The zero_point is 0 for qint32 and quint8, but -128 for qint8.
Tensor qnnpack_sigmoid(Tensor input) {
Tensor qnnpack_sigmoid(
Tensor input, double output_scale, int64_t output_zero_point) {
TORCH_CHECK(input.ndimension() > 0, "qnnpack_sigmoid(): Got empty input tensor");

Tensor qy;
constexpr float output_scale = 1.0f / 256.0f;
constexpr int32_t output_zero_point = 0;

initQNNPACK();

Tensor input_contig = input.contiguous(input.suggest_memory_format());
@@ -76,17 +72,60 @@ Tensor qnnpack_sigmoid(Tensor input) {
"failed to run QNNPACK sigmoid operator");
return qy;
}

#endif // USE_PYTORCH_QNNPACK

// This ALWAYS outputs scale=1.0/256, dtype=quint8
// The zero_point is 0 for qint32 and quint8, but -128 for qint8.
Tensor sigmoid_quantized_cpu(const Tensor& qx) {
#ifdef USE_PYTORCH_QNNPACK
if (at::globalContext().qEngine() == at::QEngine::QNNPACK &&
qx.scalar_type() == kQUInt8) {
return qnnpack_sigmoid(qx);
constexpr double output_scale = 1.0f / 256.0f;
constexpr int64_t output_zero_point = 0;
return qnnpack_sigmoid(qx, output_scale, output_zero_point);
}
#endif // USE_PYTORCH_QNNPACK
Tensor qy;
qsigmoid_stub(qx.device().type(), qx, qy);
AT_DISPATCH_QINT_TYPES(qx.scalar_type(), "qsigmoid", [&]() {
// Naive implementation: uses dequantize/execute/quantize routine
// - Output scale is set to 1.0 / 2^(BIT_NUM)
// - For signed types output zero point is set to 0
// - For unsigned types output zero point is set to (qmax + qmin) / 2.0
// See https://stackoverflow.com/a/34448562/3606192 for potential
// optimizations
double output_scale = 0.00390625; // 1.0 / 2^8
int64_t output_zero_point = 0;
if (SCALAR_TYPE == at::kQInt32) {
output_scale = 2.3283064365386963e-10; // 1.0 / 2^32
} else if (SCALAR_TYPE == at::kQInt8) {
output_zero_point = -128;
}
qsigmoid_stub(qx.device().type(), qx, qy, output_scale, output_zero_point);
});
return qy;
}

namespace {

class QSigmoid final {
public:
static Tensor run(Tensor qx, double output_scale, int64_t output_zero_point) {
#ifdef USE_PYTORCH_QNNPACK
if (at::globalContext().qEngine() == at::QEngine::QNNPACK &&
qx.scalar_type() == kQUInt8) {
return qnnpack_sigmoid(qx, output_scale, output_zero_point);
}
#endif // USE_PYTORCH_QNNPACK
Tensor qy;
qsigmoid_stub(qx.device().type(), qx, qy, output_scale, output_zero_point);
return qy;
}
};

TORCH_LIBRARY_IMPL(quantized, QuantizedCPU, m) {
m.impl(TORCH_SELECTIVE_NAME("quantized::sigmoid"), TORCH_FN(QSigmoid::run));
}
} // namespace

}} // namespace at::native
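The default output qparams that `sigmoid_quantized_cpu` falls back to (when the op is not called with observed output qparams) can be summarized with this small sketch; `default_sigmoid_qparams` is a hypothetical helper used only to illustrate the values in the dispatch block above, not part of the PR.

```python
import torch

def default_sigmoid_qparams(dtype):
    # scale = 1 / 2^bits so sigmoid's [0, 1] output range fills the type;
    # for the 8-bit types the zero point is the type's minimum value.
    if dtype == torch.quint8:
        return 1.0 / 2**8, 0
    if dtype == torch.qint8:
        return 1.0 / 2**8, -128
    if dtype == torch.qint32:
        return 1.0 / 2**32, 0
    raise ValueError(f"unsupported quantized dtype: {dtype}")
```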
2 changes: 1 addition & 1 deletion aten/src/ATen/native/quantized/cpu/quantized_ops.h
@@ -8,7 +8,7 @@ namespace native {
using qrelu_fn = void (*)(const at::Tensor& /*qx*/, at::Tensor& /*qy*/);
using qrelu_leaky_fn = void (*)(Tensor& /*out*/, const Tensor& /*qx*/,
Scalar /*negval_*/);
using qsigmoid_fn = void (*)(const at::Tensor& /*qx*/, at::Tensor& /*qy*/);
using qsigmoid_fn = void (*)(const at::Tensor& /*qx*/, at::Tensor& /*qy*/, double output_scale, int64_t output_zero_point);
using qhardsigmoid_fn = void (*)(const at::Tensor& /*qx*/, at::Tensor& /*qy*/);
using qclamp_fn = void (*)(
const at::Tensor& /*qx*/,
1 change: 1 addition & 0 deletions aten/src/ATen/native/quantized/library.cpp
@@ -158,6 +158,7 @@ TORCH_LIBRARY(quantized, m) {
m.def(TORCH_SELECTIVE_SCHEMA("quantized::max_pool2d(Tensor qx, int[] kernel_size, int[] stride, int[] padding, int[] dilation, bool ceil_mode) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::relu6(Tensor qx, bool inplace=False) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::leaky_relu(Tensor qx, Scalar negative_slope, bool inplace, float output_scale, int output_zero_point) -> Tensor"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::sigmoid(Tensor qx, float output_scale, int output_zero_point) -> Tensor"));
}

// According to #33294: The "_" prefix registration will be
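With the schema above registered, the op can be called directly from Python. This usage sketch assumes a PyTorch build that includes this PR; the input qparams are illustrative values.

```python
import torch

x = torch.randn(4, 4)
qx = torch.quantize_per_tensor(x, scale=0.1, zero_point=0, dtype=torch.quint8)

# In a quantized model the output qparams would come from an observer;
# 1/256 with zero_point 0 matches sigmoid's [0, 1] range for quint8.
qy = torch.ops.quantized.sigmoid(qx, 1.0 / 256.0, 0)
```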
24 changes: 22 additions & 2 deletions test/quantization/test_quantized_op.py
@@ -195,7 +195,7 @@ def _test_activation_function(self, X, fn_name, test_configs):
dtype=torch_type)

if output_is_observed:
extra_kwargs.update({'output_scale': scale, 'output_zero_point': zero_point})
extra_kwargs.update({'output_scale': output_scale, 'output_zero_point': output_zero_point})

# Finds qY using in-place or non-in-place quantized operators.
qY = q_op(qX, **extra_kwargs)
@@ -253,7 +253,7 @@ def test_qrelu6(self, X):
@override_qengines
@given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
qparams=hu.qparams()))
def test_qsigmoid(self, X):
def test_sigmoid_non_observed(self, X):
sigmoid_test_configs = [
{
'quantized_fn': [
@@ -266,6 +266,26 @@ def test_qsigmoid(self, X):
]
self._test_activation_function(X, 'sigmoid', sigmoid_test_configs)

"""Tests the correctness of the quantized::sigmoid op."""
# TODO: enable after observed output is supported in qnnpack
# @override_qengines
@skipIfNoFBGEMM
@given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
qparams=hu.qparams()))
def test_sigmoid(self, X):
sigmoid_test_configs = [
{
'quantized_fn': [
torch.ops.quantized.sigmoid
],
'reference_fn': torch.sigmoid,
'output_range': (0.0, 1.0),
'change_zero_point': True,
'output_is_observed': True,
}
]
self._test_activation_function(X, 'sigmoid', sigmoid_test_configs)

"""Tests the correctness of the quantized::hardsigmoid op."""
@override_qengines
@given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
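As a rough standalone sketch of what `_test_activation_function` verifies for the observed variant (hypothetical snippet, not the actual test harness): the op called with explicit output qparams should agree with quantizing the float reference at those same qparams.

```python
import torch

def check_quantized_sigmoid(qx, output_scale, output_zero_point):
    qy = torch.ops.quantized.sigmoid(qx, output_scale, output_zero_point)
    # Reference: float sigmoid on the dequantized input, requantized with the
    # same requested output qparams.
    ref = torch.quantize_per_tensor(
        torch.sigmoid(qx.dequantize()),
        scale=output_scale, zero_point=output_zero_point, dtype=qx.dtype)
    torch.testing.assert_allclose(qy.dequantize(), ref.dequantize())
```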