From 3cf111455a69c453b8b534765c9429041fd8ae6f Mon Sep 17 00:00:00 2001 From: Sebastian Larsson Date: Wed, 4 Dec 2024 14:32:09 +0100 Subject: [PATCH] Arm backend: Explicitly convert quantized value to int64 Previously, dequantizing a value with dequantize_value() in backends/arm/tosa_quant_utils.py could result in integer overflow when using numpy 2.1.3. The offending part of the formula is `qx - qargs.zp`. If the subtraction results in a value outside of the range of the dtype of `qx`, the following warning is printed: "RuntimeWarning: overflow encountered in scalar subtract" With numpy 1.21.3 the dtype is implicitly converted to a dtype that can store the correct value. However, in numpy 2.1.3 there's no such conversion, leading the function to return an incorrect value. Here's a concrete example: ``` import numpy as np a = np.int8(127) b = -128 print(a-b) ``` Numpy 1.21.3: a - b = 255 Numpy 2.1.3: a - b = -1 To remedy this, explicitly convert qx to int64. Change-Id: Ie0e9e7745a424103ce650e2d58fe1a1a4cbd30e1 --- backends/arm/tosa_quant_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py index 19397fe6b21..3727674d6c0 100644 --- a/backends/arm/tosa_quant_utils.py +++ b/backends/arm/tosa_quant_utils.py @@ -71,7 +71,7 @@ def quantize_value(x, qargs: QuantArgs, dtype=np.int8): def dequantize_value(qx, qargs: QuantArgs): - return (qx - qargs.zp) * qargs.scale + return (np.int64(qx) - qargs.zp) * qargs.scale def qargs_from_qnode(node: torch.fx.Node):