pytorch · facebook-github-bot · Oct 4, 2025 · Oct 4, 2025
@@ -182,11 +182,11 @@ def define_node(
             acc_type = ts.DType.FP32
 
         tosa_graph.addConst(
-            [1], output.dtype, [input_zp], name=f"{conv2d_output_name}_input_zp"
+            [1], inputs[0].dtype, [input_zp], name=f"{conv2d_output_name}_input_zp"
         )
         tosa_graph.addConst(
             [1],
-            output.dtype,
+            inputs[1].dtype,
             weight_zp,
             name=f"{conv2d_output_name}_weight_zp",
         )
@@ -269,7 +269,7 @@ def define_node(
 
         # For quantized convolution, rescale the output value back to the same
         # integer value domain of the next op. Otherwise return float32 output.
-        if inputs[0].dtype == ts.DType.INT8 or inputs[0].dtype == ts.DType.INT16:
+        if output.dtype == ts.DType.INT8 or output.dtype == ts.DType.INT16:
             # Get scale_factor from input, weight, and output.
             input_scale = input_qparams[0].get_scale_per_tensor()  # type: ignore[possibly-undefined]  # pyre-ignore [61]
             per_channel_quant = input_qparams[1].per_channel  # pyre-ignore [61]

@@ -8,8 +8,6 @@
 
 from typing import Tuple
 
-import pytest
-
 import torch
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
@@ -313,12 +311,8 @@ def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.parametrize("test_data", test_data_all_16a8w)
 @common.XfailIfNoCorstone300
-@pytest.mark.xfail(
-    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
-    strict=False,
-)
 def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test linear operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
     test_data, out_features, has_bias, per_channel_quantization = test_data()
@@ -347,12 +341,8 @@ def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.parametrize("test_data", test_data_all_16a8w)
 @common.XfailIfNoCorstone320
-@pytest.mark.xfail(
-    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
-    strict=False,
-)
 def test_linear_16a8w_u85_INT16(test_data: torch.Tensor):
     """Test linear operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
     test_data, out_features, has_bias, per_channel_quantization = test_data()