From a237e062721e8e98ec5def597bf800c18de45b6e Mon Sep 17 00:00:00 2001
From: Nitin Jain <jainnitin@meta.com>
Date: Thu, 28 Aug 2025 23:42:59 -0700
Subject: [PATCH] Add 16A8W support and test for cat operation

Add 16A8W quantization support and test for the cat operation in ExecuTorch ARM backend.

This follows the pattern established for linear, mul, sigmoid, tanh, slice, and view/transpose operations, extending int16 support to cat operations.

Changes:
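- Add get_symmetric_a16w8_cat_quantizer helper and test_cat_16a8w_tosa_INT test function
- Add test_cat_16a8w_u55_INT16 and test_cat_16a8w_u85_INT16 (marked xfail: Vela compilation fails for int16 cat operations)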
- Enable test_cat.py in test targets configuration

The 16A8W configuration uses 16-bit activations with 8-bit weights, enabling higher precision for activations while maintaining weight efficiency.

Differential Revision: [D80511455](https://our.internmc.facebook.com/intern/diff/D80511455/)

[ghstack-poisoned]
---
 backends/arm/test/ops/test_cat.py | 108 +++++++++++++++++++++++++++++-
 backends/arm/test/targets.bzl     |   1 +
 2 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/backends/arm/test/ops/test_cat.py b/backends/arm/test/ops/test_cat.py
index 55578aa15c6..45915d4b743 100644
--- a/backends/arm/test/ops/test_cat.py
+++ b/backends/arm/test/ops/test_cat.py
@@ -8,8 +8,13 @@
 
 from typing import Tuple
 
+import pytest
 import torch
-from executorch.backends.arm.test import common
+from executorch.backends.arm.quantizer.arm_quantizer import (
+    get_symmetric_a16w8_quantization_config,
+    TOSAQuantizer,
+)
+from executorch.backends.arm.test import common, conftest
 
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -18,6 +23,8 @@
     TosaPipelineINT,
     VgfPipeline,
 )
+from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.xnnpack.test.tester import Quantize
 
 input_t1 = Tuple[torch.Tensor]  # Input x
 
@@ -151,3 +158,102 @@ def test_cat_vgf_INT(test_data: Tuple):
         tosa_version="TOSA-1.0+INT",
     )
     pipeline.run()
+
+
+def get_symmetric_a16w8_cat_quantizer(per_channel_quantization=False):  # Builds the Quantize stage handed to the 16A8W cat tests.
+    tosa_version = conftest.get_option("tosa_version")
+    tosa_profiles = {  # only TOSA "1.0" has an entry
+        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
+    }
+
+    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])  # raises KeyError when the tosa_version option is not "1.0"
+    quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
+    )
+
+    return Quantize(  # the a16w8 config is constructed a second time and passed alongside the quantizer
+        quantizer,
+        get_symmetric_a16w8_quantization_config(
+            is_per_channel=per_channel_quantization
+        ),
+    )
+
+
+@common.parametrize("test_data", Cat.test_parameters)
+def test_cat_16a8w_tosa_INT(test_data: Tuple):
+    """Test cat operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
+    per_channel_quantization = False  # forwarded to both the pipeline and the quantizer helper
+
+    pipeline = TosaPipelineINT[input_t1](
+        Cat(),
+        test_data(),
+        aten_op,
+        exir_op=[],  # empty list here; the U55/U85 16A8W tests pass the module-level exir_op instead
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        tosa_extensions=["int16"],
+    )
+
+    pipeline.change_args(  # presumably swaps the arguments of the "quantize" stage for the 16A8W quantizer - TODO confirm
+        "quantize",
+        get_symmetric_a16w8_cat_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Cat.test_parameters)
+@common.XfailIfNoCorstone300
+@pytest.mark.xfail(  # no strict= given, so an unexpected pass is only reported as XPASS unless xfail_strict is configured
+    reason="Vela compilation fails with 'Invalid arguments' for int16 cat operations"
+)
+def test_cat_16a8w_u55_INT16(test_data: Tuple):
+    """Test cat operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
+    per_channel_quantization = False  # forwarded to both the pipeline and the quantizer helper
+
+    pipeline = EthosU55PipelineINT[input_t1](  # NOTE(review): unlike the TOSA 16A8W test, no tosa_extensions=["int16"] is passed - confirm intended
+        Cat(),
+        test_data(),
+        aten_op,
+        exir_op,
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(  # presumably swaps the arguments of the "quantize" stage for the 16A8W quantizer - TODO confirm
+        "quantize",
+        get_symmetric_a16w8_cat_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Cat.test_parameters)
+@common.XfailIfNoCorstone320
+@pytest.mark.xfail(  # no strict= given, so an unexpected pass is only reported as XPASS unless xfail_strict is configured
+    reason="Vela compilation fails with 'Invalid arguments' for int16 cat operations"
+)
+def test_cat_16a8w_u85_INT16(test_data: Tuple):
+    """Test cat operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    per_channel_quantization = False  # forwarded to both the pipeline and the quantizer helper
+
+    pipeline = EthosU85PipelineINT[input_t1](  # NOTE(review): unlike the TOSA 16A8W test, no tosa_extensions=["int16"] is passed - confirm intended
+        Cat(),
+        test_data(),
+        aten_op,
+        exir_op,
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(  # presumably swaps the arguments of the "quantize" stage for the 16A8W quantizer - TODO confirm
+        "quantize",
+        get_symmetric_a16w8_cat_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 5714039d134..68223eff3ee 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -15,6 +15,7 @@ def define_arm_tests():
     test_files += [
         "ops/test_add.py",
         "ops/test_avg_pool2d.py",
+        "ops/test_cat.py",
         "ops/test_linear.py", 
         "ops/test_mul.py",
         "ops/test_slice.py",