From d374c89e7e6db7b9724b63848dc396048166836f Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 25 Aug 2025 17:20:14 -0700 Subject: [PATCH 1/3] Enable per-row/per-col grouping in CoreML LUT ops --- backends/apple/coreml/compiler/torch_ops.py | 11 +++- backends/apple/coreml/test/test_torch_ops.py | 65 ++++++++++++++++++-- third-party/ao | 2 +- 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/backends/apple/coreml/compiler/torch_ops.py b/backends/apple/coreml/compiler/torch_ops.py index e53670951e0..8cbc43b5e89 100644 --- a/backends/apple/coreml/compiler/torch_ops.py +++ b/backends/apple/coreml/compiler/torch_ops.py @@ -175,11 +175,18 @@ def dequantize_codebook(context, node): # Assert codebook is as expected. codebook.dim() = codes.dim() + 2 assert len(codebook.shape) == 4, "Only rank 4 inputs are supported for codebook" - assert codebook.shape[0] == 1, "Only grouped_channel granularity is supported" - n_luts = codebook.shape[1] + assert (codebook.shape[0] == 1) or ( + codebook.shape[1] == 1 + ), "Only grouped_channel granularity is supported" + if codebook.shape[0] == 1: + n_luts = codebook.shape[1] + else: + n_luts = codebook.shape[0] + assert ( codes.shape[1] % n_luts == 0 ), "codes.shape[1] must be divisible by codebook.shape[1]" + assert codebook.shape[2] == 2**nbits assert codebook.shape[3] == 1, "Only scalar look up values are supported" diff --git a/backends/apple/coreml/test/test_torch_ops.py b/backends/apple/coreml/test/test_torch_ops.py index 4fdbfdd8f21..f399e6ab4bd 100644 --- a/backends/apple/coreml/test/test_torch_ops.py +++ b/backends/apple/coreml/test/test_torch_ops.py @@ -158,7 +158,7 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self): et_prog = delegated_program.to_executorch() self._compare_outputs(et_prog, model, example_inputs) - def test_dequantize_codebook_linear(self): + def test_dequantize_codebook_linear_per_grouped_col(self): model, example_inputs = self._get_test_model() quantize_( model, @@ -185,7 +185,34 @@ def test_dequantize_codebook_linear(self): et_prog = delegated_program.to_executorch() self._compare_outputs(et_prog, model, example_inputs) - def test_dequantize_codebook_embedding(self): + def test_dequantize_codebook_linear_per_grouped_row(self): + model, example_inputs = self._get_test_model() + quantize_( + model, + CodebookWeightOnlyConfig(dtype=torch.uint2, block_size=[16, -1]), + ) + ep = torch.export.export(model, example_inputs) + assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code + delegated_program = executorch.exir.to_edge_transform_and_lower( + ep, + partitioner=[self._coreml_partitioner()], + ) + for node in delegated_program.exported_program().graph.nodes: + if node.op == "call_function": + assert node.target.__name__ in [ + "executorch_call_delegate", + "getitem", + ], f"Got unexpected node target after delegation: {node.target.__name__}" + + assert ( + "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default" + in format_delegated_graph(delegated_program.exported_program().graph_module) + ) + + et_prog = delegated_program.to_executorch() + self._compare_outputs(et_prog, model, example_inputs) + + def test_dequantize_codebook_embedding_per_grouped_col(self): model, example_inputs = self._get_test_model() quantize_( model, @@ -213,6 +240,34 @@ def test_dequantize_codebook_embedding(self): et_prog = delegated_program.to_executorch() self._compare_outputs(et_prog, model, example_inputs) + def test_dequantize_codebook_embedding_per_grouped_row(self): + model, example_inputs = self._get_test_model() + quantize_( + model, + CodebookWeightOnlyConfig(dtype=torch.uint3, block_size=[16, -1]), + lambda m, fqn: isinstance(m, torch.nn.Embedding), + ) + ep = torch.export.export(model, example_inputs) + assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code + delegated_program = executorch.exir.to_edge_transform_and_lower( + ep, + partitioner=[self._coreml_partitioner()], + ) + for node in delegated_program.exported_program().graph.nodes: + if node.op == "call_function": + assert node.target.__name__ in [ + "executorch_call_delegate", + "getitem", + ], f"Got unexpected node target after delegation: {node.target.__name__}" + + assert ( + "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default" + in format_delegated_graph(delegated_program.exported_program().graph_module) + ) + + et_prog = delegated_program.to_executorch() + self._compare_outputs(et_prog, model, example_inputs) + if __name__ == "__main__": test_runner = TestTorchOps() @@ -221,5 +276,7 @@ def test_dequantize_codebook_embedding(self): test_runner.test_dequantize_affine_c4w_embedding() test_runner.test_dequantize_affine_c4w_linear() test_runner.test_dequantize_affine_c8w_embedding_b4w_linear() - test_runner.test_dequantize_codebook_linear() - test_runner.test_dequantize_codebook_embedding() + test_runner.test_dequantize_codebook_linear_per_grouped_col() + test_runner.test_dequantize_codebook_linear_per_grouped_row() + test_runner.test_dequantize_codebook_embedding_per_grouped_col() + test_runner.test_dequantize_codebook_embedding_per_grouped_row() diff --git a/third-party/ao b/third-party/ao index 1526dfe50cb..f03a737582b 160000 --- a/third-party/ao +++ b/third-party/ao @@ -1 +1 @@ -Subproject commit 1526dfe50cbce877ddb1d0055af46287caae7470 +Subproject commit f03a737582b6a247fa86301678b4e9ebdd8fca57 From 89910fee005f5bf6ecb887ab0f06af521831f570 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 28 Aug 2025 11:44:32 -0700 Subject: [PATCH 2/3] up --- backends/apple/coreml/test/test_torch_ops.py | 2 +- third-party/ao | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/apple/coreml/test/test_torch_ops.py b/backends/apple/coreml/test/test_torch_ops.py index f399e6ab4bd..bcb2ba34cf1 100644 --- a/backends/apple/coreml/test/test_torch_ops.py +++ b/backends/apple/coreml/test/test_torch_ops.py @@ -35,7 +35,7 @@ def _coreml_partitioner(self): def _get_test_model(self): model = torch.nn.Sequential( - torch.nn.Embedding(64, 128), torch.nn.Linear(128, 128), torch.nn.ReLU() + torch.nn.Embedding(64, 128), torch.nn.Linear(128, 256), torch.nn.ReLU() ) example_inputs = (torch.LongTensor([0]),) return model, example_inputs diff --git a/third-party/ao b/third-party/ao index f03a737582b..364ad471b28 160000 --- a/third-party/ao +++ b/third-party/ao @@ -1 +1 @@ -Subproject commit f03a737582b6a247fa86301678b4e9ebdd8fca57 +Subproject commit 364ad471b287702df9fb499a511440b4aa69ee93 From 1c5d7b4a2c46fa28afbc8dfce13890f392bdbcc3 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 28 Aug 2025 11:53:50 -0700 Subject: [PATCH 3/3] up --- backends/apple/coreml/compiler/torch_ops.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/backends/apple/coreml/compiler/torch_ops.py b/backends/apple/coreml/compiler/torch_ops.py index 8cbc43b5e89..481f474dc00 100644 --- a/backends/apple/coreml/compiler/torch_ops.py +++ b/backends/apple/coreml/compiler/torch_ops.py @@ -179,13 +179,17 @@ def dequantize_codebook(context, node): codebook.shape[1] == 1 ), "Only grouped_channel granularity is supported" if codebook.shape[0] == 1: + # LUT is per column group n_luts = codebook.shape[1] + assert ( + codes.shape[1] % n_luts == 0 + ), "codes.shape[1] must be divisible by codebook.shape[1]" else: + # LUT is per row group n_luts = codebook.shape[0] - - assert ( - codes.shape[1] % n_luts == 0 - ), "codes.shape[1] must be divisible by codebook.shape[1]" + assert ( + codes.shape[0] % n_luts == 0 + ), "codes.shape[0] must be divisible by codebook.shape[0]" assert codebook.shape[2] == 2**nbits assert codebook.shape[3] == 1, "Only scalar look up values are supported"