Skip to content

Commit 4db72ed

Browse files
authored
Merge branch 'main' into marlin-remove-quant-utils
2 parents 1ab5168 + c85ece4 commit 4db72ed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+805
-671
lines changed

.github/workflows/trunk.yml

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,45 +1043,6 @@ jobs:
10431043
build-tool: cmake
10441044
docker-image: ci-image:executorch-ubuntu-22.04-clang12
10451045

1046-
test-mcu-models:
1047-
name: test-mcu-models
1048-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1049-
strategy:
1050-
matrix:
1051-
include:
1052-
- build-tool: cmake
1053-
fail-fast: false
1054-
permissions:
1055-
id-token: write
1056-
contents: read
1057-
with:
1058-
runner: linux.2xlarge
1059-
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
1060-
submodules: 'recursive'
1061-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1062-
timeout: 90
1063-
script: |
1064-
BUILD_TOOL=${{ matrix.build-tool }}
1065-
1066-
# The generic Linux job chooses to use base env, not the one setup by the image
1067-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1068-
conda activate "${CONDA_ENV}"
1069-
1070-
# Try to mirror these as closely as possible
1071-
source .ci/scripts/utils.sh
1072-
install_executorch "--use-pt-pinned-commit"
1073-
1074-
.ci/scripts/setup-arm-baremetal-tools.sh
1075-
source examples/arm/ethos-u-scratch/setup_path.sh
1076-
1077-
# Run selective Build
1078-
chmod +x examples/selective_build/test_selective_build.sh
1079-
examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
1080-
1081-
# Run MCU models
1082-
chmod +x examples/arm/run_mcu_models_fvp.sh
1083-
examples/arm/run_mcu_models_fvp.sh --target=cortex-m55
1084-
10851046
test-models-windows:
10861047
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
10871048
strategy:

CODEOWNERS

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,31 +49,31 @@
4949
/extension/export_util @kimishpatel
5050
/extension/flat_tensor @lucylq
5151
/extension/gguf_util @larryliu0820
52-
/extension/kernel_util @kimishpatel @manuelcandales @swolchok
53-
/extension/llm @jackzhxng @larryliu0820 @swolchok @mergennachin
54-
/extension/memory_allocator @JacobSzwejbka @swolchok
52+
/extension/kernel_util @kimishpatel @manuelcandales
53+
/extension/llm @jackzhxng @larryliu0820 @mergennachin
54+
/extension/memory_allocator @JacobSzwejbka
5555
/extension/module @shoumikhin
56-
/extension/parallel @kimishpatel @swolchok
56+
/extension/parallel @kimishpatel
5757
/extension/pybindings @JacobSzwejbka @larryliu0820
58-
/extension/pytree @JacobSzwejbka @swolchok
59-
/extension/runner_util @swolchok
58+
/extension/pytree @JacobSzwejbka
59+
/extension/runner_util
6060
/extension/tensor @shoumikhin
61-
/extension/testing_util @swolchok
62-
/extension/threadpool @kimishpatel @swolchok
61+
/extension/testing_util
62+
/extension/threadpool @kimishpatel
6363
/extension/training @JacobSzwejbka
6464

65-
/kernels @manuelcandales @swolchok
65+
/kernels @manuelcandales
6666

6767
/profiler @Gasoonjia
6868

69-
/runtime @JacobSzwejbka @lucylq @swolchok
69+
/runtime @JacobSzwejbka @lucylq
7070
/runtime/backend @cccclai
7171

7272
/schema @JacobSzwejbka @lucylq
7373

74-
/scripts @GregoryComer @swolchok
74+
/scripts @GregoryComer
7575

76-
/shim @larryliu0820 @GregoryComer @swolchok
76+
/shim @larryliu0820 @GregoryComer
7777

7878
/third-party @GregoryComer
7979

backends/aoti/aoti_partitioner.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Callable, Dict, List, Optional, Tuple
8+
9+
import torch
10+
from executorch.exir._warnings import experimental
11+
from executorch.exir.backend.compile_spec_schema import CompileSpec
12+
from executorch.exir.backend.partitioner import (
13+
DelegationSpec,
14+
Partitioner,
15+
PartitionResult,
16+
)
17+
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
18+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
19+
from torch.export.exported_program import ExportedProgram
20+
21+
22+
@experimental(
23+
"This API and all of cuda backend related functionality are experimental."
24+
)
25+
class AotiPartitioner(Partitioner):
26+
"""
27+
Base partitioner for AOTInductor-driven backend integration.
28+
29+
This partitioner creates a single partition containing all operators from the input graph.
30+
It skips core ATen decomposition, allowing the backend to handle decomposition using
31+
AOTInductor's backend-specific decomposition table.
32+
33+
Only operators that cannot be handled by the aoti library will be excluded from
34+
the partition and fall back to ExecuTorch's default or custom handling.
35+
"""
36+
37+
def __init__(self, backend_name: str, compile_spec: List[CompileSpec]) -> None:
38+
"""
39+
Initialize the AOTI partitioner.
40+
41+
Args:
42+
backend_name: The name of the backend (e.g., "CudaBackend", "MetalBackend")
43+
compile_spec: List of compilation specifications
44+
"""
45+
self.delegation_spec = DelegationSpec(backend_name, compile_spec)
46+
47+
def partition(self, exported_program: ExportedProgram) -> PartitionResult:
48+
"""
49+
Fully delegate the graph to AOTInductor by tagging all nodes as a single partition.
50+
"""
51+
52+
partition_tags: Dict[str, DelegationSpec] = {}
53+
tag = "tag0"
54+
55+
for node in exported_program.graph.nodes:
56+
if node.op != "call_function":
57+
continue
58+
node.meta["delegation_tag"] = tag
59+
60+
partition_tags[tag] = self.delegation_spec
61+
62+
tag_constant_data(exported_program)
63+
tag_mutated_buffer(exported_program)
64+
65+
# Tag constant placeholders that have no users
66+
# tag_constant_data only tags constants that have users with delegation_tag
67+
# but we need to tag all constants for this partition
68+
for node in exported_program.graph.nodes:
69+
if node.op == "placeholder" and (
70+
is_param(exported_program, node)
71+
or is_buffer(exported_program, node)
72+
or is_lifted_tensor_constant(exported_program, node)
73+
):
74+
if "delegation_tag" not in node.meta:
75+
node.meta["delegation_tag"] = tag
76+
77+
return PartitionResult(
78+
tagged_exported_program=exported_program, partition_tags=partition_tags
79+
)
80+
81+
def ops_to_not_decompose(
82+
self, ep: ExportedProgram
83+
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
84+
"""
85+
Return a list of operations that should not be decomposed and let the AOT compiler handle them.
86+
Currently we skip ATen decomposition for all ops, and let the backend handle them.
87+
"""
88+
do_not_decompose = set()
89+
90+
for node in ep.graph.nodes:
91+
if node.op == "call_function" and isinstance(
92+
node.target, torch._ops.OpOverload
93+
):
94+
do_not_decompose.add(node.target)
95+
return list(do_not_decompose), None

backends/aoti/targets.bzl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
22

33
def define_common_targets():
4+
runtime.python_library(
5+
name = "aoti_partitioner",
6+
srcs = [
7+
"aoti_partitioner.py",
8+
],
9+
visibility = [
10+
"//executorch/...",
11+
],
12+
deps = [
13+
"//caffe2:torch",
14+
"//executorch/exir/backend:partitioner",
15+
"//executorch/exir/backend:utils",
16+
],
17+
)
18+
419
# AOTI common shims functionality
520
runtime.cxx_library(
621
name = "common_shims",

backends/apple/metal/metal_partitioner.py

Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4,74 +4,22 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7-
from typing import Callable, Dict, final, List, Optional, Tuple
7+
from typing import final, List
88

9-
import torch
9+
from executorch.backends.aoti.aoti_partitioner import AotiPartitioner
1010
from executorch.backends.apple.metal.metal_backend import MetalBackend # usort: skip
1111
from executorch.exir._warnings import experimental
1212
from executorch.exir.backend.compile_spec_schema import CompileSpec
13-
from executorch.exir.backend.partitioner import (
14-
DelegationSpec,
15-
Partitioner,
16-
PartitionResult,
17-
)
18-
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
19-
from torch.export.exported_program import ExportedProgram
2013

2114

2215
@final
2316
@experimental(
2417
"This API and all of Metal backend related functionality are experimental."
2518
)
26-
class MetalPartitioner(Partitioner):
19+
class MetalPartitioner(AotiPartitioner):
2720
"""
28-
Metal partitioner for AOTInductor backend integration.
29-
30-
This partitioner creates a single partition containing all operators from the input graph.
31-
It skips core ATen decomposition, allowing the Metal backend to handle decomposition using
32-
AOTInductor's MPS-specific decomposition table.
33-
34-
Only operators that cannot be handled by the aoti-mps library will be excluded from
35-
the partition and fall back to ExecuTorch's default or custom handling.
21+
Metal partitioner driven by AOTInductor backend.
3622
"""
3723

3824
def __init__(self, compile_spec: List[CompileSpec]) -> None:
39-
self.delegation_spec = DelegationSpec(MetalBackend.__name__, compile_spec)
40-
41-
def partition(self, exported_program: ExportedProgram) -> PartitionResult:
42-
"""
43-
Fully delegate the graph to AOTInductor by tagging all nodes as a single partition.
44-
"""
45-
46-
partition_tags: Dict[str, DelegationSpec] = {}
47-
tag = "tag0"
48-
49-
for node in exported_program.graph.nodes:
50-
if node.op != "call_function":
51-
continue
52-
node.meta["delegation_tag"] = tag
53-
54-
partition_tags[tag] = self.delegation_spec
55-
56-
tag_constant_data(exported_program)
57-
tag_mutated_buffer(exported_program)
58-
59-
return PartitionResult(
60-
tagged_exported_program=exported_program, partition_tags=partition_tags
61-
)
62-
63-
def ops_to_not_decompose(
64-
self, ep: ExportedProgram
65-
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
66-
"""
67-
Return a list of operations that should not be decomposed and let the AOT compiler handle them.
68-
Currently we skip ATen decomposition for all ops, and let the Metal backend handle them.
69-
"""
70-
do_not_decompose = set()
71-
72-
for node in ep.graph.nodes:
73-
if node.op == "call_function" and isinstance(
74-
node.target, torch._ops.OpOverload
75-
):
76-
do_not_decompose.add(node.target)
77-
return list(do_not_decompose), None
25+
super().__init__(MetalBackend.__name__, compile_spec)

backends/cadence/aot/ops_registrations.py

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
# pyre-strict
88

9-
import logging
109
from math import prod
1110
from typing import Callable, Optional, Tuple
1211

@@ -49,36 +48,16 @@ def _validate_ref_impl_exists() -> None:
4948
"cadence::roi_align_box_processor",
5049
}
5150

52-
# All of these should either
53-
# 1. be removed
54-
# 2. have a reference implementation added to ref_implementations.py
55-
_WARN_ONLY = {
56-
"cadence::quantized_softmax.per_tensor",
57-
"cadence::quantized_softmax",
58-
}
59-
6051
ref_impls = get_registered_ref_implementations()
61-
warn_impls = []
6252
error_impls = []
6353
for op_name in _REGISTERED_META_KERNELS:
6454
# Strip the namespace prefix if present (e.g., "cadence::" -> "")
6555
op_name_clean = op_name.split("::")[-1] if "::" in op_name else op_name
6656

6757
if op_name_clean not in ref_impls:
68-
if op_name in _WARN_ONLY:
69-
warn_impls.append(op_name)
70-
elif op_name not in _SKIP_OPS:
58+
if op_name not in _SKIP_OPS:
7159
error_impls.append(op_name)
7260

73-
if warn_impls:
74-
warn_msg = (
75-
f"The following {len(warn_impls)} meta kernel registrations are missing reference implementations:\n"
76-
+ "\n".join(f" - {op}" for op in warn_impls)
77-
+ "\n\nPlease add reference implementations in ref_implementations.py using "
78-
+ "@impl_tracked(m, '<op_name>')."
79-
)
80-
logging.warning(warn_msg)
81-
8261
if error_impls:
8362
error_msg = (
8463
f"The following {len(error_impls)} meta kernel registrations are missing reference implementations:\n"

0 commit comments

Comments
 (0)