Skip to content

Commit 43e5927

Browse files
authored
Merge branch 'main' into add-profiling-to-xnn-executor-runner-2
2 parents 2ed3b4f + 40e6e52 commit 43e5927

File tree

454 files changed

+3565
-1074
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

454 files changed

+3565
-1074
lines changed

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,8 @@ jobs:
292292
# Unlike models, there is a limited number of build flavors for apps, and the model controls whether it should build with the bpe/tiktoken tokenizer.
293293
# It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
294294
# one app+flavor that could load and run the model.
295-
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
296-
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
295+
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug.apk
296+
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug-androidTest.apk
297297
# NB: Need to set the default spec here so that it works for periodic too
298298
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
299299
# Uploaded to S3 from the previous job

.github/workflows/apple-perf.yml

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -204,22 +204,19 @@ jobs:
204204
if-no-files-found: ignore
205205
path: ${{ runner.temp }}/artifacts/
206206

207-
build-llm-demo:
208-
name: build-llm-demo
207+
build-benchmark-app:
208+
name: build-benchmark-app
209209
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
210210
needs:
211211
- set-parameters
212212
secrets: inherit
213-
strategy:
214-
matrix:
215-
tokenizer: [bpe]
216213
with:
217214
runner: macos-latest-xlarge
218215
python-version: '3.11'
219216
submodules: 'true'
220217
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
221218
upload-artifact: ios-apps
222-
secrets-env: BUILD_CERTIFICATE_BASE64 BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
219+
secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
223220
timeout: 90
224221
script: |
225222
set -eux
@@ -234,7 +231,7 @@ jobs:
234231
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
235232
236233
# Setup Apple certificate for iOS development
237-
BUILD_PROVISION_PROFILE_BASE64="${SECRET_BUILD_PROVISION_PROFILE_BASE64}" \
234+
BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64}" \
238235
BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \
239236
KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \
240237
.ci/scripts/setup-ios.sh
@@ -248,11 +245,38 @@ jobs:
248245
backends/apple/mps/install_requirements.sh
249246
echo "::endgroup::"
250247
248+
echo "::group::Build ExecuTorch iOS frameworks"
249+
FRAMEWORKS=(
250+
"executorch"
251+
"backend_coreml"
252+
"backend_mps"
253+
"backend_xnnpack"
254+
"kernels_custom"
255+
"kernels_optimized"
256+
"kernels_portable"
257+
"kernels_quantized"
258+
)
259+
260+
# Build Release iOS Frameworks
261+
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
262+
build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack
263+
264+
mkdir -p extension/apple/Benchmark/Frameworks
265+
for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
266+
cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/apple/Benchmark/Frameworks/
267+
) done
268+
echo "::endgroup::"
269+
270+
# NB: Although exported models can be copied to this directory and bundled together with the
271+
# app, we don't use this in CI and rely on AWS extra data parameter to make the model and the
272+
# tokenizer available to the benchmark. This decouples the app and the model. We just need to
273+
# create the directory here to pass the build
274+
mkdir -p extension/apple/Benchmark/Models
251275
${CONDA_RUN} --no-capture-output \
252-
build/build_apple_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
276+
build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}
253277
254-
upload-ios-apps:
255-
needs: build-llm-demo
278+
upload-benchmark-app:
279+
needs: build-benchmark-app
256280
runs-on: linux.2xlarge
257281
steps:
258282
- name: Download the apps from GitHub
@@ -281,7 +305,7 @@ jobs:
281305
benchmark-on-device:
282306
needs:
283307
- set-parameters
284-
- upload-ios-apps
308+
- upload-benchmark-app
285309
- upload-models
286310
permissions:
287311
id-token: write
@@ -302,7 +326,7 @@ jobs:
302326
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
303327
device-pool-arn: ${{ matrix.device }}
304328
# Uploaded to S3 from the previous job
305-
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/LLaMAPerfBenchmark.ipa
306-
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/LLaMAPerfBenchmark.xctestrun.zip
329+
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.ipa
330+
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.xctestrun.zip
307331
test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
308332
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

.github/workflows/apple.yml

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,18 @@ on:
1515
- build/build_apple_frameworks.sh
1616
- build/create_frameworks.sh
1717
- build/test_ios_ci.sh
18-
- examples/demo-apps/apple/**
18+
- examples/demo-apps/apple_ios/**
1919
- extension/apple/**
2020
- extension/module/**
2121
workflow_dispatch:
22-
# TODO (huydhn): This is used to validate the test spec. Eventually, we need a proper
23-
# perf benchmark workflow like android-perf. This can be cleaned up once that workflow
24-
# is ready
25-
workflow_call:
26-
inputs:
27-
test_spec:
28-
description: The test spec to drive the test on AWS devices
29-
required: false
30-
type: string
3122

3223
concurrency:
3324
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
3425
cancel-in-progress: true
3526

3627
jobs:
3728
build-demo-ios:
38-
name: test-demo-ios
29+
name: build-demo-ios
3930
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
4031
secrets: inherit
4132
with:
@@ -116,7 +107,7 @@ jobs:
116107
# Uploaded to S3 from the previous job
117108
ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/ExecuTorchDemo.ipa
118109
ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/ExecuTorchDemo.xctestrun.zip
119-
test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
110+
test-spec: https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml
120111

121112
build-frameworks-ios:
122113
name: build-frameworks-ios

.github/workflows/upload-android-test-specs.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,18 @@ on:
44
pull_request:
55
paths:
66
- .github/workflows/upload-android-test-specs.yml
7-
- examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
7+
- extension/android/benchmark/android-llm-device-farm-test-spec.yml
88
push:
99
branches:
1010
- main
1111
paths:
1212
- .github/workflows/upload-android-test-specs.yml
13-
- examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
13+
- extension/android/benchmark/android-llm-device-farm-test-spec.yml
1414

1515
concurrency:
16-
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
16+
# NB: This concurrency group needs to be different from the one used in android-perf, otherwise
17+
# GH complains about concurrency deadlock
18+
group: android-spec-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
1719
cancel-in-progress: true
1820

1921
jobs:
@@ -30,7 +32,7 @@ jobs:
3032
${{ github.repository }}/${{ github.run_id }}/artifact
3133
retention-days: 1
3234
if-no-files-found: error
33-
path: examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
35+
path: extension/android/benchmark/android-llm-device-farm-test-spec.yml
3436

3537
validate-android-test-spec:
3638
needs: upload-android-test-spec-for-validation
@@ -75,7 +77,7 @@ jobs:
7577
7678
- name: Upload the spec to S3 ossci-android bucket
7779
shell: bash
78-
working-directory: examples/demo-apps/android/LlamaDemo/
80+
working-directory: extension/android/benchmark/
7981
env:
8082
SPEC_FILE: android-llm-device-farm-test-spec.yml
8183
run: |

.github/workflows/upload-apple-test-specs.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ on:
1313
- examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
1414

1515
concurrency:
16-
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
16+
# NB: This concurrency group needs to be different from the one used in apple-perf, otherwise
17+
# GH complains about concurrency deadlock
18+
group: apple-spec-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
1719
cancel-in-progress: true
1820

1921
jobs:
@@ -32,16 +34,18 @@ jobs:
3234
if-no-files-found: error
3335
path: examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
3436

35-
# TODO (huydhn): An example on how to validate the test spec using the iOS demo app, but we need a proper
36-
# perf benchmark workflow like android-perf
3737
validate-apple-test-spec:
3838
needs: upload-apple-test-spec-for-validation
39-
uses: ./.github/workflows/apple.yml
39+
uses: ./.github/workflows/apple-perf.yml
4040
secrets: inherit
4141
permissions:
4242
id-token: write
4343
contents: read
4444
with:
45+
# Just use a small model here with a minimal amount of configuration to test the spec
46+
models: stories110M
47+
devices: apple_iphone_15
48+
delegates: xnnpack
4549
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/default-ios-device-farm-appium-test-spec.yml
4650

4751
upload-apple-test-spec:

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)
198198

199199
option(EXECUTORCH_BUILD_KERNELS_QUANTIZED "Build the quantized kernels" OFF)
200200

201-
option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch SDK")
201+
option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch Developer Tools")
202202

203203
option(EXECUTORCH_BUILD_SIZE_TEST "Build the size test" OFF)
204204

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ Key value propositions of ExecuTorch are:
1010
- **Portability:** Compatibility with a wide variety of computing platforms,
1111
from high-end mobile phones to highly constrained embedded systems and
1212
microcontrollers.
13-
- **Productivity:** Enabling developers to use the same toolchains and SDK from
14-
PyTorch model authoring and conversion, to debugging and deployment to a wide
15-
variety of platforms.
13+
- **Productivity:** Enabling developers to use the same toolchains and Developer
14+
Tools from PyTorch model authoring and conversion, to debugging and deployment
15+
to a wide variety of platforms.
1616
- **Performance:** Providing end users with a seamless and high-performance
1717
experience due to a lightweight runtime and utilizing full hardware
1818
capabilities such as CPUs, NPUs, and DSPs.

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# CoreML backend for delegating an EdgeProgram to CoreML.
44

55
import json
6+
import logging
67

78
import shutil
89
import uuid
@@ -14,6 +15,7 @@
1415
from typing import Any, Dict, final, List, Optional, Tuple
1516

1617
import coremltools as ct
18+
import coremltools.optimize as cto
1719
import executorchcoreml
1820

1921
from executorch.exir.backend.backend_details import (
@@ -23,12 +25,16 @@
2325
)
2426
from executorch.exir.backend.compile_spec_schema import CompileSpec
2527

28+
logger = logging.getLogger(__name__)
29+
logger.setLevel(logging.WARNING)
30+
2631

2732
class COMPILE_SPEC_KEYS(Enum):
2833
COMPUTE_UNITS = "compute_units"
2934
MODEL_TYPE = "model_type"
3035
MIN_DEPLOYMENT_TARGET = "min_deployment_target"
3136
MODEL_COMPUTE_PRECISION = "model_compute_precision"
37+
OP_LINEAR_QUANTIZER_CONFIG = "op_linear_quantizer_config"
3238

3339

3440
class MODEL_PATHS(Enum):
@@ -169,12 +175,44 @@ def generate_compute_unit_compile_spec(
169175
compute_unit.name.lower().encode("utf-8"),
170176
)
171177

178+
@staticmethod
179+
def generate_op_linear_quantizer_config_compile_spec(
180+
op_linear_quantizer_config: Dict,
181+
) -> CompileSpec:
182+
"""
183+
Returns the compile spec representing the model post conversion quantization,
184+
which is a dict that will construct cto.coreml.OpLinearQuantizerConfig
185+
"""
186+
str_representation = json.dumps(op_linear_quantizer_config)
187+
byte_representation = str_representation.encode("utf-8")
188+
return CompileSpec(
189+
COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG.value,
190+
byte_representation,
191+
)
192+
193+
@staticmethod
194+
def op_linear_quantizer_config_from_compile_specs(
195+
compile_specs: List[CompileSpec],
196+
) -> cto.coreml.OpLinearQuantizerConfig:
197+
"""
198+
Returns the model's post conversion quantization by parsing the list of compile specs.
199+
"""
200+
for compile_spec in compile_specs:
201+
if compile_spec.key == COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG.value:
202+
config_dict_str = compile_spec.value.decode("utf-8")
203+
config_dict = json.loads(config_dict_str)
204+
config = cto.coreml.OpLinearQuantizerConfig._from_dict(config_dict)
205+
return config
206+
207+
return None
208+
172209
@staticmethod
173210
def generate_compile_specs(
174211
compute_unit: ct.ComputeUnit = ct.ComputeUnit.ALL,
175212
minimum_deployment_target: ct.target = ct.target.iOS15,
176213
compute_precision: ct.precision = ct.precision.FLOAT16,
177214
model_type: MODEL_TYPE = MODEL_TYPE.MODEL,
215+
op_linear_quantizer_config: Optional[Dict] = None,
178216
) -> List[CompileSpec]:
179217
"""
180218
Returns the list of compile specs that's used by CoreMLBackend to lower the module.
@@ -192,6 +230,12 @@ def generate_compile_specs(
192230
CoreMLBackend.generate_compute_precision_compile_spec(compute_precision)
193231
)
194232
compile_specs.append(CoreMLBackend.generate_model_type_compile_spec(model_type))
233+
if op_linear_quantizer_config is not None:
234+
compile_specs.append(
235+
CoreMLBackend.generate_op_linear_quantizer_config_compile_spec(
236+
op_linear_quantizer_config
237+
)
238+
)
195239

196240
return compile_specs
197241

@@ -368,18 +412,18 @@ def preprocess(
368412
compile_specs,
369413
)
370414
)
371-
372415
model_compute_precision: ct.precision = (
373416
CoreMLBackend.model_compute_precision_from_compile_specs(compile_specs)
374417
)
375-
376418
minimum_deployment_target: ct.target = (
377419
CoreMLBackend.min_deployment_target_from_compile_specs(compile_specs)
378420
)
379-
380421
compute_units: ct.ComputeUnit = CoreMLBackend.compute_unit_from_compile_specs(
381422
compile_specs
382423
)
424+
op_linear_quantizer_config = (
425+
CoreMLBackend.op_linear_quantizer_config_from_compile_specs(compile_specs)
426+
)
383427

384428
mlmodel = ct.convert(
385429
model=edge_program,
@@ -392,4 +436,15 @@ def preprocess(
392436
compute_units=compute_units,
393437
)
394438

439+
if op_linear_quantizer_config is not None:
440+
logger.warning(
441+
"Core ML Backend op_linear_quantizer_config API is experimental"
442+
)
443+
config = cto.coreml.OptimizationConfig(
444+
global_config=op_linear_quantizer_config,
445+
# skip embedding
446+
op_type_configs={"gather": None},
447+
)
448+
mlmodel = cto.coreml.linear_quantize_weights(mlmodel, config=config)
449+
395450
return CoreMLBackend.preprocess_model(mlmodel, model_type=model_type)

backends/apple/coreml/runtime/delegate/ETCoreMLModel.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@
88
#import <CoreML/CoreML.h>
99
#import <vector>
1010

11-
#if !defined(MODEL_STATE_IS_SUPPORTED) && __has_include(<CoreML/MLModel+MLState.h>)
12-
#define MODEL_STATE_IS_SUPPORTED 1
13-
#endif
14-
1511
NS_ASSUME_NONNULL_BEGIN
1612

1713
@class ETCoreMLAsset;
@@ -45,7 +41,7 @@ __attribute__((objc_subclassing_restricted))
4541
@property (strong, readonly, nonatomic) MLModel* mlModel;
4642

4743
/// The model state.
48-
@property (strong, readonly, nonatomic) id state API_AVAILABLE(macos(15.0), ios(18.0), tvos(18.0), watchos(11.0));
44+
@property (strong, readonly, nonatomic, nullable) id state;
4945

5046
/// The asset from which the model is loaded.
5147
@property (strong, readonly, nonatomic) ETCoreMLAsset* asset;

0 commit comments

Comments
 (0)