pytorch
diff --git a/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/apple-perf.yml‎
Lines changed: 37 additions & 13 deletions b/‎.github/workflows/apple-perf.yml‎
Lines changed: 37 additions & 13 deletions
diff --git a/‎.github/workflows/apple.yml‎
Lines changed: 3 additions & 12 deletions b/‎.github/workflows/apple.yml‎
Lines changed: 3 additions & 12 deletions
diff --git a/‎.github/workflows/upload-android-test-specs.yml‎
Lines changed: 7 additions & 5 deletions b/‎.github/workflows/upload-android-test-specs.yml‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎.github/workflows/upload-apple-test-specs.yml‎
Lines changed: 8 additions & 4 deletions b/‎.github/workflows/upload-apple-test-specs.yml‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 3 additions & 3 deletions b/‎README.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/apple/coreml/compiler/coreml_preprocess.py‎
Lines changed: 58 additions & 3 deletions b/‎backends/apple/coreml/compiler/coreml_preprocess.py‎
Lines changed: 58 additions & 3 deletions
diff --git a/‎backends/apple/coreml/runtime/delegate/ETCoreMLModel.h‎
Lines changed: 1 addition & 5 deletions b/‎backends/apple/coreml/runtime/delegate/ETCoreMLModel.h‎
Lines changed: 1 addition & 5 deletions
@@ -292,8 +292,8 @@ jobs:
       # Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
       # It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
       # one app+flavor that could load and run the model.
-      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
-      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
+      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug.apk
+      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/minibench/app-debug-androidTest.apk
       # NB: Need to set the default spec here so that it works for periodic too
       test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
       # Uploaded to S3 from the previous job
 
@@ -204,22 +204,19 @@ jobs:
           if-no-files-found: ignore
           path: ${{ runner.temp }}/artifacts/
 
-  build-llm-demo:
-    name: build-llm-demo
+  build-benchmark-app:
+    name: build-benchmark-app
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     needs:
       - set-parameters
     secrets: inherit
-    strategy:
-      matrix:
-          tokenizer: [bpe]
     with:
       runner: macos-latest-xlarge
       python-version: '3.11'
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       upload-artifact: ios-apps
-      secrets-env: BUILD_CERTIFICATE_BASE64 BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
+      secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
       timeout: 90
       script: |
         set -eux
@@ -234,7 +231,7 @@ jobs:
         export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
 
         # Setup Apple certificate for iOS development
-        BUILD_PROVISION_PROFILE_BASE64="${SECRET_BUILD_PROVISION_PROFILE_BASE64}" \
+        BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64}" \
         BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \
         KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \
         .ci/scripts/setup-ios.sh
@@ -248,11 +245,38 @@ jobs:
           backends/apple/mps/install_requirements.sh
         echo "::endgroup::"
 
+        echo "::group::Build ExecuTorch iOS frameworks"
+        FRAMEWORKS=(
+          "executorch"
+          "backend_coreml"
+          "backend_mps"
+          "backend_xnnpack"
+          "kernels_custom"
+          "kernels_optimized"
+          "kernels_portable"
+          "kernels_quantized"
+        )
+
+        # Build Release iOS Frameworks
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
+          build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack
+
+        mkdir -p extension/apple/Benchmark/Frameworks
+        for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
+          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/apple/Benchmark/Frameworks/
+        ) done
+        echo "::endgroup::"
+
+        # NB: Although exported models can be copied to this directory and bundled together with the
+        # app, we don't use this in CI and rely on AWS extra data parameter to make the model and the
+        # tokenizer available to the benchmark. This decouples the app and the model. We just need to
+        # create the directory here to pass the build
+        mkdir -p extension/apple/Benchmark/Models
         ${CONDA_RUN} --no-capture-output \
-          build/build_apple_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
+          build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
-  upload-ios-apps:
-    needs: build-llm-demo
+  upload-benchmark-app:
+    needs: build-benchmark-app
     runs-on: linux.2xlarge
     steps:
       - name: Download the apps from GitHub
@@ -281,7 +305,7 @@ jobs:
   benchmark-on-device:
     needs:
       - set-parameters
-      - upload-ios-apps
+      - upload-benchmark-app
       - upload-models
     permissions:
       id-token: write
@@ -302,7 +326,7 @@ jobs:
       project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
       device-pool-arn: ${{ matrix.device }}
       # Uploaded to S3 from the previous job
-      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/LLaMAPerfBenchmark.ipa
-      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/LLaMAPerfBenchmark.xctestrun.zip
+      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.ipa
+      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/Benchmark.xctestrun.zip
       test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
       extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
@@ -15,27 +15,18 @@ on:
       - build/build_apple_frameworks.sh
       - build/create_frameworks.sh
       - build/test_ios_ci.sh
-      - examples/demo-apps/apple/**
+      - examples/demo-apps/apple_ios/**
       - extension/apple/**
       - extension/module/**
   workflow_dispatch:
-  # TODO (huydhn): This is used to validate the test spec. Eventually, we need a proper
-  # perf benchmark workflow like android-perf. This can be cleaned up once that workflow
-  # is ready
-  workflow_call:
-    inputs:
-      test_spec:
-        description: The test spec to drive the test on AWS devices
-        required: false
-        type: string
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
   cancel-in-progress: true
 
 jobs:
   build-demo-ios:
-    name: test-demo-ios
+    name: build-demo-ios
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:
@@ -116,7 +107,7 @@ jobs:
       # Uploaded to S3 from the previous job
       ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/ExecuTorchDemo.ipa
       ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/ExecuTorchDemo.xctestrun.zip
-      test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
+      test-spec: https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml
 
   build-frameworks-ios:
     name: build-frameworks-ios
 
@@ -4,16 +4,18 @@ on:
   pull_request:
     paths:
       - .github/workflows/upload-android-test-specs.yml
-      - examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
+      - extension/android/benchmark/android-llm-device-farm-test-spec.yml
   push:
     branches:
       - main
     paths:
       - .github/workflows/upload-android-test-specs.yml
-      - examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
+      - extension/android/benchmark/android-llm-device-farm-test-spec.yml
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  # NB: This concurrency group needs to be different from the one used in android-perf, otherwise
+  # GH complains about concurrency deadlock
+  group: android-spec-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
   cancel-in-progress: true
 
 jobs:
@@ -30,7 +32,7 @@ jobs:
             ${{ github.repository }}/${{ github.run_id }}/artifact
           retention-days: 1
           if-no-files-found: error
-          path: examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
+          path: extension/android/benchmark/android-llm-device-farm-test-spec.yml
 
   validate-android-test-spec:
     needs: upload-android-test-spec-for-validation
@@ -75,7 +77,7 @@ jobs:
 
       - name: Upload the spec to S3 ossci-android bucket
         shell: bash
-        working-directory: examples/demo-apps/android/LlamaDemo/
+        working-directory: extension/android/benchmark/
         env:
           SPEC_FILE: android-llm-device-farm-test-spec.yml
         run: |
 
@@ -13,7 +13,9 @@ on:
       - examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  # NB: This concurrency group needs to be different from the one used in apple-perf, otherwise
+  # GH complains about concurrency deadlock
+  group: apple-spec-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
   cancel-in-progress: true
 
 jobs:
@@ -32,16 +34,18 @@ jobs:
           if-no-files-found: error
           path: examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
 
-  # TODO (huydhn): An example on how to validate the test spec using the iOS demo app, but we need a proper
-  # perf benchmark workflow like android-perf
   validate-apple-test-spec:
     needs: upload-apple-test-spec-for-validation
-    uses: ./.github/workflows/apple.yml
+    uses: ./.github/workflows/apple-perf.yml
     secrets: inherit
     permissions:
       id-token: write
       contents: read
     with:
+      # Just use a small model here with a minimal amount of configuration to test the spec
+      models: stories110M
+      devices: apple_iphone_15
+      delegates: xnnpack
       test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/default-ios-device-farm-appium-test-spec.yml
 
   upload-apple-test-spec:
 
@@ -198,7 +198,7 @@ option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)
 
 option(EXECUTORCH_BUILD_KERNELS_QUANTIZED "Build the quantized kernels" OFF)
 
-option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch SDK")
+option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch Developer Tools")
 
 option(EXECUTORCH_BUILD_SIZE_TEST "Build the size test" OFF)
 
 
@@ -10,9 +10,9 @@ Key value propositions of ExecuTorch are:
 - **Portability:** Compatibility with a wide variety of computing platforms,
   from high-end mobile phones to highly constrained embedded systems and
   microcontrollers.
-- **Productivity:** Enabling developers to use the same toolchains and SDK from
-  PyTorch model authoring and conversion, to debugging and deployment to a wide
-  variety of platforms.
+- **Productivity:** Enabling developers to use the same toolchains and Developer
+  Tools from PyTorch model authoring and conversion, to debugging and deployment
+  to a wide variety of platforms.
 - **Performance:** Providing end users with a seamless and high-performance
   experience due to a lightweight runtime and utilizing full hardware
   capabilities such as CPUs, NPUs, and DSPs.
 
@@ -3,6 +3,7 @@
 # CoreML backend for delegating a EdgeProgram to CoreML.
 
 import json
+import logging
 
 import shutil
 import uuid
@@ -14,6 +15,7 @@
 from typing import Any, Dict, final, List, Optional, Tuple
 
 import coremltools as ct
+import coremltools.optimize as cto
 import executorchcoreml
 
 from executorch.exir.backend.backend_details import (
@@ -23,12 +25,16 @@
 )
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+
 
 class COMPILE_SPEC_KEYS(Enum):
     COMPUTE_UNITS = "compute_units"
     MODEL_TYPE = "model_type"
     MIN_DEPLOYMENT_TARGET = "min_deployment_target"
     MODEL_COMPUTE_PRECISION = "model_compute_precision"
+    OP_LINEAR_QUANTIZER_CONFIG = "op_linear_quantizer_config"
 
 
 class MODEL_PATHS(Enum):
@@ -169,12 +175,44 @@ def generate_compute_unit_compile_spec(
             compute_unit.name.lower().encode("utf-8"),
         )
 
+    @staticmethod
+    def generate_op_linear_quantizer_config_compile_spec(
+        op_linear_quantizer_config: Dict,
+    ) -> CompileSpec:
+        """
+        Returns the compile spec for post-conversion weight quantization. Its value is the
+        JSON-dumped, UTF-8 encoded dict used to build cto.coreml.OpLinearQuantizerConfig.
+        """
+        str_representation = json.dumps(op_linear_quantizer_config)
+        byte_representation = str_representation.encode("utf-8")
+        return CompileSpec(
+            COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG.value,
+            byte_representation,
+        )
+
+    @staticmethod
+    def op_linear_quantizer_config_from_compile_specs(
+        compile_specs: List[CompileSpec],
+    ) -> Optional[cto.coreml.OpLinearQuantizerConfig]:
+        """
+        Returns the post-conversion quantization config from the first matching compile spec, or None if there is none.
+        """
+        for compile_spec in compile_specs:
+            if compile_spec.key == COMPILE_SPEC_KEYS.OP_LINEAR_QUANTIZER_CONFIG.value:
+                config_dict_str = compile_spec.value.decode("utf-8")
+                config_dict = json.loads(config_dict_str)
+                config = cto.coreml.OpLinearQuantizerConfig._from_dict(config_dict)
+                return config
+
+        return None
+
     @staticmethod
     def generate_compile_specs(
         compute_unit: ct.ComputeUnit = ct.ComputeUnit.ALL,
         minimum_deployment_target: ct.target = ct.target.iOS15,
         compute_precision: ct.precision = ct.precision.FLOAT16,
         model_type: MODEL_TYPE = MODEL_TYPE.MODEL,
+        op_linear_quantizer_config: Optional[Dict] = None,
     ) -> List[CompileSpec]:
         """
         Returns the list of compile specs that's used by CoreMLBackend to lower the module.
@@ -192,6 +230,12 @@ def generate_compile_specs(
             CoreMLBackend.generate_compute_precision_compile_spec(compute_precision)
         )
         compile_specs.append(CoreMLBackend.generate_model_type_compile_spec(model_type))
+        if op_linear_quantizer_config is not None:
+            compile_specs.append(
+                CoreMLBackend.generate_op_linear_quantizer_config_compile_spec(
+                    op_linear_quantizer_config
+                )
+            )
 
         return compile_specs
 
@@ -368,18 +412,18 @@ def preprocess(
                 compile_specs,
             )
         )
-
         model_compute_precision: ct.precision = (
             CoreMLBackend.model_compute_precision_from_compile_specs(compile_specs)
         )
-
         minimum_deployment_target: ct.target = (
             CoreMLBackend.min_deployment_target_from_compile_specs(compile_specs)
         )
-
         compute_units: ct.ComputeUnit = CoreMLBackend.compute_unit_from_compile_specs(
             compile_specs
         )
+        op_linear_quantizer_config = (
+            CoreMLBackend.op_linear_quantizer_config_from_compile_specs(compile_specs)
+        )
 
         mlmodel = ct.convert(
             model=edge_program,
@@ -392,4 +436,15 @@ def preprocess(
             compute_units=compute_units,
         )
 
+        if op_linear_quantizer_config is not None:
+            logger.warning(
+                "Core ML Backend op_linear_quantizer_config API is experimental"
+            )
+            config = cto.coreml.OptimizationConfig(
+                global_config=op_linear_quantizer_config,
+                # skip embedding
+                op_type_configs={"gather": None},
+            )
+            mlmodel = cto.coreml.linear_quantize_weights(mlmodel, config=config)
+
         return CoreMLBackend.preprocess_model(mlmodel, model_type=model_type)
@@ -8,10 +8,6 @@
 #import <CoreML/CoreML.h>
 #import <vector>
 
-#if !defined(MODEL_STATE_IS_SUPPORTED) && __has_include(<CoreML/MLModel+MLState.h>)
-#define MODEL_STATE_IS_SUPPORTED 1
-#endif
-
 NS_ASSUME_NONNULL_BEGIN
 
 @class ETCoreMLAsset;
@@ -45,7 +41,7 @@ __attribute__((objc_subclassing_restricted))
 @property (strong, readonly, nonatomic) MLModel* mlModel;
 
 /// The model state.
-@property (strong, readonly, nonatomic) id state API_AVAILABLE(macos(15.0), ios(18.0), tvos(18.0), watchos(11.0));
+@property (strong, readonly, nonatomic, nullable) id state;
 
 /// The asset from which the model is loaded.
 @property (strong, readonly, nonatomic) ETCoreMLAsset* asset;