diff --git a/.buckconfig b/.buckconfig
index 8ccb8ef4aae..6aaf7221b3e 100644
--- a/.buckconfig
+++ b/.buckconfig
@@ -23,3 +23,14 @@
[parser]
target_platform_detector_spec = target:root//...->prelude//platforms:default target:shim//...->prelude//platforms:default
+
+# Limit the number of files that the buck daemon needs to monitor. If every
+# submodule is cloned recursively, some systems can fail to build with "OS file
+# watch limit reached".
+[project]
+ ignore = \
+ .git, \
+ **/.git, \
+ third-party/pytorch/third_party, \
+ cmake-out, \
+ pip-out
diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index 44632703e32..1fcaede5ad1 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-0a038cf0cff2d071b7359ac0491fd2ba7798a438
+release/2.3
diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
index ef65b6f9b42..717eff6157d 100755
--- a/.ci/scripts/gather_test_models.py
+++ b/.ci/scripts/gather_test_models.py
@@ -23,7 +23,7 @@
"w2l": "linux.12xlarge",
"ic4": "linux.12xlarge",
"resnet50": "linux.12xlarge",
- "llava_encoder": "linux.4xlarge",
+ "llava_encoder": "linux.12xlarge",
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
"dl3": "linux.12xlarge",
"emformer_join": "linux.12xlarge",
diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
index c7c00be2574..6b543c15267 100644
--- a/.ci/scripts/utils.sh
+++ b/.ci/scripts/utils.sh
@@ -18,8 +18,11 @@ retry () {
install_executorch() {
which pip
# Install executorch, this assumes that Executorch is checked out in the
- # current directory
- pip install . --no-build-isolation -v
+ # current directory. The --extra-index-url options tell pip to look on the
+ # pytorch servers for nightly and pre-release versions of torch packages.
+ pip install . --no-build-isolation -v \
+ --extra-index-url https://download.pytorch.org/whl/test/cpu \
+ --extra-index-url https://download.pytorch.org/whl/nightly/cpu
# Just print out the list of packages for debugging
pip list
}
diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml
index c36c5861168..81a4bd60e9e 100644
--- a/.github/workflows/_unittest.yml
+++ b/.github/workflows/_unittest.yml
@@ -14,7 +14,7 @@ on:
jobs:
linux:
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
@@ -44,7 +44,7 @@ jobs:
pytest -n auto --cov=./ --cov-report=xml
macos:
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
strategy:
matrix:
include:
diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
index 0d8931cf102..f08aeede385 100644
--- a/.github/workflows/android.yml
+++ b/.github/workflows/android.yml
@@ -10,7 +10,8 @@ on:
- .ci/docker/**
- .github/workflows/android.yml
- install_requirements.sh
- - examples/demo-apps/**
+ - examples/demo-apps/android/**
+ - extension/android/**
- extension/module/**
workflow_dispatch:
@@ -33,6 +34,7 @@ jobs:
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
+ upload-artifact: android-apps
script: |
set -eux
@@ -45,3 +47,44 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Build Android demo app
bash build/test_android_ci.sh
+
+ mkdir -p artifacts-to-be-uploaded
+ mkdir -p artifacts-to-be-uploaded/arm64-v8a/
+ mkdir -p artifacts-to-be-uploaded/x86_64/
+ # Copy the JAR into the staging directory; the upload-artifacts job below pushes it to S3
+ cp extension/android/build/libs/executorch.jar artifacts-to-be-uploaded/
+ # Copy the demo app APK as well
+ cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/
+ # Also copy the libraries
+ cp cmake-out-android-arm64-v8a/lib/*.a artifacts-to-be-uploaded/arm64-v8a/
+ cp cmake-out-android-arm64-v8a/extension/android/*.so artifacts-to-be-uploaded/arm64-v8a/
+ cp cmake-out-android-x86_64/lib/*.a artifacts-to-be-uploaded/x86_64/
+ cp cmake-out-android-x86_64/extension/android/*.so artifacts-to-be-uploaded/x86_64/
+
+ # Upload the app and its test suite to S3 so that they can be downloaded by the test job
+ upload-artifacts:
+ needs: test-demo-android
+ runs-on: linux.2xlarge
+ steps:
+ - name: Download the artifacts
+ uses: actions/download-artifact@v3
+ with:
+ # The name here needs to match the name of the upload-artifact parameter
+ name: android-apps
+ path: ${{ runner.temp }}/artifacts/
+
+ - name: Verify the artifacts
+ shell: bash
+ working-directory: ${{ runner.temp }}/artifacts/
+ run: |
+ ls -lah ./
+
+ - name: Upload the artifacts to S3
+ uses: seemethere/upload-artifact-s3@v5
+ with:
+ s3-bucket: gha-artifacts
+ s3-prefix: |
+ ${{ github.repository }}/${{ github.run_id }}/artifact
+ retention-days: 14
+ if-no-files-found: ignore
+ path: ${{ runner.temp }}/artifacts/
diff --git a/.github/workflows/apple.yml b/.github/workflows/apple.yml
index 06aa6a66e98..667ddb500d3 100644
--- a/.github/workflows/apple.yml
+++ b/.github/workflows/apple.yml
@@ -26,7 +26,7 @@ concurrency:
jobs:
test-demo-ios:
name: test-demo-ios
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
with:
runner: macos-latest-xlarge
python-version: '3.11'
@@ -52,7 +52,7 @@ jobs:
build-frameworks-ios:
name: build-frameworks-ios
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
with:
runner: macos-latest-xlarge
python-version: '3.11'
@@ -64,7 +64,7 @@ jobs:
WORKSPACE=$(pwd)
pushd "${WORKSPACE}/pytorch/executorch"
BUILD_TOOL=cmake
- VERSION="0.1.0"
+ VERSION="0.2.0"
FRAMEWORKS=(
"executorch"
"coreml_backend"
@@ -137,8 +137,8 @@ jobs:
# NB: The name here needs to match the upload-artifact name from build-frameworks-ios job
name: executorch-frameworks-ios
path: ${{ runner.temp }}/frameworks-ios/
- - name: Only push to S3 when running the workflow manually from main branch
- if: ${{ github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/main' }}
+ - name: Only push to S3 when running the workflow manually from release/0.2 branch
+ if: ${{ github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/release/0.2' }}
shell: bash
run: |
set -eux
diff --git a/.github/workflows/build-wheels-linux.yml b/.github/workflows/build-wheels-linux.yml
index a2f86b219f8..abe680f946e 100644
--- a/.github/workflows/build-wheels-linux.yml
+++ b/.github/workflows/build-wheels-linux.yml
@@ -19,12 +19,12 @@ on:
jobs:
generate-matrix:
- uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+ uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@release/2.3
with:
package-type: wheel
os: linux
test-infra-repository: pytorch/test-infra
- test-infra-ref: main
+ test-infra-ref: release/2.3
with-cuda: disabled
with-rocm: disabled
@@ -43,13 +43,18 @@ jobs:
smoke-test-script: build/packaging/smoke_test.py
package-name: executorch
name: ${{ matrix.repository }}
- uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
+ uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@release/2.3
with:
repository: ${{ matrix.repository }}
ref: ""
test-infra-repository: pytorch/test-infra
- test-infra-ref: main
+ test-infra-ref: release/2.3
build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+ # ExecuTorch only needs the first layer of submodules; override the
+ # "recursive" default to do less work, and to give the buck daemon fewer
+ # files to look at.
+ submodules: true
+ env-var-script: build/packaging/env_var_script_linux.sh
pre-script: ${{ matrix.pre-script }}
post-script: ${{ matrix.post-script }}
package-name: ${{ matrix.package-name }}
diff --git a/.github/workflows/build-wheels-m1.yml b/.github/workflows/build-wheels-m1.yml
index dbc74433ff8..0fa451c378e 100644
--- a/.github/workflows/build-wheels-m1.yml
+++ b/.github/workflows/build-wheels-m1.yml
@@ -19,12 +19,12 @@ on:
jobs:
generate-matrix:
- uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
+ uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@release/2.3
with:
package-type: wheel
os: macos-arm64
test-infra-repository: pytorch/test-infra
- test-infra-ref: main
+ test-infra-ref: release/2.3
with-cuda: disabled
with-rocm: disabled
@@ -43,13 +43,19 @@ jobs:
smoke-test-script: build/packaging/smoke_test.py
package-name: executorch
name: ${{ matrix.repository }}
- uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main
+ uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@release/2.3
with:
repository: ${{ matrix.repository }}
ref: ""
test-infra-repository: pytorch/test-infra
- test-infra-ref: main
+ test-infra-ref: release/2.3
build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+ # ExecuTorch only needs the first layer of submodules; override the
+ # "recursive" default to do less work, and to give the buck daemon fewer
+ # files to look at.
+ submodules: true
+ delocate-wheel: false
+ env-var-script: build/packaging/env_var_script_m1.sh
pre-script: ${{ matrix.pre-script }}
post-script: ${{ matrix.post-script }}
package-name: ${{ matrix.package-name }}
diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml
index ee5cfb859b3..b243d4ffa02 100644
--- a/.github/workflows/doc-build.yml
+++ b/.github/workflows/doc-build.yml
@@ -14,7 +14,7 @@ on:
jobs:
build:
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
@@ -46,13 +46,9 @@ jobs:
# ET_VERSION_DOCS will be pulled during the doc build to add to the version dropdown
# on the website. See docs/source/conf.py for details
- REF_TYPE=${{ github.ref_type }}
- REF_NAME=${{ github.ref_name }}
-
- echo "$REF_TYPE"
- echo "$REF_NAME"
-
- ET_VERSION_DOCS="${REF_NAME}"
+ GITHUB_REF=${{ github.ref }}
+ echo "$GITHUB_REF"
+ ET_VERSION_DOCS="${GITHUB_REF}"
echo "$ET_VERSION_DOCS"
set -eux
@@ -68,26 +64,24 @@ jobs:
make html
cd ..
+ # If it's the main branch, add a noindex tag to all .html files to exclude them from Google Search indexing.
+ echo "GitHub Ref: ${GITHUB_REF}"
+ if [[ "${{ github.ref }}" == 'refs/heads/main' ]]; then
+ find docs/_build/html/ -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">';
+ fi
+
cp -rf docs/_build/html/* "${RUNNER_DOCS_DIR}"
mv docs/_build/html "${RUNNER_ARTIFACT_DIR}"
ls -R "${RUNNER_ARTIFACT_DIR}"/*/*.html
-# Enable preview later. Previews are available publicly
-#
-# upload-preview:
-# if: github.repository == 'pytorch/executorch' && github.event_name == 'push' &&
-# (github.ref_type == 'branch' && github.ref_name == 'main')
-# uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-
upload-gh-pages:
needs: build
- if: github.repository == 'pytorch/executorch' && github.event_name == 'push' &&
- ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
+ if: github.repository == 'pytorch/executorch' && github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
permissions:
contents: write
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
with:
repository: pytorch/executorch
download-artifact: docs
@@ -96,22 +90,17 @@ jobs:
script: |
set -euo pipefail
- REF_TYPE=${{ github.ref_type }}
- REF_NAME=${{ github.ref_name }}
-
- # If building for a release tag, branch, set the branch/tag name
- # as the target folder in the gh-pages branch. The artifacts created
- # during the build will be copied over to the target dir in the
- # gh-pages branch.
- if [[ "${REF_TYPE}" == branch ]]; then
- TARGET_FOLDER="${REF_NAME}"
- elif [[ "${REF_TYPE}" == tag ]]; then
- # Strip the leading "v" as well as the trailing patch version and "-rc" suffix.
- # For example: 'v0.1.2' -> '0.1' and 'v0.1.2-rc1' -> 0.1.
- TARGET_FOLDER=$(echo "${REF_NAME}" | sed 's/^v//i; s/-rc[0-9]*$//; s/\.[0-9]*$//')
+ # Get github.ref for the output doc folder. By default "main"
+ # If matches a tag like refs/tags/v1.12.0-rc3 or
+ # refs/tags/v1.12.0 convert to 1.12
+ GITHUB_REF=${{ github.ref }}
+
+ # Convert refs/tags/v1.12.0rc3 into 1.12.
+ # Adopted from https://github.com/pytorch/pytorch/blob/main/.github/workflows/_docs.yml#L150C11-L155C13
+ if [[ "${GITHUB_REF}" =~ ^refs/tags/v([0-9]+\\.[0-9]+)\\. ]]; then
+ TARGET_FOLDER="${BASH_REMATCH[1]}"
else
- echo "ERROR: Invalid REF_TYPE: ${REF_TYPE}. Expected 'branch' or 'tag'."
- exit 1
+ TARGET_FOLDER="main"
fi
echo "Target Folder: ${TARGET_FOLDER}"
@@ -122,12 +111,6 @@ jobs:
mv "${RUNNER_ARTIFACT_DIR}"/html/* "${TARGET_FOLDER}"
git add "${TARGET_FOLDER}" || true
- # If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
- if [[ "${REF_NAME}" == 'main' ]]; then
- find "${TARGET_FOLDER}" -type f -name "*.html" -exec sed -i '//a ' {} \;
- git add "${TARGET_FOLDER}"/**/*.html || true
- fi
-
git config user.name 'pytorchbot'
git config user.email 'soumith+bot@pytorch.org'
git commit -m "Auto-generating sphinx docs" || true
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
index f773f3aca88..6cf6e0495b3 100644
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@@ -50,7 +50,7 @@ jobs:
mkdir "${GITHUB_WORKSPACE}"
- name: Setup SSH (Click me for login details)
- uses: pytorch/test-infra/.github/actions/setup-ssh@main
+ uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@@ -58,11 +58,11 @@ jobs:
uses: actions/checkout@v3
- name: Setup Linux
- uses: pytorch/test-infra/.github/actions/setup-linux@main
+ uses: pytorch/test-infra/.github/actions/setup-linux@release/2.3
- name: Build docker image
id: build-docker-image
- uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+ uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
with:
docker-image-name: ${{ matrix.docker-image-name }}
always-rebuild: true
@@ -70,5 +70,5 @@ jobs:
force-push: true
- name: Teardown Linux
- uses: pytorch/test-infra/.github/actions/teardown-linux@main
+ uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
if: always()
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 7cb2cf69b8b..a47f38d1b86 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -16,7 +16,7 @@ concurrency:
jobs:
lintrunner:
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-linter
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index f8ffd41d214..efa3ed6f540 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -21,12 +21,12 @@ jobs:
environment: ${{ (github.event_name == 'schedule') && 'update-commit-hash' || '' }}
steps:
- name: update-pytorch-commit-hash
- uses: pytorch/test-infra/.github/actions/update-commit-hash@main
+ uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.3
if: ${{ github.event_name == 'schedule' }}
with:
repo-name: pytorch
branch: main
pin-folder: .ci/docker/ci_commit_pins
- test-infra-ref: main
+ test-infra-ref: release/2.3
updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 9751b906cd8..6b3a25d89c8 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -33,7 +33,7 @@ jobs:
test-setup-linux-gcc:
name: test-setup-linux-gcc
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
@@ -58,7 +58,7 @@ jobs:
test-models-linux:
name: test-models-linux
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
needs: gather-models
strategy:
matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
@@ -85,7 +85,7 @@ jobs:
test-llama-runner-linux:
name: test-llama-runner-linux
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
dtype: [fp32]
@@ -139,7 +139,7 @@ jobs:
test-custom-ops-linux:
name: test-custom-ops-linux
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
@@ -164,7 +164,7 @@ jobs:
test-selective-build-linux:
name: test-selective-build-linux
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
@@ -189,7 +189,7 @@ jobs:
test-quantized-aot-lib-linux:
name: test-quantized-aot-lib-linux
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
@@ -212,7 +212,7 @@ jobs:
test-pybind-build-linux:
name: test-pybind-build-linux
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 16ed6a27577..a21e02a468c 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -34,7 +34,7 @@ jobs:
test-models-macos:
name: test-models-macos
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
needs: gather-models
strategy:
matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
@@ -63,7 +63,7 @@ jobs:
test-custom-ops-macos:
name: test-custom-ops-macos
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
strategy:
matrix:
include:
@@ -89,7 +89,7 @@ jobs:
test-selective-build-macos:
name: test-selective-build-macos
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
strategy:
matrix:
include:
@@ -115,7 +115,7 @@ jobs:
test-demo-backend-delegation:
name: test-demo-backend-delegation
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
strategy:
matrix:
include:
@@ -139,7 +139,7 @@ jobs:
test-arm-backend-delegation:
name: test-arm-backend-delegation
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -169,7 +169,7 @@ jobs:
test-arm-reference-delegation:
name: test-arm-reference-delegation
- uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.3
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -200,7 +200,7 @@ jobs:
test-coreml-delegate:
name: test-coreml-delegate
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
with:
runner: macos-13-xlarge
python-version: '3.11'
@@ -222,7 +222,7 @@ jobs:
test-pybind-build-macos:
name: test-pybind-build-macos
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
strategy:
matrix:
include:
@@ -249,7 +249,7 @@ jobs:
test-llama-runner-macos:
name: test-llama-runner-mac
- uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+ uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.3
strategy:
matrix:
dtype: [fp32]
diff --git a/.github/workflows/update-viablestrict.yml b/.github/workflows/update-viablestrict.yml
index 189a5cf3aa3..9bb89aa2be3 100644
--- a/.github/workflows/update-viablestrict.yml
+++ b/.github/workflows/update-viablestrict.yml
@@ -16,7 +16,7 @@ jobs:
environment: ${{ (github.event_name == 'schedule') && 'update-viable-strict' || '' }}
steps:
- name: Update viable/strict
- uses: pytorch/test-infra/.github/actions/update-viablestrict@main
+ uses: pytorch/test-infra/.github/actions/update-viablestrict@release/2.3
with:
repository: pytorch/executorch
stable-branch: viable/strict
diff --git a/.gitignore b/.gitignore
index 6661daed13e..26a46f23f62 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
buck-out/
cmake-out/
cmake-android-out/
+cmake-out-android/
cmake-ios-out/
ethos-u-scratch/
executorch.egg-info
diff --git a/.swift/coreml_backend/dummy.swift b/.swift/coreml_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/coreml_backend_debug/dummy.swift b/.swift/coreml_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/custom_backend/dummy.swift b/.swift/custom_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/custom_backend_debug/dummy.swift b/.swift/custom_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/executorch/dummy.swift b/.swift/executorch/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/executorch_debug/dummy.swift b/.swift/executorch_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/mps_backend/dummy.swift b/.swift/mps_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/mps_backend_debug/dummy.swift b/.swift/mps_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/optimized_backend/dummy.swift b/.swift/optimized_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/optimized_backend_debug/dummy.swift b/.swift/optimized_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/portable_backend/dummy.swift b/.swift/portable_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/portable_backend_debug/dummy.swift b/.swift/portable_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/quantized_backend/dummy.swift b/.swift/quantized_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/quantized_backend_debug/dummy.swift b/.swift/quantized_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/xnnpack_backend/dummy.swift b/.swift/xnnpack_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/xnnpack_backend_debug/dummy.swift b/.swift/xnnpack_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 46b73f63492..e8f4c93a808 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -352,23 +352,27 @@ add_subdirectory(schema)
# Only contains primitive operators; does not contain portable kernels or other
# full operators. Does not contain any backends.
#
-
-add_library(executorch ${_executorch__srcs})
-target_link_libraries(executorch PRIVATE program_schema)
-target_link_options_shared_lib(executorch)
+add_library(executorch_no_prim_ops ${_executorch_no_prim_ops__srcs})
+target_link_libraries(executorch_no_prim_ops PRIVATE program_schema)
# Check if dl exists for this toolchain and only then link it.
find_library(DL_LIBRARY_EXISTS NAMES dl)
# Check if the library was found
if(DL_LIBRARY_EXISTS)
- target_link_libraries(executorch PRIVATE dl) # For dladdr()
+ target_link_libraries(executorch_no_prim_ops PRIVATE dl) # For dladdr()
endif()
-target_include_directories(executorch PUBLIC ${_common_include_directories})
-target_compile_options(executorch PUBLIC ${_common_compile_options})
+target_include_directories(executorch_no_prim_ops PUBLIC ${_common_include_directories})
+target_compile_options(executorch_no_prim_ops PUBLIC ${_common_compile_options})
if(MAX_KERNEL_NUM)
- target_compile_definitions(executorch
+ target_compile_definitions(executorch_no_prim_ops
PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM})
endif()
+add_library(executorch ${_executorch__srcs})
+target_link_libraries(executorch PRIVATE executorch_no_prim_ops)
+target_include_directories(executorch PUBLIC ${_common_include_directories})
+target_compile_options(executorch PUBLIC ${_common_compile_options})
+target_link_options_shared_lib(executorch)
+
#
# portable_ops_lib: A library to register core ATen ops using portable kernels,
# see kernels/portable/CMakeLists.txt.
@@ -406,7 +410,7 @@ endif()
# Install `executorch` library as well as `executorch-config.cmake` under
# ${CMAKE_INSTALL_PREFIX}/
install(
- TARGETS executorch
+ TARGETS executorch executorch_no_prim_ops
DESTINATION lib
INCLUDES
DESTINATION ${_common_include_directories})
@@ -523,10 +527,19 @@ if(EXECUTORCH_BUILD_PYBIND)
find_library(TORCH_PYTHON_LIBRARY torch_python
PATHS "${TORCH_INSTALL_PREFIX}/lib")
+ # TODO(larryliu): Fix the issue of the two macOS dylibs each having their own set of static variables
+ if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT AND NOT APPLE)
+ list(APPEND _dep_libs custom_ops_aot_lib)
+ endif()
# compile options for pybind
-
- set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
- -fexceptions)
+ set(_pybind_compile_options
+ -Wno-deprecated-declarations
+ -fPIC
+ -frtti
+ -fexceptions
+ # libtorch is built with the old ABI, so we need to do the same for any
+ # .cpp files that include torch, c10, or ATen targets.
+ -D_GLIBCXX_USE_CXX11_ABI=0)
# util lib
add_library(
util
@@ -540,8 +553,11 @@ if(EXECUTORCH_BUILD_PYBIND)
# pybind portable_lib
pybind11_add_module(portable_lib extension/pybindings/pybindings.cpp)
+ # The actual output file needs a leading underscore so it can coexist with
+ # portable_lib.py in the same python package.
+ set_target_properties(portable_lib PROPERTIES OUTPUT_NAME "_portable_lib")
target_compile_definitions(portable_lib
- PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
+ PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=_portable_lib)
target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
target_link_libraries(
@@ -557,6 +573,24 @@ if(EXECUTORCH_BUILD_PYBIND)
${PYBIND_LINK_COREML}
${PYBIND_LINK_MPS}
${PYBIND_LINK_XNNPACK})
+ if(APPLE)
+ # pip wheels will need to be able to find the torch libraries. On Linux, the
+ # .so has non-absolute dependencies on libs like "libtorch.so" without
+ # paths; as long as we `import torch` first, those dependencies will work.
+ # But Apple dylibs do not support non-absolute dependencies, so we need to
+ # tell the loader where to look for its libraries. The LC_LOAD_DYLIB entries
+ # for the torch libraries will look like "@rpath/libtorch.dylib", so we can
+ # add an LC_RPATH entry to look in a directory relative to the installed
+ # location of our _portable_lib.so file. To see these LC_* values, run
+ # `otool -l _portable_lib*.so`.
+ set_target_properties(
+ portable_lib
+ PROPERTIES # Assume that this library will be installed in
+ # `site-packages/executorch/extension/pybindings`, and that
+ # the torch libs are in `site-packages/torch/lib`.
+ BUILD_RPATH "@loader_path/../../../torch/lib"
+ INSTALL_RPATH "@loader_path/../../../torch/lib")
+ endif()
install(TARGETS portable_lib
LIBRARY DESTINATION executorch/extension/pybindings)
diff --git a/Package.swift b/Package.swift
deleted file mode 100644
index b0dfec174f2..00000000000
--- a/Package.swift
+++ /dev/null
@@ -1,101 +0,0 @@
-// swift-tools-version:5.9
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-import PackageDescription
-
-let version = "0.1.0"
-let url = "https://ossci-ios.s3.amazonaws.com/executorch/"
-let debug = "_debug"
-let deliverables = [
- "coreml_backend": [
- "sha256": "5bfa35cb5143b4af6840e0e5dd2d40bce93dff331b8eb5798a46274239391a5d",
- "sha256" + debug: "1422019da9000f8ff7be597de9e0e3b2482f99cdaa75c2d179835778647be1a6",
- "frameworks": [
- "Accelerate",
- "CoreML",
- ],
- "libraries": [
- "sqlite3",
- ],
- ],
- "custom_backend": [
- "sha256": "2201a61eaf7e06e1937cb73a469fb36cabc219496ba004b85feb2cc7c10f300d",
- "sha256" + debug: "3eb6eb97bf0641d2305b0f50ff05a8862d7d65e2491cf4aa05ef1d108649f07c",
- ],
- "executorch": [
- "sha256": "2b55cbcff845ab9eaf16a21e520546b2975ef8c55b9e3fbbcc0c375334e40c6f",
- "sha256" + debug: "12933cedff6cf21c9d21668779f8d8af8049646fe7d290787b12227ff7abe4a7",
- ],
- "mps_backend": [
- "sha256": "510d708361b6ea0692ce5aeb638725d6275824b37bbe744aa876fda24cc2bbbf",
- "sha256" + debug: "6a67ba0bf8033f17bd66acb222446df51cd1304e24a4fb2c6d97e15a30fb24f0",
- "frameworks": [
- "Metal",
- "MetalPerformanceShaders",
- "MetalPerformanceShadersGraph",
- ],
- ],
- "optimized_backend": [
- "sha256": "50aaa54901a7cee1059e71cc623f054610406d65ba8fd6edb10b45861be67237",
- "sha256" + debug: "3f43f465727c8705432f4bb69260cc9501c519e5da006fc19ee2ab2ea260d1f0",
- ],
- "portable_backend": [
- "sha256": "964238e92828665aa598c05b2264faab91fb13ce0f42633cc7d5653300af3e9b",
- "sha256" + debug: "d6d85304a4b40f13c9b893e8c264ebdb15307cacf8997494b3818a52e4914b28",
- ],
- "quantized_backend": [
- "sha256": "37d31a319f92e26bab2b7ec5e783a8b14457dee0a4638dcdca1d9e17539ee3fb",
- "sha256" + debug: "6b45f66f60f6106a41e191418c970bf7b0605df73b9815a06441a5f0809b54e6",
- ],
- "xnnpack_backend": [
- "sha256": "03d506243c392e872519ae1335a025ef202319c1db339a753f9d7d74cba226f0",
- "sha256" + debug: "3341e89abc99552a6a5bad360003baed194a83e865338bc07afe9e4f171ea169",
- ],
-].reduce(into: [String: [String: Any]]()) {
- $0[$1.key] = $1.value
- $0[$1.key + debug] = $1.value
-}
-.reduce(into: [String: [String: Any]]()) {
- var newValue = $1.value
- if $1.key.hasSuffix(debug) {
- $1.value.forEach { key, value in
- if key.hasSuffix(debug) {
- newValue[String(key.dropLast(debug.count))] = value
- }
- }
- }
- $0[$1.key] = newValue.filter { key, _ in !key.hasSuffix(debug) }
-}
-
-let package = Package(
- name: "executorch",
- platforms: [
- .iOS(.v15),
- ],
- products: deliverables.keys.map { key in
- .library(name: key, targets: ["\(key)_dependencies"])
- }.sorted { $0.name < $1.name },
- targets: deliverables.flatMap { key, value -> [Target] in
- [
- .binaryTarget(
- name: key,
- url: "\(url)\(key)-\(version).zip",
- checksum: value["sha256"] as? String ?? ""
- ),
- .target(
- name: "\(key)_dependencies",
- dependencies: [.target(name: key)],
- path: ".swift/\(key)",
- linkerSettings:
- (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } +
- (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) }
- ),
- ]
- }
-)
diff --git a/README-wheel.md b/README-wheel.md
new file mode 100644
index 00000000000..ebbaab90a87
--- /dev/null
+++ b/README-wheel.md
@@ -0,0 +1,39 @@
+**ExecuTorch** is a [PyTorch](https://pytorch.org/) platform that provides
+infrastructure to run PyTorch programs everywhere from AR/VR wearables to
+standard on-device iOS and Android mobile deployments. One of the main goals for
+ExecuTorch is to enable wider customization and deployment capabilities for
+PyTorch programs.
+
+The `executorch` pip package is in alpha.
+* Required python version: `==3.10`
+* Compatible systems: Linux x86_64, macOS aarch64
+
+The prebuilt `executorch.extension.pybindings.portable_lib` module included in
+this package provides a way to run ExecuTorch `.pte` files, with some
+restrictions:
+* Only [core ATen
+ operators](https://pytorch.org/executorch/stable/ir-ops-set-definition.html)
+ are linked into the prebuilt module
+* Only the [XNNPACK backend
+ delegate](https://pytorch.org/executorch/main/native-delegates-executorch-xnnpack-delegate.html)
+ is linked into the prebuilt module
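+
+For illustration, here is a minimal sketch of loading and running a `.pte` file
+with this module (the `model.pte` path and the example input are assumptions; a
+real program defines its own inputs):
+
+```python
+import torch  # Import torch first so the extension can locate its shared libraries.
+from executorch.extension.pybindings.portable_lib import _load_for_executorch
+
+# Load a previously exported ExecuTorch program (the path is an assumption).
+program = _load_for_executorch("model.pte")
+
+# Run it; the number and shapes of the inputs depend on the exported model.
+outputs = program.forward([torch.ones(2, 2)])
+print(outputs[0])
+```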
+
+Please visit the [ExecuTorch website](https://pytorch.org/executorch/) for
+tutorials and documentation. Here are some starting points:
+* [Getting
+ Started](https://pytorch.org/executorch/stable/getting-started-setup.html)
+ * Set up the ExecuTorch environment and run PyTorch models locally.
+* [Working with
+ local LLMs](https://pytorch.org/executorch/stable/llm/getting-started.html)
+ * Learn how to use ExecuTorch to export and accelerate a large-language model
+ from scratch.
+* [Exporting to
+ ExecuTorch](https://pytorch.org/executorch/main/tutorials/export-to-executorch-tutorial.html)
+ * Learn the fundamentals of exporting a PyTorch `nn.Module` to ExecuTorch, and
+ optimizing its performance using quantization and hardware delegation.
+* Running LLaMA on
+ [iOS](https://pytorch.org/executorch/stable/llm/llama-demo-ios.html) and
+ [Android](https://pytorch.org/executorch/stable/llm/llama-demo-android.html)
+ devices.
+ * Build and run LLaMA in a demo mobile app, and learn how to integrate models
+ with your own apps.
diff --git a/backends/apple/coreml/CMakeLists.txt b/backends/apple/coreml/CMakeLists.txt
index f1c19d00ee8..b3d0182999a 100644
--- a/backends/apple/coreml/CMakeLists.txt
+++ b/backends/apple/coreml/CMakeLists.txt
@@ -13,6 +13,8 @@ if(NOT EXECUTORCH_ROOT)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
endif()
+option(COREML_BUILD_EXECUTOR_RUNNER "Build CoreML executor runner." OFF)
+
# inmemoryfs sources
set(INMEMORYFS_SOURCES
runtime/inmemoryfs/inmemory_filesystem.cpp
@@ -144,7 +146,7 @@ target_include_directories(
)
target_link_libraries(
coremldelegate PRIVATE
- executorch
+ executorch_no_prim_ops
)
if(EXECUTORCH_BUILD_SDK)
@@ -174,18 +176,26 @@ find_library(SQLITE_LIBRARY sqlite3)
target_link_libraries(coremldelegate
PRIVATE
- executorch
+ executorch_no_prim_ops
${ACCELERATE_FRAMEWORK}
${COREML_FRAMEWORK}
${FOUNDATION_FRAMEWORK}
${SQLITE_LIBRARY}
)
+if(COREML_BUILD_EXECUTOR_RUNNER)
+target_link_libraries(coremldelegate
+ PRIVATE
+ portable_ops_lib
+ portable_kernels
+)
+endif()
+
target_compile_options(coremldelegate PRIVATE "-fobjc-arc")
target_compile_options(coremldelegate PRIVATE "-fno-exceptions")
if(EXECUTORCH_BUILD_SDK)
-target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)
+target_compile_options(executorch_no_prim_ops PUBLIC -DET_EVENT_TRACER_ENABLED)
target_compile_options(coremldelegate PRIVATE "-frtti")
target_compile_options(libprotobuf-lite PRIVATE "-frtti")
else()
diff --git a/backends/apple/coreml/README.md b/backends/apple/coreml/README.md
index 1710860f87e..4a21d8d8ae1 100644
--- a/backends/apple/coreml/README.md
+++ b/backends/apple/coreml/README.md
@@ -6,54 +6,123 @@ Core ML is an optimized framework for running machine learning models on Apple d
## Layout
- `compiler/` : Lowers a module to Core ML backend.
+- `partition/`: Partitions a module fully or partially to the Core ML backend.
+- `quantizer/`: Quantizes a module with a Core ML-favored scheme.
- `scripts/` : Scripts for installing dependencies and running tests.
- `runtime/`: Core ML delegate runtime implementation.
- `inmemoryfs`: InMemory filesystem implementation used to serialize/de-serialize AOT blob.
- `kvstore`: Persistent Key-Value store implementation.
- `delegate`: Runtime implementation.
- `include` : Public headers.
- - `tests` : Tests for Core ML delegate.
- - `workspace` : Xcode workspace for tests.
+ - `sdk` : SDK implementation.
+ - `tests` : Unit tests.
+ - `workspace` : Xcode workspace for the runtime.
- `third-party/`: External dependencies.
-## Help & Improvements
-If you have problems or questions or have suggestions for ways to make
-implementation and testing better, please create an issue on [github](https://www.github.com/pytorch/executorch/issues).
+## Partition and Delegation
-## Delegation
-
-For delegating the Program to the **Core ML** backend, the client must be responsible for calling `to_backend` with the **CoreMLBackend** tag.
+To delegate a Program to the **Core ML** backend, the client must call `to_backend` with the **CoreMLPartitioner**.
```python
-import executorch.exir as exir
import torch
-
-from executorch.exir.backend.backend_api import to_backend
+import executorch.exir
from executorch.backends.apple.coreml.compiler import CoreMLBackend
+from executorch.backends.apple.coreml.partition.coreml_partitioner import CoreMLPartitioner
-class LowerableSubModel(torch.nn.Module):
+class Model(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, x):
return torch.sin(x)
-# Convert the lowerable module to Edge IR Representation
-to_be_lowered = LowerableSubModel()
-example_input = (torch.ones(1), )
-to_be_lowered_exir_submodule = exir.capture(to_be_lowered, example_input).to_edge()
+source_model = Model()
+example_inputs = (torch.ones(1), )
-# Lower to Core ML backend
-lowered_module = to_backend('CoreMLBackend', to_be_lowered_exir_submodule.exported_program, [])
+# Export the source model to Edge IR representation
+aten_program = torch.export.export(source_model, example_inputs)
+edge_program_manager = executorch.exir.to_edge(aten_program)
+
+# Delegate to Core ML backend
+delegated_program_manager = edge_program_manager.to_backend(CoreMLPartitioner())
+
+# Serialize delegated program
+executorch_program = delegated_program_manager.to_executorch()
+with open("model.pte", "wb") as f:
+ f.write(executorch_program.buffer)
```
-Currently, the **Core ML** backend delegates the whole module to **Core ML**. If a specific op is not supported by the **Core ML** backend then the `to_backend` call would throw an exception. We will be adding a **Core ML Partitioner** to resolve the issue.
+The module will be fully or partially delegated to **Core ML**, depending on whether all or only some of its ops are supported by the **Core ML** backend. Users may force certain ops to be skipped by passing `CoreMLPartitioner(skip_ops_for_coreml_delegation=...)`.
+
+The `to_backend` implementation is a thin wrapper over [coremltools](https://apple.github.io/coremltools/docs-guides/); `coremltools` is responsible for converting an **ExportedProgram** to an **MLModel**. The converted **MLModel** data is saved, flattened, and returned as bytes to **ExecuTorch**.
-The `to_backend` implementation is a thin wrapper over `coremltools`, `coremltools` is responsible for converting an **ExportedProgram** to a **MLModel**. The converted **MLModel** data is saved, flattened, and returned as bytes to **ExecuTorch**.
+## Quantization
+
+To quantize a Program in a Core ML-favored way, the client may use **CoreMLQuantizer**.
+
+```python
+import torch
+import executorch.exir
+
+from torch._export import capture_pre_autograd_graph
+from torch.ao.quantization.quantize_pt2e import (
+ convert_pt2e,
+ prepare_pt2e,
+ prepare_qat_pt2e,
+)
+
+from executorch.backends.apple.coreml.quantizer.coreml_quantizer import CoreMLQuantizer
+from coremltools.optimize.torch.quantization.quantization_config import (
+ LinearQuantizerConfig,
+ QuantizationScheme,
+)
+
+class Model(torch.nn.Module):
+ def __init__(self) -> None:
+ super().__init__()
+ self.conv = torch.nn.Conv2d(
+ in_channels=3, out_channels=16, kernel_size=3, padding=1
+ )
+ self.relu = torch.nn.ReLU()
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ a = self.conv(x)
+ return self.relu(a)
+
+source_model = Model()
+example_inputs = (torch.randn((1, 3, 256, 256)), )
+
+pre_autograd_aten_dialect = capture_pre_autograd_graph(source_model, example_inputs)
+
+quantization_config = LinearQuantizerConfig.from_dict(
+ {
+ "global_config": {
+ "quantization_scheme": QuantizationScheme.symmetric,
+ "activation_dtype": torch.uint8,
+ "weight_dtype": torch.int8,
+ "weight_per_channel": True,
+ }
+ }
+)
+quantizer = CoreMLQuantizer(quantization_config)
+
+# For post-training quantization, use `prepare_pt2e`
+# For quantization-aware training, use `prepare_qat_pt2e`
+prepared_graph = prepare_pt2e(pre_autograd_aten_dialect, quantizer)
+
+prepared_graph(*example_inputs)
+converted_graph = convert_pt2e(prepared_graph)
+```
+
+The `converted_graph` is the quantized torch model and can be delegated to **Core ML** through **CoreMLPartitioner** in the same way, as sketched below.
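+
+A rough sketch, reusing `converted_graph` and `example_inputs` from the example above (exact export APIs may vary across versions):
+
+```python
+import torch
+import executorch.exir
+from executorch.backends.apple.coreml.partition.coreml_partitioner import CoreMLPartitioner
+
+# Export the quantized graph, delegate it to Core ML, and serialize the program.
+aten_program = torch.export.export(converted_graph, example_inputs)
+edge_program_manager = executorch.exir.to_edge(aten_program)
+delegated_program_manager = edge_program_manager.to_backend(CoreMLPartitioner())
+executorch_program = delegated_program_manager.to_executorch()
+with open("quantized_model.pte", "wb") as f:
+    f.write(executorch_program.buffer)
+```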
## Runtime
-To execute a **Core ML** delegated **Program**, the client must link to the `coremldelegate` library. Once linked there are no additional steps required, **ExecuTorch** when running the **Program** would call the **Core ML** runtime to execute the **Core ML** delegated part of the **Program**.
+To execute a Core ML delegated program, the application must link against the `coremldelegate` library. Once linked, no additional steps are required: when running the program, ExecuTorch will call the Core ML runtime to execute the Core ML delegated parts of the program.
Please follow the instructions described in the [Core ML setup](/backends/apple/coreml/setup.md) to link the `coremldelegate` library.
+
+## Help & Improvements
+If you have problems or questions or have suggestions for ways to make
+implementation and testing better, please create an issue on [github](https://www.github.com/pytorch/executorch/issues).
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
index da399e80d54..6fe37925d27 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
@@ -630,7 +630,7 @@ - (NSUInteger)_compact:(NSUInteger)sizeInBytes error:(NSError * __autoreleasing
}
if (_estimatedSizeInBytes <= sizeInBytes) {
- return YES;
+ return _estimatedSizeInBytes;
}
std::error_code ec;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
index eab239b496c..78c76fadd04 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
@@ -27,6 +27,9 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLDefaultModelExec
/// The model.
@property (readonly, strong, nonatomic) ETCoreMLModel* model;
+/// If set to `YES`, then output backings are ignored.
+@property (readwrite, atomic) BOOL ignoreOutputBackings;
+
@end
NS_ASSUME_NONNULL_END
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
index 399c91bd495..57316e28015 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
@@ -26,6 +26,9 @@ - (instancetype)initWithModel:(ETCoreMLModel *)model {
loggingOptions:(const executorchcoreml::ModelLoggingOptions& __unused)loggingOptions
eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable __unused)eventLogger
error:(NSError * __autoreleasing *)error {
+ if (self.ignoreOutputBackings) {
+ predictionOptions.outputBackings = @{};
+ }
id outputs = [self.model.mlModel predictionFromFeatures:inputs
options:predictionOptions
error:error];
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h b/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
index 1a1b10848bb..d9c4d4ef638 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
@@ -7,6 +7,7 @@
#import
+#import
#import
NS_ASSUME_NONNULL_BEGIN
@@ -48,7 +49,11 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
/// Record the error with `os_log_error` and fills `*errorOut` with `NSError`.
#define ETCoreMLLogErrorAndSetNSError(errorOut, errorCode, formatString, ...) \
- os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
+ if (ET_LOG_ENABLED) { \
+ ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+ } else { \
+ os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
+ } \
if (errorOut) { \
*errorOut = \
[NSError errorWithDomain:ETCoreMLErrorDomain \
@@ -58,24 +63,31 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
}]; \
}
-/// Record the error and its underlying error with `os_log_error` and fills
-/// `*errorOut` with NSError.
+/// Record the error and its underlying error with `os_log_error` and fills `*errorOut` with `NSError`.
#define ETCoreMLLogUnderlyingErrorAndSetNSError(errorOut, errorCode, underlyingNSError, formatString, ...) \
- os_log_error(ETCoreMLErrorUtils.loggingChannel, \
- formatString ", with underlying error= %@.", \
- ##__VA_ARGS__, \
- (underlyingNSError).localizedDescription); \
+ if (ET_LOG_ENABLED) { \
+ ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+ } else { \
+ os_log_error(ETCoreMLErrorUtils.loggingChannel, \
+ formatString ", with underlying error= %@.", \
+ ##__VA_ARGS__, \
+ (underlyingNSError).localizedDescription); \
+ } \
if (errorOut) { \
*errorOut = [ETCoreMLErrorUtils errorWithCode:errorCode \
underlyingError:underlyingNSError \
format:@formatString, ##__VA_ARGS__]; \
}
-#define ETCoreMLLogError(error, formatString, ...) \
- os_log_error(ETCoreMLErrorUtils.loggingChannel, \
- formatString ", with error= %@.", \
- ##__VA_ARGS__, \
- (error).localizedDescription);
+#define ETCoreMLLogError(error, formatString, ...) \
+ if (ET_LOG_ENABLED) { \
+ ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+ } else { \
+ os_log_error(ETCoreMLErrorUtils.loggingChannel, \
+ formatString ", with error= %@.", \
+ ##__VA_ARGS__, \
+ (error).localizedDescription); \
+ }
#pragma clang diagnostic pop
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
index 0f8a440c858..14c90694464 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
@@ -6,13 +6,18 @@
// Please refer to the license found in the LICENSE file in the root directory of the source tree.
#import
+#import
NS_ASSUME_NONNULL_BEGIN
@class ETCoreMLAsset;
+namespace executorchcoreml {
+class MultiArray;
+}
+
/// Represents a ML model, the class is a thin wrapper over `MLModel` with additional properties.
-@interface ETCoreMLModel : NSObject
+__attribute__((objc_subclassing_restricted)) @interface ETCoreMLModel : NSObject
- (instancetype)init NS_UNAVAILABLE;
@@ -31,6 +36,12 @@ NS_ASSUME_NONNULL_BEGIN
orderedOutputNames:(NSOrderedSet*)orderedOutputNames
error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
+- (nullable NSArray*)prepareInputs:(const std::vector&)inputs
+ error:(NSError* __autoreleasing*)error;
+
+- (nullable NSArray*)prepareOutputBackings:(const std::vector&)outputs
+ error:(NSError* __autoreleasing*)error;
+
/// The underlying MLModel.
@property (strong, readonly, nonatomic) MLModel* mlModel;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
index 791fb7c03b6..ee7218bd271 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
@@ -8,6 +8,164 @@
#import
#import
+#import
+#import
+#import
+#import
+
+#pragma mark - ETCoreMLMultiArrayDescriptor
+__attribute__((objc_subclassing_restricted))
+@interface ETCoreMLMultiArrayDescriptor: NSObject
+
+- (instancetype)init NS_UNAVAILABLE;
+
++ (instancetype)new NS_UNAVAILABLE;
+
+- (instancetype)initWithShape:(NSArray *)shape
+ dataType:(MLMultiArrayDataType)dataType NS_DESIGNATED_INITIALIZER;
+
+@property (copy, readonly, nonatomic) NSArray *shape;
+
+@property (assign, readonly, nonatomic) MLMultiArrayDataType dataType;
+
+@end
+
+@implementation ETCoreMLMultiArrayDescriptor
+
+- (instancetype)initWithShape:(NSArray *)shape
+ dataType:(MLMultiArrayDataType)dataType {
+ self = [super init];
+ if (self) {
+ _shape = shape;
+ _dataType = dataType;
+ }
+
+ return self;
+}
+
+- (BOOL)isEqual:(id)object {
+ if (object == self) {
+ return YES;
+ }
+
+ if (![object isKindOfClass:self.class]) {
+ return NO;
+ }
+
+ ETCoreMLMultiArrayDescriptor *other = (ETCoreMLMultiArrayDescriptor *)object;
+ return [self.shape isEqualToArray:other.shape] && self.dataType == other.dataType;
+}
+
+- (NSUInteger)hash {
+ return [self.shape hash] ^ (NSUInteger)self.dataType;
+}
+
+- (instancetype)copyWithZone:(NSZone *)zone {
+ return [[ETCoreMLMultiArrayDescriptor allocWithZone:zone] initWithShape:self.shape
+ dataType:self.dataType];
+}
+
+@end
+
+namespace {
+
+using namespace executorchcoreml;
+
+size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
+ switch (data_type) {
+ case MLMultiArrayDataTypeFloat16: {
+ return 2;
+ }
+ case MLMultiArrayDataTypeFloat32: {
+ return 4;
+ }
+ case MLMultiArrayDataTypeInt32: {
+ return 4;
+ }
+ case MLMultiArrayDataTypeFloat64: {
+ return 8;
+ }
+ default: {
+ return 0;
+ }
+ }
+}
+
+std::vector calculate_strides(const std::vector& shape) {
+ if (shape.size() == 0) {
+ return {};
+ }
+
+ if (shape.size() == 1) {
+ return {1};
+ }
+
+ std::vector strides(shape.size(), 1);
+ size_t product = 1;
+ for (size_t i = shape.size(); i > 0; i--) {
+ strides[i - 1] = product;
+ product *= shape[i - 1];
+ }
+
+ return strides;
+}
+
+MLMultiArray * _Nullable make_ml_multi_array(const std::vector& shape,
+ MLMultiArrayDataType dataType,
+ NSCache *cache,
+ NSError * __autoreleasing *error) {
+ ETCoreMLMultiArrayDescriptor *descriptor = [[ETCoreMLMultiArrayDescriptor alloc] initWithShape:to_array(shape)
+ dataType:dataType];
+ // Check the cache first; otherwise allocate new backing storage.
+ NSMutableData *backing_storage = [cache objectForKey:descriptor];
+ if (backing_storage) {
+ [cache removeObjectForKey:descriptor];
+ } else {
+ size_t n = std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies{});
+ backing_storage = [[NSMutableData alloc] initWithLength:n * get_number_of_bytes(dataType)];
+ }
+
+ __weak NSCache *weakCache = cache;
+ // Add the storage back to the cache when it gets deallocated; the next prediction will reuse the same storage.
+ MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:backing_storage.mutableBytes
+ shape:descriptor.shape
+ dataType:descriptor.dataType
+ strides:to_array(calculate_strides(shape))
+ deallocator:^(void * _Nonnull bytes) {[weakCache setObject:backing_storage forKey:descriptor];}
+ error:error];
+
+ return result;
+}
+
+NSDictionary *
+get_multi_array_constraints_by_name(NSDictionary *feature_descriptions) {
+ NSMutableDictionary *result = [NSMutableDictionary dictionaryWithCapacity:feature_descriptions.count];
+ [feature_descriptions enumerateKeysAndObjectsUsingBlock:^(NSString *key, MLFeatureDescription *description, BOOL * _Nonnull stop) {
+ result[key] = description.multiArrayConstraint;
+ }];
+
+ return result;
+}
+
+NSDictionary *get_multi_array_input_constraints_by_name(MLModelDescription *description) {
+ return get_multi_array_constraints_by_name(description.inputDescriptionsByName);
+}
+
+NSDictionary *get_multi_array_output_constraints_by_name(MLModelDescription *description) {
+ return get_multi_array_constraints_by_name(description.outputDescriptionsByName);
+}
+
+}
+
+#pragma mark - ETCoreMLModel
+@interface ETCoreMLModel ()
+
+@property (strong, readonly, nonatomic) NSCache *cache;
+@property (copy, readonly, nonatomic) NSDictionary *inputConstraintsByName;
+@property (copy, readonly, nonatomic) NSDictionary *outputConstraintsByName;
+
+@end
+
@implementation ETCoreMLModel
@@ -33,8 +191,11 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
_asset = asset;
_orderedInputNames = [orderedInputNames copy];
_orderedOutputNames = [orderedOutputNames copy];
+ _cache = [[NSCache alloc] init];
+ _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
+ _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
}
-
+
return self;
}
@@ -42,4 +203,73 @@ - (NSString *)identifier {
return self.asset.identifier;
}
+- (nullable NSArray *)prepareArgs:(const std::vector&)args
+ argNames:(NSOrderedSet *)argNames
+ argConstraintsByName:(NSDictionary *)argConstraintsByName
+ copyData:(const BOOL)copyData
+ error:(NSError * __autoreleasing *)error {
+ NSEnumerator *nameEnumerator = [argNames objectEnumerator];
+ NSMutableArray *result = [NSMutableArray arrayWithCapacity:args.size()];
+ for (const auto& arg : args) {
+ BOOL lCopyData = copyData;
+ NSString *argName = [nameEnumerator nextObject];
+ MLMultiArrayConstraint *constraint = argConstraintsByName[argName];
+ const auto& layout = arg.layout();
+ auto dataType = to_ml_multiarray_data_type(layout.dataType());
+ MLMultiArray *multiArrayArg = nil;
+ if (dataType == constraint.dataType) {
+ // We can use the same data storage.
+ multiArrayArg = [[MLMultiArray alloc] initWithDataPointer:arg.data()
+ shape:to_array(layout.shape())
+ dataType:constraint.dataType
+ strides:to_array(layout.strides())
+ deallocator:^(void * _Nonnull bytes) {}
+ error:error];
+ lCopyData = NO;
+ } else {
+ // We can't use the same data storage because the data types differ.
+ multiArrayArg = ::make_ml_multi_array(layout.shape(), constraint.dataType, self.cache, error);
+ }
+
+ if (!multiArrayArg) {
+ return nil;
+ }
+
+ if (multiArrayArg && lCopyData) {
+ [multiArrayArg getMutableBytesWithHandler:^(void *_Nonnull mutableBytes,
+ NSInteger __unused size,
+ NSArray *strides) {
+ MultiArray buffer(mutableBytes, MultiArray::MemoryLayout(to_multiarray_data_type(constraint.dataType).value(),
+ layout.shape(),
+ to_vector(strides)));
+ arg.copy(buffer);
+ }];
+ }
+
+ [result addObject:multiArrayArg];
+ }
+
+ return result;
+}
+
+- (nullable NSArray *)prepareInputs:(const std::vector&)inputs
+ error:(NSError * __autoreleasing *)error {
+ return [self prepareArgs:inputs
+ argNames:self.orderedInputNames
+ argConstraintsByName:self.inputConstraintsByName
+ copyData:YES
+ error:error];
+
+}
+
+- (nullable NSArray *)prepareOutputBackings:(const std::vector&)outputs
+ error:(NSError * __autoreleasing *)error {
+ return [self prepareArgs:outputs
+ argNames:self.orderedOutputNames
+ argConstraintsByName:self.outputConstraintsByName
+ copyData:NO
+ error:error];
+
+}
+
@end
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
index e6e329c9ddd..2f1b22f456b 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
@@ -35,6 +35,9 @@ NS_ASSUME_NONNULL_BEGIN
/// The model.
@property (readonly, strong, nonatomic) ETCoreMLModel* model;
+/// If set to `YES`, then output backings are ignored.
+@property (readwrite, atomic) BOOL ignoreOutputBackings;
+
@end
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
index fb616c71527..6bfdbade9c4 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
@@ -7,11 +7,14 @@
#import
+#import
+
NS_ASSUME_NONNULL_BEGIN
namespace executorchcoreml {
struct ModelLoggingOptions;
class ModelEventLogger;
+class MultiArray;
};
@class ETCoreMLModel;
@@ -49,7 +52,7 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelManager : N
/// Executes the loaded model.
///
/// @param handle The handle to the loaded model.
-/// @param args The arguments to the model.
+/// @param args The arguments (inputs and outputs) of the model.
/// @param loggingOptions The model logging options.
/// @param error On failure, error is filled with the failure information.
/// @retval `YES` if the execution succeeded otherwise `NO`.
@@ -59,6 +62,19 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelManager : N
eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
error:(NSError* __autoreleasing*)error;
+/// Executes the loaded model.
+///
+/// @param handle The handle to the loaded model.
+/// @param argsVec The arguments (inputs and outputs) of the model.
+/// @param loggingOptions The model logging options.
+/// @param error On failure, error is filled with the failure information.
+/// @retval `YES` if the execution succeeded otherwise `NO`.
+- (BOOL)executeModelWithHandle:(ModelHandle*)handle
+ argsVec:(const std::vector&)argsVec
+ loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+ eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+ error:(NSError* __autoreleasing*)error;
+
/// Unloads the loaded model.
///
/// @param handle The handle to the loaded model.
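
For orientation, a hedged caller-side sketch of the new `argsVec:` overload (the
surrounding `manager`, `handle`, `args`, `logging_options`, and `event_logger`
variables are assumed, not defined in this patch). The manager splits the vector
into inputs and outputs using the model's ordered input/output names:

    NSError *error = nil;
    // `args` is a std::vector<executorchcoreml::MultiArray> wrapping the
    // caller-owned input and output buffers, inputs first.
    BOOL ok = [manager executeModelWithHandle:handle
                                      argsVec:args
                               loggingOptions:logging_options
                                  eventLogger:event_logger
                                        error:&error];
    if (!ok) {
        // Propagate or log `error`.
    }
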
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
index 1c0d2a30f97..c51de9d1e14 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
@@ -22,6 +22,8 @@
#import
#import
#import
+#import
+#import
#import
#import
#import
@@ -98,32 +100,60 @@ BOOL is_backed_by_same_buffer(MLMultiArray *array1, MLMultiArray *array2) {
return options;
}
-BOOL copy(MLMultiArray *src, MLMultiArray *dst, NSError * __autoreleasing *error) {
- if (![src.shape isEqualToArray:dst.shape]) {
- ETCoreMLLogErrorAndSetNSError(error, 0, "%@: Model is broken", NSStringFromClass(ETCoreMLModelManager.class));
- return NO;
- }
+void copy(MLMultiArray *src, MLMultiArray *dst) {
if (::is_backed_by_same_buffer(src, dst)) {
- return YES;
- }
- @autoreleasepool {
- [src copyInto:dst];
+ return;
}
- return YES;
+
+ [src copyInto:dst];
}
-BOOL set_outputs(NSArray *outputs,
- NSArray *model_outputs,
- NSError * __autoreleasing *error) {
+void set_outputs(NSArray *outputs, NSArray *model_outputs) {
NSEnumerator *enumerator = [model_outputs objectEnumerator];
for (MLMultiArray *output in outputs) {
MLMultiArray *model_output = [enumerator nextObject];
- if (!::copy(output, model_output, error)) {
- return NO;
+ ::copy(model_output, output);
+ }
+}
+
+std::optional<MultiArray::DataType> get_data_type(MLMultiArrayDataType data_type) {
+ switch (data_type) {
+ case MLMultiArrayDataTypeFloat16: {
+ return MultiArray::DataType::Float16;
+ }
+ case MLMultiArrayDataTypeFloat32: {
+ return MultiArray::DataType::Float32;
+ }
+ case MLMultiArrayDataTypeFloat64: {
+ return MultiArray::DataType::Float64;
+ }
+ case MLMultiArrayDataTypeInt32: {
+ return MultiArray::DataType::Int32;
+ }
+ default: {
+ return std::nullopt;
}
}
-
- return YES;
+}
+
+void copy(MLMultiArray *src, executorchcoreml::MultiArray& dst) {
+ [src getBytesWithHandler:^(const void * _Nonnull bytes, NSInteger size) {
+ if (bytes == dst.data()) {
+ return;
+ }
+
+        MultiArray::MemoryLayout src_layout(get_data_type(src.dataType).value(), to_vector<size_t>(src.shape), to_vector<ssize_t>(src.strides));
+        MultiArray(const_cast<void *>(bytes), std::move(src_layout)).copy(dst);
+ }];
+}
+
+void set_outputs(std::vector& outputs,
+ NSArray *model_outputs) {
+ NSEnumerator *enumerator = [model_outputs objectEnumerator];
+ for (auto& output : outputs) {
+ MLMultiArray *model_output = [enumerator nextObject];
+ ::copy(model_output, output);
+ }
}
NSData * _Nullable get_file_data(const inmemoryfs::InMemoryFileSystem *inMemoryFS,
@@ -313,6 +343,7 @@ void add_compute_unit(std::string& identifier, MLComputeUnits compute_units) {
return result;
}
+
#endif
} //namespace
@@ -467,7 +498,7 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
return [[ETCoreMLModelAnalyzer alloc] initWithCompiledModelAsset:compiledModelAsset
modelAsset:modelAsset
metadata:metadata
- operationPathToDebugSymbolMap: operation_path_to_symbol_name_map
+ operationPathToDebugSymbolMap:operation_path_to_symbol_name_map
configuration:configuration
assetManager:self.assetManager
error:error];
@@ -641,6 +672,48 @@ - (void)addPrewarmedAsset:(ETCoreMLAsset *)asset {
os_unfair_lock_unlock(&_lock);
}
+- (nullable NSArray *)executeModelUsingExecutor:(id)executor
+ inputs:(NSArray *)inputs
+ outputBackings:(NSArray *)outputBackings
+ loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+ eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+ error:(NSError * __autoreleasing *)error {
+ NSError *localError = nil;
+ ETCoreMLModel *model = executor.model;
+ MLPredictionOptions *predictionOptions = ::get_prediction_options(outputBackings, model.orderedOutputNames, error);
+ if (!predictionOptions) {
+ return nil;
+ }
+
+ id inputFeatures = ::get_feature_provider(inputs, model.orderedInputNames, error);
+ if (!inputFeatures) {
+ return nil;
+ }
+
+ NSArray *modelOutputs = [executor executeModelWithInputs:inputFeatures
+ predictionOptions:predictionOptions
+ loggingOptions:loggingOptions
+ eventLogger:eventLogger
+ error:&localError];
+    // If execution with output backings failed, drop the backings, remember the
+    // decision for subsequent calls, and retry once without them.
+    if (!modelOutputs && predictionOptions.outputBackings.count > 0) {
+        localError = nil;
+        executor.ignoreOutputBackings = YES;
+        predictionOptions.outputBackings = @{};
+        modelOutputs = [executor executeModelWithInputs:inputFeatures
+                                      predictionOptions:predictionOptions
+                                         loggingOptions:loggingOptions
+                                            eventLogger:eventLogger
+                                                  error:&localError];
+    }
+
+ if (error) {
+ *error = localError;
+ }
+
+ return modelOutputs;
+}
+
- (BOOL)executeModelWithHandle:(ModelHandle *)handle
args:(NSArray *)args
loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
@@ -659,33 +732,91 @@ - (BOOL)executeModelWithHandle:(ModelHandle *)handle
if (args.count != model.orderedInputNames.count + model.orderedOutputNames.count) {
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCorruptedModel,
- "%@: Model is invalid.",
- NSStringFromClass(self.class));
+ "%@: Model is invalid, expected args count to be %lu but got %lu.",
+ NSStringFromClass(self.class),
+                                      static_cast<unsigned long>(model.orderedInputNames.count + model.orderedOutputNames.count),
+ args.count);
return NO;
}
-
- NSArray *inputs = [args subarrayWithRange:NSMakeRange(0, model.orderedInputNames.count)];
- NSArray *outputs = [args subarrayWithRange:NSMakeRange(model.orderedInputNames.count, args.count - model.orderedInputNames.count)];
- id inputFeatures = ::get_feature_provider(inputs, model.orderedInputNames, error);
- if (!inputFeatures) {
- return NO;
+ @autoreleasepool {
+ NSArray *inputs = [args subarrayWithRange:NSMakeRange(0, model.orderedInputNames.count)];
+ NSArray *outputs = [args subarrayWithRange:NSMakeRange(model.orderedInputNames.count, args.count - model.orderedInputNames.count)];
+ NSArray *outputBackings = @[];
+ if (executor.ignoreOutputBackings == NO) {
+ outputBackings = outputs;
+ }
+
+ NSArray *modelOutputs = [self executeModelUsingExecutor:executor
+ inputs:inputs
+ outputBackings:outputBackings
+ loggingOptions:loggingOptions
+ eventLogger:eventLogger
+ error:error];
+ if (!modelOutputs) {
+ return NO;
+ }
+
+ ::set_outputs(outputs, modelOutputs);
}
- MLPredictionOptions *predictionOptions = ::get_prediction_options(outputs, model.orderedOutputNames, error);
- if (!predictionOptions) {
+ return YES;
+}
+
+- (BOOL)executeModelWithHandle:(ModelHandle *)handle
+                       argsVec:(const std::vector<executorchcoreml::MultiArray>&)argsVec
+ loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+ eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+ error:(NSError * __autoreleasing *)error {
+ id executor = [self executorWithHandle:handle];
+ if (!executor) {
+ ETCoreMLLogErrorAndSetNSError(error,
+ 0,
+ "%@: Model is already unloaded.",
+ NSStringFromClass(self.class));
return NO;
}
- NSArray *modelOutputs = [executor executeModelWithInputs:inputFeatures
- predictionOptions:predictionOptions
- loggingOptions:loggingOptions
- eventLogger:eventLogger
- error:error];
- if (!outputs) {
+ ETCoreMLModel *model = executor.model;
+ if (argsVec.size() != model.orderedInputNames.count + model.orderedOutputNames.count) {
+ ETCoreMLLogErrorAndSetNSError(error,
+ ETCoreMLErrorCorruptedModel,
+ "%@: Model is invalid, expected args count to be %lu but got %lu.",
+ NSStringFromClass(self.class),
+                                      static_cast<unsigned long>(model.orderedInputNames.count + model.orderedOutputNames.count),
+ argsVec.size());
return NO;
}
- return ::set_outputs(outputs, modelOutputs, error);
+    std::vector<executorchcoreml::MultiArray> inputArgs(argsVec.begin(), argsVec.begin() + model.orderedInputNames.count);
+    std::vector<executorchcoreml::MultiArray> outputArgs(argsVec.begin() + model.orderedInputNames.count, argsVec.end());
+ @autoreleasepool {
+ NSArray *inputs = [model prepareInputs:inputArgs error:error];
+ if (!inputs) {
+ return NO;
+ }
+
+ NSArray *outputBackings = @[];
+ if (executor.ignoreOutputBackings == NO) {
+ outputBackings = [model prepareOutputBackings:outputArgs error:error];
+ }
+
+ if (!outputBackings) {
+ return NO;
+ }
+
+ NSArray *modelOutputs = [self executeModelUsingExecutor:executor
+ inputs:inputs
+ outputBackings:outputBackings
+ loggingOptions:loggingOptions
+ eventLogger:eventLogger
+ error:error];
+ if (!modelOutputs) {
+ return NO;
+ }
+
+ ::set_outputs(outputArgs, modelOutputs);
+ return YES;
+ }
}
- (BOOL)unloadModelWithHandle:(ModelHandle *)handle {
diff --git a/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm b/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm
index 4aa5fffe94a..b8a10fcbbbc 100644
--- a/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm
+++ b/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm
@@ -7,55 +7,17 @@
#import
+#import
#import
namespace {
using namespace executorchcoreml;
-template
-T toValue(NSNumber *value);
-
-template<> size_t toValue(NSNumber *value) {
- return value.unsignedLongValue;
-}
-
-template<> ssize_t toValue(NSNumber *value) {
- return value.longLongValue;
-}
-
-template::value, T>::type>
-std::vector to_vector(NSArray *numbers) {
- std::vector result;
- result.reserve(numbers.count);
- for (NSNumber *number in numbers) {
- result.emplace_back(toValue(number));
- }
-
- return result;
-}
-
-MultiArray::DataType to_multi_array_data_type(MLMultiArrayDataType data_type) {
- switch (data_type) {
- case MLMultiArrayDataTypeInt32: {
- return MultiArray::DataType::Int;
- }
- case MLMultiArrayDataTypeFloat: {
- return MultiArray::DataType::Float;
- }
- case MLMultiArrayDataTypeFloat16: {
- return MultiArray::DataType::Float16;
- }
- case MLMultiArrayDataTypeDouble: {
- return MultiArray::DataType::Double;
- }
- }
-}
-
MultiArray to_multi_array(void *data,
MLMultiArrayDataType dataType,
NSArray *shape,
NSArray *strides) {
- auto layout = MultiArray::MemoryLayout(to_multi_array_data_type(dataType),
+ auto layout = MultiArray::MemoryLayout(to_multiarray_data_type(dataType).value(),
to_vector(shape),
to_vector(strides));
return MultiArray(data, std::move(layout));
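
The local `toValue`/`to_vector` helpers removed above are superseded by shared
utilities imported at the top of the file. Roughly, such a helper converts an
`NSArray<NSNumber *>` of shape or stride values into a `std::vector` of the
requested integral type; a minimal sketch of that shape (the real helper lives in
the shared objc_array_util header and may differ in detail):

    #import <Foundation/Foundation.h>
    #include <vector>

    // Assumed shape of the shared conversion helper, shown for illustration only.
    template <typename T>
    std::vector<T> to_vector_sketch(NSArray<NSNumber *> *numbers) {
        std::vector<T> result;
        result.reserve(numbers.count);
        for (NSNumber *number in numbers) {
            // NSNumber bridges to any integral type via longLongValue.
            result.push_back(static_cast<T>(number.longLongValue));
        }
        return result;
    }
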
diff --git a/backends/apple/coreml/runtime/delegate/backend_delegate.h b/backends/apple/coreml/runtime/delegate/backend_delegate.h
index d6a6016c087..ed921fb35bd 100644
--- a/backends/apple/coreml/runtime/delegate/backend_delegate.h
+++ b/backends/apple/coreml/runtime/delegate/backend_delegate.h
@@ -26,7 +26,7 @@ class BackendDelegate {
struct Config {
// Max models cache size in bytes.
- size_t max_models_cache_size = 2 * size_t(1024) * size_t(1024) * size_t(1024);
+ size_t max_models_cache_size = 10 * size_t(1024) * size_t(1024) * size_t(1024);
// If set to `true`, delegate pre-warms the most recently used asset.
bool should_prewarm_asset = true;
// If set to `true`, delegate pre-warms the model in `init`.
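
The default model-cache ceiling grows from 2 GiB to 10 GiB (10 * 1024^3 bytes). A
client that needs a smaller footprint can still override the defaults on the config
struct; a minimal sketch, assuming the struct is value-initialized as declared above
and that the `executorchcoreml::` qualification applies:

    // Hedged sketch: shrink the cache back to 2 GiB and skip asset pre-warming.
    executorchcoreml::BackendDelegate::Config config;
    config.max_models_cache_size = size_t(2) * 1024 * 1024 * 1024;
    config.should_prewarm_asset = false;
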
diff --git a/backends/apple/coreml/runtime/delegate/backend_delegate.mm b/backends/apple/coreml/runtime/delegate/backend_delegate.mm
index b91a6208b6a..1ded4a76b3b 100644
--- a/backends/apple/coreml/runtime/delegate/backend_delegate.mm
+++ b/backends/apple/coreml/runtime/delegate/backend_delegate.mm
@@ -44,44 +44,6 @@ MLComputeUnits get_compute_units(const Buffer& buffer) {
return configuration;
}
-template::value, T>::type>
-NSArray *to_array(const std::vector& array) {
- NSMutableArray *result = [NSMutableArray arrayWithCapacity:array.size()];
- for (T value : array) {
- [result addObject:@(value)];
- }
-
- return result;
-}
-
-MLMultiArrayDataType get_data_type(MultiArray::DataType dataType) {
- switch (dataType) {
- case MultiArray::DataType::Float16: {
- return MLMultiArrayDataTypeFloat16;
- }
- case MultiArray::DataType::Float: {
- return MLMultiArrayDataTypeFloat32;
- }
- case MultiArray::DataType::Double: {
- return MLMultiArrayDataTypeDouble;
- }
- case MultiArray::DataType::Int: {
- return MLMultiArrayDataTypeInt32;
- }
- }
-}
-
-MLMultiArray * _Nullable to_ml_multiarray(const MultiArray& array, NSError * __autoreleasing *error) {
- const auto& layout = array.layout();
- MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:array.data()
- shape:to_array(layout.shape())
- dataType:get_data_type(layout.dataType())
- strides:to_array(layout.strides())
- deallocator:^(void * _Nonnull bytes) {}
- error:error];
- return result;
-}
-
NSURL * _Nullable create_directory_if_needed(NSURL *url,
NSFileManager *fileManager,
NSError * __autoreleasing *error) {
@@ -194,17 +156,8 @@ bool execute(Handle* handle,
ModelEventLogger *event_logger,
std::error_code& ec) const noexcept override {
NSError *error = nil;
- NSMutableArray *model_args = [NSMutableArray arrayWithCapacity:args.size()];
- for (const auto& arg : args) {
- MLMultiArray *multi_array = to_ml_multiarray(arg, &error);
- if (!multi_array) {
- return false;
- }
- [model_args addObject:multi_array];
- }
-
if (![model_manager_ executeModelWithHandle:handle
- args:model_args
+ argsVec:args
loggingOptions:logging_options
eventLogger:event_logger
error:&error]) {
diff --git a/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist b/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist
index 7dd12acaaf8..df37a47755f 100644
--- a/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist
+++ b/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist
@@ -7,6 +7,6 @@
shouldPrewarmModel
maxAssetsSizeInBytes
- 2147483648
+ 1073741824
diff --git a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
index a51e73ee68d..b672d4a08e4 100644
--- a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
+++ b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
@@ -28,16 +28,25 @@
using namespace executorchcoreml;
std::optional get_data_type(ScalarType scalar_type) {
- if (scalar_type == ScalarType::Float) {
- return MultiArray::DataType::Float;
- } else if (scalar_type == ScalarType::Double) {
- return MultiArray::DataType::Double;
- } else if (scalar_type == ScalarType::Half) {
- return MultiArray::DataType::Float16;
- } else if (scalar_type == ScalarType::Int) {
- return MultiArray::DataType::Int;
- } else {
- return std::nullopt;
+ switch (scalar_type) {
+ case ScalarType::Bool:
+ return MultiArray::DataType::Bool;
+ case ScalarType::Byte:
+ return MultiArray::DataType::Byte;
+ case ScalarType::Short:
+ return MultiArray::DataType::Short;
+ case ScalarType::Int:
+ return MultiArray::DataType::Int32;
+ case ScalarType::Long:
+ return MultiArray::DataType::Int64;
+ case ScalarType::Half:
+ return MultiArray::DataType::Float16;
+ case ScalarType::Float:
+ return MultiArray::DataType::Float32;
+ case ScalarType::Double:
+ return MultiArray::DataType::Float64;
+ default:
+ return std::nullopt;
}
}
@@ -54,6 +63,7 @@
auto tensor = eValue->toTensor();
auto dataType = get_data_type(tensor.scalar_type());
if (!dataType.has_value()) {
+ ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type());
return std::nullopt;
}
@@ -167,7 +177,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
auto multi_array = get_multi_array(args[i], ArgType::Input);
ET_CHECK_OR_RETURN_ERROR(multi_array.has_value(),
Internal,
- "%s: Expected tensor at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
+ "%s: Failed to create multiarray from input at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
delegate_args.emplace_back(std::move(multi_array.value()));
}
@@ -176,7 +186,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
auto multi_array = get_multi_array(args[i], ArgType::Output);
ET_CHECK_OR_RETURN_ERROR(multi_array.has_value(),
Internal,
- "%s: Expected tensor at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
+ "%s: Failed to create multiarray from output at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
delegate_args.emplace_back(std::move(multi_array.value()));
}
diff --git a/backends/apple/coreml/runtime/delegate/multiarray.h b/backends/apple/coreml/runtime/delegate/multiarray.h
index cd165373dc8..70a2a08a2f7 100644
--- a/backends/apple/coreml/runtime/delegate/multiarray.h
+++ b/backends/apple/coreml/runtime/delegate/multiarray.h
@@ -7,6 +7,9 @@
#pragma once
+#import
+#import
+#import
#import
namespace executorchcoreml {
@@ -29,13 +32,33 @@ class Buffer {
};
/// A class representing a MultiArray.
-class MultiArray {
+class MultiArray final {
public:
/// The MultiArray datatype.
- enum class DataType : uint8_t { Int = 0, Double, Float, Float16 };
+ enum class DataType : uint8_t {
+ Bool = 0,
+ Byte,
+ Char,
+ Short,
+ Int32,
+ Int64,
+ Float16,
+ Float32,
+ Float64,
+ };
+
+ /// Options for copying.
+ struct CopyOptions {
+ inline CopyOptions() noexcept : use_bnns(true), use_memcpy(true) { }
+
+ inline CopyOptions(bool use_bnns, bool use_memcpy) noexcept : use_bnns(use_bnns), use_memcpy(use_memcpy) { }
+
+ bool use_bnns = true;
+ bool use_memcpy = true;
+ };
/// A class describing the memory layout of a MultiArray.
- class MemoryLayout {
+ class MemoryLayout final {
public:
MemoryLayout(DataType dataType, std::vector shape, std::vector strides)
: dataType_(dataType), shape_(std::move(shape)), strides_(std::move(strides)) { }
@@ -53,7 +76,10 @@ class MultiArray {
inline size_t rank() const noexcept { return shape_.size(); }
/// Returns the number of elements in the MultiArray.
- size_t get_num_elements() const noexcept;
+ size_t num_elements() const noexcept;
+
+ /// Returns the byte size of an element.
+ size_t num_bytes() const noexcept;
/// Returns `true` if the memory layout is packed otherwise `false`.
bool is_packed() const noexcept;
@@ -78,11 +104,42 @@ class MultiArray {
/// Copies this into another `MultiArray`.
///
/// @param dst The destination `MultiArray`.
- bool copy(MultiArray& dst) const noexcept;
+ void copy(MultiArray& dst, CopyOptions options = CopyOptions()) const noexcept;
+
+ /// Get the value at `indices`.
+ template inline T value(const std::vector& indices) const noexcept {
+ return *(static_cast(data(indices)));
+ }
+
+ /// Set the value at `indices`.
+ template inline void set_value(const std::vector& indices, T value) const noexcept {
+ T* ptr = static_cast(data(indices));
+ *ptr = value;
+ }
+
+ /// Get the value at `index`.
+ template inline T value(size_t index) const noexcept { return *(static_cast(data(index))); }
+
+ /// Set the value at `index`.
+ template inline void set_value(size_t index, T value) const noexcept {
+ T* ptr = static_cast(data(index));
+ *ptr = value;
+ }
private:
+ void* data(const std::vector& indices) const noexcept;
+
+ void* data(size_t index) const noexcept;
+
void* data_;
MemoryLayout layout_;
};
+/// Converts `MultiArray::DataType` to `MLMultiArrayDataType`.
+std::optional<MLMultiArrayDataType> to_ml_multiarray_data_type(MultiArray::DataType data_type);
+
+/// Converts `MLMultiArrayDataType` to `MultiArray::DataType`.
+std::optional<MultiArray::DataType> to_multiarray_data_type(MLMultiArrayDataType data_type);
+
} // namespace executorchcoreml
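
The reworked `MultiArray` carries a wider datatype enum, per-element accessors, and
a `copy` that can be steered away from BNNS or memcpy via `CopyOptions`. A rough
usage sketch, assuming the `(void *data, MemoryLayout layout)` constructor seen in
the delegate code above, `size_t` shapes, `ssize_t` strides, and `std::vector<size_t>`
indices:

    #include <vector>
    #include "multiarray.h"  // assumed include path for this sketch

    using executorchcoreml::MultiArray;

    void float_to_half_sketch(float *src_data, _Float16 *dst_data) {
        // Two packed 2x3 buffers that differ only in element type.
        std::vector<size_t> shape = {2, 3};
        std::vector<ssize_t> strides = {3, 1};
        MultiArray src(src_data, MultiArray::MemoryLayout(MultiArray::DataType::Float32, shape, strides));
        MultiArray dst(dst_data, MultiArray::MemoryLayout(MultiArray::DataType::Float16, shape, strides));

        // Default CopyOptions permit the BNNS fast path; otherwise the
        // element-wise fallback performs the Float32 -> Float16 conversion.
        src.copy(dst);

        // Per-element accessors added by this change.
        _Float16 first = dst.value<_Float16>(std::vector<size_t>{0, 0});
        dst.set_value<_Float16>(std::vector<size_t>{0, 0}, first);
    }
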
diff --git a/backends/apple/coreml/runtime/delegate/multiarray.mm b/backends/apple/coreml/runtime/delegate/multiarray.mm
index 3b8dcb98a30..74996fb8d5a 100644
--- a/backends/apple/coreml/runtime/delegate/multiarray.mm
+++ b/backends/apple/coreml/runtime/delegate/multiarray.mm
@@ -10,120 +10,16 @@
#import
#import
-
#import
#import
+#import
+#import
#import
namespace {
using namespace executorchcoreml;
-template
-struct TypedMultiArray {
- explicit TypedMultiArray(T *data, MultiArray::MemoryLayout layout) noexcept
- :data(data), layout(std::move(layout))
- {}
-
- T *data;
- MultiArray::MemoryLayout layout;
-};
-
-#pragma mark - BNNS
-
-template
-struct BNNSCopier {
- static bool supported() noexcept {
- return false;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dstNNSDesc) noexcept {}
-};
-
-// float -> _Float16
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat32;
- dst_bnns_desc->data_type = BNNSDataTypeFloat16;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// float -> int32_t
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat32;
- dst_bnns_desc->data_type = BNNSDataTypeInt32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// _Float16 -> float
-template<>
-struct BNNSCopier<_Float16, float> {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat16;
- dst_bnns_desc->data_type = BNNSDataTypeFloat32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// _Float16 -> int32_t
-template<>
-struct BNNSCopier<_Float16, int32_t> {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat16;
- dst_bnns_desc->data_type = BNNSDataTypeInt32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// int32_t -> _Float16
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeInt32;
- dst_bnns_desc->data_type = BNNSDataTypeFloat16;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// int32_t -> float
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeInt32;
- dst_bnns_desc->data_type = BNNSDataTypeFloat32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-/// Returns BNNSDataLayout and sets strides from the multi-array strides.
+// Returns BNNSDataLayout and sets strides from the multi-array strides.
///
/// BNNS requires strides to be non-decreasing order;
/// `bnns_strides[i] <= bnns_strides[i + 1]`. BNNSDataLayout defines
@@ -132,408 +28,491 @@ static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *ds
/// @param multi_array_strides The multiarray strides.
/// @param bnns_strides The bnns strides.
/// @retval The `BNNSDataLayout`.
-BNNSDataLayout get_bnns_data_layout(const std::vector& multi_array_strides, size_t *bnns_strides) {
- uint32_t firstMajorFlag = 1;
+std::optional get_bnns_data_layout(const std::vector& multi_array_strides,
+ size_t *bnns_strides) {
+ bool first_major = false;
uint32_t rank = static_cast(multi_array_strides.size());
if (rank > BNNS_MAX_TENSOR_DIMENSION) {
- return (BNNSDataLayout)-1;
+ return std::nullopt;
}
if (std::is_sorted(multi_array_strides.begin(), multi_array_strides.end(), std::less())) {
- firstMajorFlag = 0;
+ first_major = false;
std::copy(multi_array_strides.begin(), multi_array_strides.end(), bnns_strides);
} else if (std::is_sorted(multi_array_strides.begin(), multi_array_strides.end(), std::greater()) ) {
- firstMajorFlag = 1;
+ first_major = true;
std::copy(multi_array_strides.rbegin(), multi_array_strides.rend(), bnns_strides);
} else {
- return (BNNSDataLayout)-1;
+ return std::nullopt;
}
// See BNNSDataLayout's raw value how this bitwise-or makes sense.
- return (BNNSDataLayout)((rank << 16) | (8 << 12) | firstMajorFlag);
+ return (BNNSDataLayout) (0x08000 + // flags as canonical first/last major type
+ 0x10000 * rank + // set dimensionality
+ (first_major ? 1 : 0)); // set first/last major bit
}
-/// Initializes BNNSNDArrayDescriptor for the shape and strides.
+/// Returns `BNNSDataType` from `MultiArray::DataType`.
///
-/// @param layout The memory layout.
-/// @param desc The ``BNNSNDArrayDescriptor` to be initialized.
-/// @retval `true` if the initialization succeeded otherwise `false`.
-bool init_bnns_array_descriptor(const MultiArray::MemoryLayout& layout, BNNSNDArrayDescriptor *desc) {
- BNNSDataLayout bnns_layout = get_bnns_data_layout(layout.strides(), desc->stride);
- if (bnns_layout == (BNNSDataLayout)-1) {
- return false;
- }
-
- std::memset(desc, 0, sizeof(*desc));
- const auto& shape = layout.shape();
- std::copy(shape.begin(), shape.end(), desc->size);
- desc->layout = bnns_layout;
- desc->data_scale = 1.0f;
- desc->data_bias = 0.0f;
-
- return true;
-}
-
-template
-struct MultiArrayBNNSCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!BNNSCopier::supported()) {
- return false;
+/// @param datatype The multiarray datatype.
+/// @retval The `BNNSDataType`.
+std::optional get_bnns_data_type(MultiArray::DataType datatype) {
+ switch (datatype) {
+ case MultiArray::DataType::Bool: {
+ return BNNSDataTypeBoolean;
}
-
- BNNSNDArrayDescriptor src_bnns_array;
- BNNSNDArrayDescriptor dst_bnns_array;
- if (!init_bnns_array_descriptor(src.layout, &src_bnns_array) || !init_bnns_array_descriptor(dst.layout, &dst_bnns_array)) {
- return false;
+ case MultiArray::DataType::Byte: {
+ return BNNSDataTypeUInt8;
+ }
+ case MultiArray::DataType::Char: {
+ return BNNSDataTypeInt8;
+ }
+ case MultiArray::DataType::Short: {
+ return BNNSDataTypeInt16;
+ }
+ case MultiArray::DataType::Int32: {
+ return BNNSDataTypeInt32;
+ }
+ case MultiArray::DataType::Int64: {
+ return BNNSDataTypeInt64;
+ }
+ case MultiArray::DataType::Float16: {
+ return BNNSDataTypeFloat16;
+ }
+ case MultiArray::DataType::Float32: {
+ return BNNSDataTypeFloat32;
+ }
+ default: {
+ return std::nullopt;
}
-
- BNNSCopier::copy(&src_bnns_array, &dst_bnns_array);
- return true;
}
-};
-
-#pragma mark - VImageCopier
+}
-bool init_vi_Buffer(const MultiArray::MemoryLayout& layout, vImage_Buffer *viBuf, size_t bytesPerScalar) {
- size_t rank = layout.rank();
- const auto& shape = layout.shape();
- const auto& strides = layout.strides();
-
- if (rank < 2) {
- // vImage path requires at least two dimensions.
- return false;
- }
-
- // vImage blitter requires first major and every dimension except row (shape[rank - 2]) is contiguous.
- if (!std::is_sorted(strides.begin(), strides.end(), std::greater())) {
+/// Initializes BNNS array descriptor from multi array.
+///
+/// @param bnns_descriptor The descriptor to be initialized.
+/// @param multi_array The multiarray.
+/// @retval `true` if the initialization succeeded otherwise `false`.
+bool init_bnns_descriptor(BNNSNDArrayDescriptor& bnns_descriptor, const MultiArray& multi_array) {
+ const auto& layout = multi_array.layout();
+ if (layout.num_elements() == 1) {
return false;
}
- if (strides[rank - 1] != 1) {
+ auto bnns_datatype = get_bnns_data_type(layout.dataType());
+ if (!bnns_datatype) {
return false;
}
- size_t height = std::accumulate(shape.begin(), shape.end() - 1, size_t(1), std::multiplies());
- if (height * strides[rank - 2] != strides[0] * shape[0]) {
+ std::memset(&bnns_descriptor, 0, sizeof(bnns_descriptor));
+ auto bnns_layout = get_bnns_data_layout(layout.strides(), bnns_descriptor.stride);
+ if (!bnns_layout) {
return false;
}
- size_t width = shape[rank - 1];
- size_t rowBytes = strides[rank - 2] * bytesPerScalar;
-
- viBuf->data = NULL;
- viBuf->height = height;
- viBuf->width = width;
- viBuf->rowBytes = rowBytes;
+ const auto& shape = layout.shape();
+ std::copy(shape.begin(), shape.end(), bnns_descriptor.size);
+ bnns_descriptor.layout = bnns_layout.value();
+ bnns_descriptor.data_scale = 1.0f;
+ bnns_descriptor.data_bias = 0.0f;
+ bnns_descriptor.data_type = bnns_datatype.value();
+ bnns_descriptor.data = multi_array.data();
return true;
}
-template
-struct VImageCopier {
- static bool supported() noexcept {
+bool copy_using_bnns(const MultiArray& src, MultiArray& dst) {
+ if (dst.layout().num_bytes() < src.layout().num_bytes()) {
return false;
}
-
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {}
-};
-
-template
-struct VImageCopier {
- static bool supported() noexcept {
- return true;
+ BNNSNDArrayDescriptor src_descriptor;
+ if (!init_bnns_descriptor(src_descriptor, src)) {
+ return false;
}
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {
- vImageCopyBuffer(src_vi_buffer, dst_vi_buffer, sizeof(T), kvImageDoNotTile);
- }
-};
-
-// float -> _Float16
-template <>
-struct VImageCopier {
- static bool supported() noexcept {
- return true;
+ BNNSNDArrayDescriptor dst_descriptor;
+ if (!init_bnns_descriptor(dst_descriptor, dst)) {
+ return false;
}
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {
- vImageConvert_PlanarFtoPlanar16F(src_vi_buffer, dst_vi_buffer, kvImageDoNotTile);
- }
-};
+ return BNNSCopy(&dst_descriptor, &src_descriptor, NULL) == 0;
+}
-// _Float16 -> float
-template <>
-struct VImageCopier<_Float16, float> {
- static bool supported() noexcept {
- return true;
- }
+std::vector get_layouts(const std::vector& arrays) {
+ std::vector result;
+ result.reserve(arrays.size());
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {
- vImageConvert_Planar16FtoPlanarF(src_vi_buffer, dst_vi_buffer, kvImageDoNotTile);
- }
-};
-
-template
-struct MultiArrayVImageCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!VImageCopier::supported()) {
- return false;
- }
-
- vImage_Buffer src_vi_buffer;
- vImage_Buffer dst_vi_buffer;
- if (!init_vi_Buffer(src.layout, &src_vi_buffer, sizeof(T1))) {
- return false;
- }
-
- if (!init_vi_Buffer(dst.layout, &dst_vi_buffer, sizeof(T2))) {
- return false;
- }
-
- VImageCopier::copy(&src_vi_buffer, &dst_vi_buffer);
- return true;
- }
-};
-
-#pragma mark - VDSPCopier
-
-template
-struct VDSPCopier {
- static bool supported() noexcept {
- return false;
- }
+ std::transform(arrays.begin(), arrays.end(), std::back_inserter(result), [](const auto& array) {
+ return array.layout();
+ });
- static void copy(const T1 *src_data, T2 *dst_data, size_t num_elements) noexcept {}
-};
+ return result;
+}
-// Double -> Float
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
- }
+std::vector get_datas(const std::vector& arrays) {
+ std::vector result;
+ result.reserve(arrays.size());
- static void copy(const double *src_data, float *dst_data, size_t num_elements) noexcept {
- vDSP_vdpsp(src_data, 1, dst_data, 1, num_elements);
- }
-};
-
-// Float -> Double
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
- }
+ std::transform(arrays.begin(), arrays.end(), std::back_inserter(result), [](const auto& array) {
+ return array.data();
+ });
- static void copy(const float *src_data, double *dst_data, size_t num_elements) noexcept {
- vDSP_vspdp(src_data, 1, dst_data, 1, num_elements);
- }
-};
+ return result;
+}
-// Float -> Int32
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
+// We can coalesce two adjacent dimensions if either dim has size 1 or if `shape[n] * stride[n] == stride[n + 1]`.
+bool can_coalesce_dimensions(const std::vector& shape,
+ const std::vector& strides,
+ size_t dim1,
+ size_t dim2) {
+ auto shape1 = shape[dim1];
+ auto shape2 = shape[dim2];
+ if (shape1 == 1 || shape2 == 1) {
return true;
}
- static void copy(const float *src_data, int32_t *dst_data, size_t num_elements) noexcept {
- vDSP_vfix32(src_data, 1, dst_data, 1, num_elements);
- }
-};
+ auto stride1 = strides[dim1];
+ auto stride2 = strides[dim2];
+ return shape1 * stride1 == stride2;
+}
-// Int32 -> Double
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
+bool can_coalesce_dimensions(const std::vector& shape,
+ const std::vector>& all_strides,
+ size_t dim1,
+ size_t dim2) {
+ for (const auto& strides : all_strides) {
+ if (!::can_coalesce_dimensions(shape, strides, dim1, dim2)) {
+ return false;
+ }
}
- static void copy(const int32_t *src_data, double *dst_data, size_t num_elements) noexcept {
- vDSP_vflt32D(src_data, 1, dst_data, 1, num_elements);
- }
-};
+ return true;
+}
-// Int32 -> Float
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(const int32_t *src_data, float *dst_data, size_t num_elements) noexcept {
- vDSP_vflt32(src_data, 1, dst_data, 1, num_elements);
+void update_strides(std::vector>& all_strides,
+ size_t dim1,
+ size_t dim2) {
+ for (auto& strides : all_strides) {
+ strides[dim1] = strides[dim2];
}
-};
+}
-template
-struct MultiArrayVDSPCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!VDSPCopier::supported()) {
- return false;
- }
-
- if (!src.layout.is_packed() || !dst.layout.is_packed()) {
- return false;
+std::vector coalesce_dimensions(std::vector layouts) {
+ if (layouts.size() == 0) {
+ return {};
+ }
+
+ std::vector shape = layouts.back().shape();
+ // reverse shape.
+ std::reverse(shape.begin(), shape.end());
+ std::vector> all_strides;
+ // reverse strides.
+ all_strides.reserve(layouts.size());
+ std::transform(layouts.begin(), layouts.end(), std::back_inserter(all_strides), [](const MultiArray::MemoryLayout& layout) {
+ auto strides = layout.strides();
+ std::reverse(strides.begin(), strides.end());
+ return strides;
+ });
+ size_t rank = layouts[0].rank();
+ size_t prev_dim = 0;
+ for (size_t dim = 1; dim < rank; ++dim) {
+ if (::can_coalesce_dimensions(shape, all_strides, prev_dim, dim)) {
+ if (shape[prev_dim] == 1) {
+ ::update_strides(all_strides, prev_dim, dim);
+ }
+ shape[prev_dim] *= shape[dim];
+ } else {
+ ++prev_dim;
+ if (prev_dim != dim) {
+ ::update_strides(all_strides, prev_dim, dim);
+ shape[prev_dim] = shape[dim];
+ }
}
-
- VDSPCopier::copy(src.data, dst.data, src.layout.get_num_elements());
- return true;
}
-};
-
-#pragma mark - MemCopy
-
-template
-struct MemCopier {
- static bool supported() noexcept {
- return false;
+
+ if (rank == prev_dim + 1) {
+ return layouts;
}
- static void copy(const T1 *src_data, T2 *dst_data, size_t num_elements) noexcept {}
-};
-
-template
-struct MemCopier {
- static bool supported() noexcept {
- return true;
+ shape.resize(prev_dim + 1);
+ for (auto& strides : all_strides) {
+ strides.resize(prev_dim + 1);
}
- static void copy(const T *src_data, T *dst_data, size_t num_elements) noexcept {
- std::memcpy(dst_data, src_data, num_elements);
+ std::vector result;
+ result.reserve(layouts.size());
+ std::reverse(shape.begin(), shape.end());
+ for (size_t i = 0; i < layouts.size(); ++i) {
+ std::reverse(all_strides[i].begin(), all_strides[i].end());
+ result.emplace_back(layouts[i].dataType(), shape, std::move(all_strides[i]));
}
+
+ return result;
+}
+
+enum class Direction : uint8_t {
+ Forward = 0,
+ Backward
};
-template
-struct MultiArrayMemCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!MemCopier::supported()) {
- return false;
- }
-
- if (!src.layout.is_packed() || !dst.layout.is_packed()) {
- return false;
+void set_data_pointers(std::vector& data_pointers,
+ ssize_t index,
+ size_t dim,
+ Direction direction,
+ const std::vector& layouts) {
+ for (size_t i = 0; i < layouts.size(); ++i) {
+ const auto& layout = layouts[i];
+ const ssize_t stride = layout.strides()[dim];
+ const size_t num_bytes = layout.num_bytes();
+ ssize_t offset = 0;
+ switch (direction) {
+ case Direction::Forward: {
+ offset = stride * index * num_bytes;
+ break;
+ }
+ case Direction::Backward: {
+ offset = - stride * index * num_bytes;
+ break;
+ }
}
-
- MemCopier::copy(src.data, dst.data, src.layout.get_num_elements());
- return true;
+ data_pointers[i] = (void *)(static_cast(data_pointers[i]) + offset);
}
-};
+}
+
+void increment_data_pointers(std::vector& data_pointers,
+ size_t index,
+ size_t dim,
+ const std::vector& layouts) {
+ set_data_pointers(data_pointers, index, dim, Direction::Forward, layouts);
+}
-#pragma mark - MultiArrayIterator
-/// TODO - remove recursion and coalesce contiguous dimensions.
-template
-struct MultiArrayIterator {
- explicit MultiArrayIterator(TypedMultiArray& array1, TypedMultiArray& array2)
- :array1(array1), array2(array2)
+void decrement_data_pointers(std::vector& data_pointers,
+ size_t index,
+ size_t dim,
+ const std::vector& layouts) {
+ set_data_pointers(data_pointers, index, dim, Direction::Backward, layouts);
+}
+
+class MultiArrayIterator final {
+public:
+ explicit MultiArrayIterator(const std::vector& arrays)
+ :datas_(get_datas(arrays)),
+ layouts_(coalesce_dimensions(get_layouts(arrays)))
{}
+private:
template
- void loop(FN&& fn, T1 *data1, T2 *data2, size_t dim) {
- const size_t index = dim - 1;
- const auto& layout1 = array1.layout;
- const auto& layout2 = array2.layout;
- const ssize_t stride1 = layout1.strides()[index];
- const ssize_t stride2 = layout2.strides()[index];
- const size_t bound = layout1.shape()[index];
-
- if (index == 0) {
- for (size_t i = 0; i < bound; i++) {
- if (fn(data1 + stride1 * i, data2 + stride2 * i)) {
- break;
+ void exec(FN&& fn, const std::vector& layouts, std::vector datas, size_t n) {
+ const auto& layout = layouts.back();
+ // Avoid function call for rank <= 2.
+ switch (n) {
+ case 0: {
+ break;
+ }
+ case 1: {
+ for (size_t i = 0; i < layout.shape()[0]; ++i) {
+ ::increment_data_pointers(datas, i, 0, layouts);
+ fn(datas);
+ ::decrement_data_pointers(datas, i, 0, layouts);
+ }
+ break;
+ }
+ case 2: {
+ for (size_t i = 0; i < layout.shape()[1]; ++i) {
+ ::increment_data_pointers(datas, i, 1, layouts);
+ for (size_t j = 0; j < layout.shape()[0]; ++j) {
+ ::increment_data_pointers(datas, j, 0, layouts);
+ fn(datas);
+ ::decrement_data_pointers(datas, j, 0, layouts);
+ }
+ ::decrement_data_pointers(datas, i, 1, layouts);
+ }
+
+ break;
+ }
+
+ default: {
+ const size_t bound = layouts.back().shape()[n - 1];
+ for (size_t index = 0; index < bound; ++index) {
+ ::increment_data_pointers(datas, index, n - 1, layouts);
+ exec(std::forward(fn), layouts, datas, n - 1);
+ ::decrement_data_pointers(datas, index, n - 1, layouts);
}
}
- return;
- }
-
- for (size_t i = 0; i < bound; i++) {
- loop(fn, data1 + stride1 * i, data2 + stride2 * i, dim - 1);
}
}
+public:
template
- void loop(FN&& fn) {
- loop(fn, array1.data, array2.data, array1.layout.rank());
+ void exec(FN&& fn) {
+ std::vector datas = datas_;
+ exec(fn, layouts_, datas, layouts_[0].rank());
}
- TypedMultiArray array1;
- TypedMultiArray array2;
+private:
+ std::vector datas_;
+ std::vector layouts_;
};
+/// BNNS has no double type, so we handle the conversions here.
template
-struct MultiArrayLoopingCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- auto looper = MultiArrayIterator(src, dst);
- looper.loop([](T1 *src, T2 *dst){
- *dst = static_cast(*src);
- return true;
- });
-
- return true;
- }
-};
+inline void copy_value(void *dst, const void *src) {
+ const T2 *src_ptr = static_cast(src);
+ T1 *dst_ptr = static_cast(dst);
+ *dst_ptr = static_cast(*src_ptr);
+}
-template
-struct MultiArrayCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (src.layout.shape() != dst.layout.shape()) {
- return false;
+template
+void copy(void *dst,
+ MultiArray::DataType dst_data_type,
+ const void *src) {
+ switch (dst_data_type) {
+ case MultiArray::DataType::Bool: {
+ ::copy_value(dst, src);
+ break;
+ }
+
+ case MultiArray::DataType::Byte: {
+ ::copy_value(dst, src);
+ break;
+ }
+
+ case MultiArray::DataType::Char: {
+ ::copy_value(dst, src);
+ break;
+ }
+
+ case MultiArray::DataType::Short: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (src.layout.get_num_elements() == 0) {
- return true;
+
+ case MultiArray::DataType::Int32: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (MultiArrayBNNSCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Int64: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (MultiArrayVImageCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Float16: {
+ ::copy_value<_Float16, T>(dst, src);
+ break;
}
-
- if (MultiArrayVDSPCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Float32: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (MultiArrayMemCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Float64: {
+ ::copy_value(dst, src);
+ break;
}
-
- return MultiArrayLoopingCopier::copy(src, dst);
}
-};
+}
-template
-bool copy(TypedMultiArray& src, MultiArray& dst) {
- const auto& dstLayout = dst.layout();
- switch (dstLayout.dataType()) {
- case MultiArray::DataType::Int: {
- auto dst_array = TypedMultiArray(reinterpret_cast(dst.data()), dstLayout);
- return MultiArrayCopier::copy(src, dst_array);
+void copy(void *dst,
+ MultiArray::DataType dst_data_type,
+ const void *src,
+ MultiArray::DataType src_data_type) {
+ switch (src_data_type) {
+ case MultiArray::DataType::Bool: {
+ ::copy(dst, dst_data_type, src);
+ break;
+ }
+
+ case MultiArray::DataType::Byte: {
+ ::copy(dst, dst_data_type, src);
+ break;
+ }
+
+ case MultiArray::DataType::Char: {
+ ::copy