diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 7de308b1a63..c1afbf17e03 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -3,6 +3,16 @@ name: apple-perf on: schedule: - cron: 0 1 * * * + pull_request: + paths: + - .github/workflows/apple-perf.yml + - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 + push: + branches: + - main + paths: + - .github/workflows/apple-perf.yml + - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 # Note: GitHub has an upper limit of 10 inputs workflow_dispatch: inputs: @@ -25,10 +35,6 @@ on: description: The list of configs used the benchmark required: false type: string - test_spec: - description: The test spec to drive the test on AWS devices - required: false - type: string workflow_call: inputs: models: @@ -50,10 +56,6 @@ on: description: The list of configs used the benchmark required: false type: string - test_spec: - description: The test spec to drive the test on AWS devices - required: false - type: string concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} @@ -74,9 +76,9 @@ jobs: # Separate default values from the workflow dispatch. To ensure defaults are accessible # during scheduled runs and to provide flexibility for different defaults between # on-demand and periodic benchmarking. - CRON_DEFAULT_MODELS: "stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l" - CRON_DEFAULT_DEVICES: "apple_iphone_15" - CRON_DEFAULT_DELEGATES: "xnnpack,coreml,mps" + CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l' || 'stories110M' }} + CRON_DEFAULT_DEVICES: apple_iphone_15 + CRON_DEFAULT_DELEGATES: ${{ github.event_name == 'schedule' && 'xnnpack,coreml,mps' || 'xnnpack' }} run: | set -ex MODELS="${{ inputs.models }}" @@ -114,6 +116,41 @@ jobs: echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT + prepare-test-specs: + runs-on: linux.2xlarge + needs: set-parameters + strategy: + matrix: + model: ${{ fromJson(needs.set-parameters.outputs.models) }} + delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }} + fail-fast: false + steps: + - uses: actions/checkout@v3 + + - name: Prepare the spec + shell: bash + working-directory: extension/benchmark/apple/Benchmark + run: | + set -eux + # The model will be exported in the next step to this S3 path + MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip" + # We could write a script to properly use jinja here, but there is only one variable, + # so let's just sed it + sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2 + cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml + # Just print the test spec for debugging + cat default-ios-device-farm-appium-test-spec.yml + + - name: Upload the spec + uses: seemethere/upload-artifact-s3@v5 + with: + s3-bucket: gha-artifacts + s3-prefix: | + ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }} + retention-days: 1 + if-no-files-found: error + path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml + export-models: name: export-models uses: pytorch/test-infra/.github/workflows/macos_job.yml@main @@ -173,10 +210,19 @@ jobs: DELEGATE_CONFIG="mps" fi PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}" + bash .ci/scripts/test_llama.sh \ + -model "${{ matrix.model }}" \ + -build_tool "${BUILD_MODE}" \ + -dtype "${DTYPE}" \ + -mode "${DELEGATE_CONFIG}" \ + -upload "${ARTIFACTS_DIR_NAME}" else PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \ - bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}" + bash .ci/scripts/test_model.sh \ + "${{ matrix.model }}" \ + "${BUILD_MODE}" \ + "${{ matrix.delegate }}" \ + "${ARTIFACTS_DIR_NAME}" fi echo "::endgroup::" @@ -282,6 +328,7 @@ jobs: if: always() needs: - set-parameters + - prepare-test-specs - upload-benchmark-app - export-models permissions: @@ -307,8 +354,7 @@ jobs: # Uploaded to S3 from the previous job ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip - test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }} - extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip + test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/default-ios-device-farm-appium-test-spec.yml upload-benchmark-results: needs: diff --git a/.github/workflows/upload-apple-test-specs.yml b/.github/workflows/upload-apple-test-specs.yml deleted file mode 100644 index cf5906c6df1..00000000000 --- a/.github/workflows/upload-apple-test-specs.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: Upload AWS Device Farm Apple iOS test specs - -on: - pull_request: - paths: - - .github/workflows/upload-apple-test-specs.yml - - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml - push: - branches: - - main - paths: - - .github/workflows/upload-apple-test-specs.yml - - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml - -concurrency: - # NB: This concurency group needs to be different than the one used in apple-perf, otherwise - # GH complains about concurrency deadlock - group: apple-spec-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} - cancel-in-progress: true - -jobs: - upload-apple-test-spec-for-validation: - runs-on: linux.2xlarge - steps: - - uses: actions/checkout@v3 - - - name: Upload the spec as a GitHub artifact for validation - uses: seemethere/upload-artifact-s3@v5 - with: - s3-bucket: gha-artifacts - s3-prefix: | - ${{ github.repository }}/${{ github.run_id }}/artifacts - retention-days: 1 - if-no-files-found: error - path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml - - validate-apple-test-spec: - needs: upload-apple-test-spec-for-validation - uses: ./.github/workflows/apple-perf.yml - secrets: inherit - permissions: - id-token: write - contents: read - with: - # Just use a small model here with a minimal amount of configuration to test the spec - models: stories110M - devices: apple_iphone_15 - delegates: xnnpack - test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/default-ios-device-farm-appium-test-spec.yml - - upload-apple-test-spec: - needs: validate-apple-test-spec - runs-on: ubuntu-22.04 - timeout-minutes: 15 - permissions: - id-token: write - contents: read - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-python@v4 - with: - python-version: '3.11' - cache: pip - - - name: configure aws credentials - uses: aws-actions/configure-aws-credentials@v1.7.0 - with: - role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-ios - aws-region: us-east-1 - - - name: Only push to S3 when running the workflow manually from main branch - if: ${{ github.ref == 'refs/heads/main' }} - shell: bash - run: | - set -eux - echo "UPLOAD_ON_MAIN=1" >> "${GITHUB_ENV}" - - - name: Upload the spec to S3 ossci-ios bucket - shell: bash - working-directory: extension/benchmark/apple/Benchmark/ - env: - SPEC_FILE: default-ios-device-farm-appium-test-spec.yml - run: | - set -eux - - pip install awscli==1.32.18 - - AWS_CMD="aws s3 cp --dryrun" - if [[ "${UPLOAD_ON_MAIN:-0}" == "1" ]]; then - AWS_CMD="aws s3 cp" - fi - - shasum -a 256 "${SPEC_FILE}" - ${AWS_CMD} "${SPEC_FILE}" s3://ossci-ios/executorch/ --acl public-read diff --git a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 similarity index 72% rename from extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml rename to extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 index fcb2e7a978c..dc610437fc8 100644 --- a/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml +++ b/extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2 @@ -10,11 +10,22 @@ phases: # The pre-test phase includes commands that setup your test environment. pre_test: commands: + # Download the model from S3 + - curl -s --fail '{{ model_path }}' -o model.zip + - unzip model.zip && ls -la + + # Extract the app + - unzip $DEVICEFARM_APP_PATH -d /tmp + + # Copy the model + - mkdir -p /tmp/Payload/Benchmark.app/aatp/data + - cp *.bin /tmp/Payload/Benchmark.app/aatp/data + - cp *.pte /tmp/Payload/Benchmark.app/aatp/data + - mkdir $DEVICEFARM_TEST_PACKAGE_PATH/Debug-iphoneos - mkdir $DEVICEFARM_TEST_PACKAGE_PATH/Release-iphoneos - - unzip $DEVICEFARM_APP_PATH -d /tmp - - cp -r /tmp/Payload/*.app $DEVICEFARM_TEST_PACKAGE_PATH/Debug-iphoneos/ - - cp -r /tmp/Payload/*.app $DEVICEFARM_TEST_PACKAGE_PATH/Release-iphoneos/ + - cp -r /tmp/Payload/Benchmark.app $DEVICEFARM_TEST_PACKAGE_PATH/Debug-iphoneos/ + - cp -r /tmp/Payload/Benchmark.app $DEVICEFARM_TEST_PACKAGE_PATH/Release-iphoneos/ # The test phase includes commands that run your test suite execution. test: