pytorch
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 27 additions & 1 deletion b/‎.ci/scripts/test_model.sh‎
Lines changed: 27 additions & 1 deletion
diff --git a/‎.ci/scripts/utils.sh‎
Lines changed: 3 additions & 2 deletions b/‎.ci/scripts/utils.sh‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/apple-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/apple-perf.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/lint.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/lint.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 28 additions & 3 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 28 additions & 3 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/__init__.py‎ b/‎backends/__init__.py‎
diff --git a/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/annotate_channels_last_dim_order_pass.py‎
Lines changed: 2 additions & 24 deletions b/‎backends/arm/_passes/annotate_channels_last_dim_order_pass.py‎
Lines changed: 2 additions & 24 deletions
@@ -188,6 +188,14 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
     pip install piq
+  elif [[ "${MODEL_NAME}" == "albert" ]]; then
+    EXPORT_SCRIPT=albert
+  elif [[ "${MODEL_NAME}" == "bert" ]]; then
+    EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
+    EXPORT_SCRIPT=distilbert
+  elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
+    EXPORT_SCRIPT=eurobert
   else
     echo "Unsupported model $MODEL_NAME"
     exit 1
@@ -197,7 +205,25 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
+  SCRIPT_FOLDER=""
+  case "${MODEL_NAME}" in
+    "dl3"|"mv3"|"mv2"|"ic4"|"ic3"|"vit"|"mb"|"w2l")
+        SCRIPT_FOLDER=scripts
+        ;;
+    "albert"|"bert"|"distilbert")
+        pip install evaluate
+        SCRIPT_FOLDER=oss_scripts
+        # BERT models running in 16-bit fail op validation on some operations
+        # unless the chipset is SM8550 or newer.
+        QNN_CHIPSET=SM8550
+        ;;
+    *)
+        echo "Unsupported model $MODEL_NAME"
+        exit 1
+        ;;
+  esac
+
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.${SCRIPT_FOLDER}.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
 
@@ -156,13 +156,14 @@ build_executorch_runner() {
 }
 
 cmake_install_executorch_lib() {
+  build_type="${1:-Release}"
   echo "Installing libexecutorch.a and libportable_kernels.a"
   clean_executorch_install_folders
   retry cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
-          -DCMAKE_BUILD_TYPE=Release \
+          -DCMAKE_BUILD_TYPE=${build_type} \
           -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
           -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --build cmake-out -j9 --target install --config ${build_type}
 }
 
 download_stories_model_artifacts() {
 
@@ -342,8 +342,8 @@ jobs:
               git clone https://github.com/huggingface/optimum-executorch
               pushd optimum-executorch
               # There is no release yet, for CI stability, always test from the same commit on main
-              git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-              python install_dev.py
+              git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+              python install_dev.py --skip_override_torch
               pip list
 
               ARGS=(
 
@@ -347,8 +347,8 @@ jobs:
             git clone https://github.com/huggingface/optimum-executorch
             pushd optimum-executorch
             # There is no release yet, for CI stability, always test from the same commit on main
-            git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-            ${CONDA_RUN} python install_dev.py
+            git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+            ${CONDA_RUN} python install_dev.py --skip_override_torch
             pip list
 
             ARGS=(
 
@@ -46,7 +46,7 @@ jobs:
         fi
 
         # This has already been cached in the docker image
-        lintrunner init 2> /dev/null
+        lintrunner init
 
         RC=0
         # Run lintrunner on all files
 
@@ -480,6 +480,32 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
 
+  test-qnn-optimum-model:
+    name: test-qnn-optimum-model
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      matrix:
+        dtype: [fp32]
+        model: [albert, bert, distilbert] # eurobert requires transformers >= 4.48.0, skip for now
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one set up by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
+
   test-apple-model:
     name: test-apple-model
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -571,9 +597,8 @@ jobs:
         git clone https://github.com/huggingface/optimum-executorch
         pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 1c653dc49812fc431a22312c7295d97005d22e12
-        pip install .[tests]
-        pip install transformers==4.52.4
+        git checkout 4c3b18f6cca68c5ccff809131d570062723d7188
+        python install_dev.py --skip_override_torch
         popd
         pip list
         echo "::endgroup::"
 
@@ -42,6 +42,9 @@ xcuserdata/
 *.xcworkspace/
 *.xcframework/
 
+# clangd
+.cache/
+
 # misc
 /.vscode/
 *.so
 
@@ -29,6 +29,7 @@
 from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass  # noqa
 from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass  # noqa
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
+from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_ne_pass import DecomposeNotEqualPass  # noqa
 from .decompose_select import DecomposeSelectPass  # noqa
 
@@ -5,15 +5,12 @@
 
 # pyre-unsafe
 
-from typing import cast
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import (
     create_node,
     get_first_fake_tensor,
-    insert_q_dq_pair,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
 from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -59,20 +56,10 @@ class AnnotateChannelsLastDimOrder(ExportPass):
 
     def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
         """
-        returns True for dq and w in the following sequences;
+        returns True for w in the following sequence;
         w -> depthwise_conv2d -> ...
-        w -> dq -> depthwise_conv2d -> ...
         """
-        if node.op == "call_function":
-            if node.target != dq_op:
-                return False
-            prev_node = node.args[0]
-            if cast(torch.fx.Node, prev_node).op != "placeholder":
-                return False
-            if is_consumer_node_depthwise_conv2d(node):
-                consumer_node = list(node.users)[0]
-                return consumer_node.args[1] == node
-        elif node.op == "placeholder":
+        if node.op == "placeholder":
             # node is an input, weight or bias node
             consumer_node = list(node.users)[0]
             if self.is_weight_node_for_depthwise_conv2d(consumer_node):
@@ -129,8 +116,6 @@ def is_channel_reshape(input_shape, output_shape):
 
     @staticmethod
     def insert_input_transpose(node, input_node, graph_module):
-        quantize = input_node.target == dq_op
-        q_params = input_node.args[1:] if quantize else None
         with graph_module.graph.inserting_before(node):
             permute_node = create_node(
                 graph_module.graph,
@@ -143,8 +128,6 @@ def insert_input_transpose(node, input_node, graph_module):
                         else AnnotateChannelsLastDimOrder.NHWC_inverse_order
                     ),
                 ),
-                quantize=quantize,
-                q_params=q_params,
             )
             node.replace_input_with(input_node, permute_node)
 
@@ -185,11 +168,6 @@ def insert_output_transpose(node, graph_module):
             for user in users:
                 user.replace_input_with(node, permute_node)
 
-            quantize = node.args[0] == q_op
-            if quantize:
-                q_params = node.args[0].args[1:]
-                insert_q_dq_pair(graph_module.graph, node, q_params)
-
     @staticmethod
     def _insert_view_transpose(
         input_shape, output_shape, node, input_node, graph_module