From dd34d3586b3fb00c85691419d9192dcdf3fcc81a Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Fri, 17 Oct 2025 17:19:20 -0700 Subject: [PATCH 1/8] Bump transformers pin to 4.56.1 --- .ci/docker/requirements-ci.txt | 2 +- requirements-examples.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index 4d2fb63122f..d16b91cc7a3 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -6,7 +6,7 @@ sympy==1.12 timm==0.6.13 tomli==2.0.1 torchsr==1.0.4 -transformers==4.47.1 +transformers==4.56.1 zstd==1.5.5.1 pandas>=2.2.2; python_version >= '3.10' pytest==7.2.0 diff --git a/requirements-examples.txt b/requirements-examples.txt index 0923cf8fefc..368159f96e9 100644 --- a/requirements-examples.txt +++ b/requirements-examples.txt @@ -4,4 +4,4 @@ datasets == 3.6.0 # 4.0.0 deprecates trust_remote_code and load scripts. For now timm == 1.0.7 torchsr == 1.0.4 torchtune >= 0.6.1 -transformers == 4.53.1 +transformers == 4.56.1 From 496de652691bd6d72c550025bd0a84c51aa6406f Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sun, 19 Oct 2025 11:59:09 -0700 Subject: [PATCH 2/8] Remove unnecessary phi-3-mini export code --- .ci/scripts/test_phi_3_mini.sh | 23 ++- .github/workflows/pull.yml | 7 +- examples/models/phi-3-mini/README.md | 28 +-- examples/models/phi-3-mini/eager.py | 118 ------------ .../models/phi-3-mini/export_phi-3-mini.py | 168 ------------------ .../models/phi-3-mini/install_requirements.sh | 5 + examples/models/phi-3-mini/main.cpp | 5 +- examples/models/phi-3-mini/phi_3_mini.py | 41 ----- examples/models/phi-3-mini/static_cache.py | 43 ----- 9 files changed, 41 insertions(+), 397 deletions(-) delete mode 100644 examples/models/phi-3-mini/eager.py delete mode 100644 examples/models/phi-3-mini/export_phi-3-mini.py delete mode 100644 examples/models/phi-3-mini/phi_3_mini.py delete mode 100644 examples/models/phi-3-mini/static_cache.py diff --git a/.ci/scripts/test_phi_3_mini.sh b/.ci/scripts/test_phi_3_mini.sh index 289263ace37..24ba4e0a1b5 100644 --- a/.ci/scripts/test_phi_3_mini.sh +++ b/.ci/scripts/test_phi_3_mini.sh @@ -36,34 +36,33 @@ cmake_build_phi_3_mini() { cmake --build ${BUILD_DIR}/${MODEL_DIR} -j${NPROC} --config ${BUILD_TYPE} } -# Download and convert tokenizer.model +# Download tokenizer.model prepare_tokenizer() { - echo "Downloading and converting tokenizer.model" - wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true" - $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin + echo "Downloading tokenizer.model" + wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.model?download=true" } # Export phi-3-mini model to pte export_phi_3_mini () { echo "Exporting phi-3-mini. This will take a few minutes" - $PYTHON_EXECUTABLE -m executorch.examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-mini.pte + optimum-cli export executorch --model microsoft/Phi-3-mini-4k-instruct --task text-generation --recipe xnnpack --output_dir ./ } run_and_verify() { NOW=$(date +"%H:%M:%S") echo "Starting to run phi-3-mini runner at ${NOW}" - if [[ ! -f "phi-3-mini.pte" ]]; then - echo "Export failed. Abort" + if [[ ! -f "model.pte" ]]; then + echo "Missing model artifact. Abort" exit 1 fi - if [[ ! -f "tokenizer.bin" ]]; then - echo "tokenizer.bin is missing." + if [[ ! 
-f "tokenizer.model" ]]; then + echo "tokenizer.model is missing." exit 1 fi ${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner \ - --model_path=phi-3-mini.pte \ - --tokenizer_path=tokenizer.bin \ + --model_path=model.pte \ + --tokenizer_path=tokenizer.model \ --seq_len=60 \ --temperature=0 \ --prompt="<|system|> @@ -92,7 +91,7 @@ What is the capital of France?<|end|> cmake_install_executorch_libraries cmake_build_phi_3_mini -# Step 2. Export the tokenizer and model +# Step 2. Export the model prepare_tokenizer export_phi_3_mini diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index c96b85740bc..6f4afe3e52b 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -632,11 +632,14 @@ jobs: # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") conda activate "${CONDA_ENV}" - + echo "::group::Setup ExecuTorch" PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" - + echo "::endgroup::" + + echo "::group::Setup requirements" # install phi-3-mini requirements bash examples/models/phi-3-mini/install_requirements.sh + echo "::endgroup::" # run e2e (export, tokenizer and runner) PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh Release diff --git a/examples/models/phi-3-mini/README.md b/examples/models/phi-3-mini/README.md index b1a78f26954..86160e0b39a 100644 --- a/examples/models/phi-3-mini/README.md +++ b/examples/models/phi-3-mini/README.md @@ -1,24 +1,32 @@ # Summary -This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) 3.8B model via ExecuTorch. We use XNNPACK to accelarate the performance and XNNPACK symmetric per channel quantization. +This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) 3.8B model via ExecuTorch. We use XNNPACK to accelarate the performance and XNNPACK symmetric per channel quantization. # Instructions ## Step 1: Setup 1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh` -2. Currently, we support transformers v4.53.1. Install transformers with the following command: +2. Currently, we support transformers v4.56.1. Install transformers with the following command: ``` -pip uninstall -y transformers ; pip install transformers==4.53.1 +pip uninstall -y transformers ; pip install transformers==4.56.1 ``` +3. Install `optimum-executorch`: + +``` +OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) +pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION} +``` + ## Step 2: Prepare and run the model -1. Download the `tokenizer.model` from HuggingFace and create `tokenizer.bin`. +1. Download the `tokenizer.model` from HuggingFace. ``` cd executorch -wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true" -python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin +wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.model?download=true" ``` 2. Export the model. This step will take a few minutes to finish. 
``` -python -m executorch.examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-mini.pte +optimum-cli export executorch --model microsoft/Phi-3-mini-4k-instruct --task text-generation --recipe xnnpack --qlinear 8da4w --qembedding 8w --output_dir ./ ``` +The model artifact `model.pte` size is about 2.0GB. + 3. Build and run the model. - Build executorch with LLM preset: ``` @@ -38,9 +46,9 @@ cmake --build cmake-out/examples/models/phi-3-mini -j16 --config Release - Run model. Options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/phi-3-mini/main.cpp#L16-L33) ``` cmake-out/examples/models/phi-3-mini/phi_3_mini_runner \ - --model_path=phi-3-mini.pte \ - --tokenizer_path=tokenizer.bin \ - --seq_len=128 \ + --model_path=model.pte \ + --tokenizer_path=tokenizer.model \ + --seq_len=60 \ --temperature=0 \ --prompt="<|system|> You are a helpful assistant.<|end|> diff --git a/examples/models/phi-3-mini/eager.py b/examples/models/phi-3-mini/eager.py deleted file mode 100644 index 8b57b5a24c9..00000000000 --- a/examples/models/phi-3-mini/eager.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -# Script to run phi-3-mini model in eager mode. - -import argparse -import time - -import torch - -from transformers import AutoTokenizer, Phi3ForCausalLM - -from .phi_3_mini import Phi3Mini - -end_of_text_token = 32000 - - -def _generate_token(args, model, prompt_tokens): - current_token = 0 - generated_tokens = [] - - print("Generating tokens:", end="", flush=True) - - while current_token != end_of_text_token and len(generated_tokens) < args.seq_len: - outputs = model.forward(input_ids=prompt_tokens) - current_token = torch.argmax(outputs.logits[:, -1, :], dim=-1).item() - print(f" {current_token}", end="", flush=True) - generated_tokens.append(current_token) - prompt_tokens = torch.cat( - [prompt_tokens, torch.tensor([[current_token]], dtype=torch.long)], dim=-1 - ) - - print("", flush=True) - - return generated_tokens - - -def _generate_token_with_kv_cache(args, model, prompt_tokens): - print("Generating tokens:", end="", flush=True) - - model = Phi3Mini(model, 1, args.seq_len + prompt_tokens.shape[-1]) - result = model.forward(input_ids=prompt_tokens) - - current_token = torch.argmax(result, dim=-1).item() - print(f" {current_token}", end="", flush=True) - generated_tokens = [current_token] - - while current_token != end_of_text_token and len(generated_tokens) < args.seq_len: - result = model.forward( - input_ids=torch.tensor([[current_token]], dtype=torch.long), - ) - current_token = torch.argmax(result, dim=-1).item() - print(f" {current_token}", end="", flush=True) - generated_tokens.append(current_token) - - print("", flush=True) - - return generated_tokens - - -def main(args): - seed = 42 - torch.manual_seed(seed) - model_name = "microsoft/Phi-3-mini-4k-instruct" - model = Phi3ForCausalLM.from_pretrained(model_name) - tokenizer = AutoTokenizer.from_pretrained(model_name) - - tokens = tokenizer.encode(args.prompt, return_tensors="pt") - - start = time.time() - generated_tokens = ( - _generate_token_with_kv_cache(args, model, tokens) - if args.use_kv_cache - else _generate_token(args, model, tokens) - ) - end = time.time() - - print( - "Generated response: \n {}".format( - tokenizer.decode( - generated_tokens, - skip_special_tokens=True, - 
clean_up_tokenization_spaces=False, - ) - ), - flush=True, - ) - print(f"Time spent: {end - start}", flush=True) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-s", - "--seq_len", - type=int, - default=128, - help="Maximum number of tokens to generate", - ) - parser.add_argument( - "-kv", - "--use_kv_cache", - default=False, - action="store_true", - help="Whether or not to use KV cache", - ) - parser.add_argument( - "-p", - "--prompt", - type=str, - default="Tell me a story", - help="Prompt as input for the model", - ) - main(parser.parse_args()) diff --git a/examples/models/phi-3-mini/export_phi-3-mini.py b/examples/models/phi-3-mini/export_phi-3-mini.py deleted file mode 100644 index 017c15f783e..00000000000 --- a/examples/models/phi-3-mini/export_phi-3-mini.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import argparse - -import torch - -from executorch.backends.transforms.duplicate_dynamic_quant_chain import ( - DuplicateDynamicQuantChainPass, -) -from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner - -from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import ( - get_symmetric_quantization_config, - XNNPACKQuantizer, -) -from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config -from executorch.exir import to_edge_transform_and_lower -from executorch.exir.capture._config import ExecutorchBackendConfig -from executorch.exir.passes import MemoryPlanningPass -from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass -from torch.export import export as torch_export -from torch.nn.attention import SDPBackend -from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e - -from transformers import Phi3ForCausalLM -from transformers.cache_utils import StaticCacheConfig - -from transformers.integrations.executorch import TorchExportableModuleForDecoderOnlyLM - - -def _prepare_export_inputs(max_seq_len: int, sliding_window: int): - """ - Prepare example inputs and configurations for export. - - Returns: - example_input_ids (torch.Tensor): Example input IDs tensor. - example_cache_position (torch.Tensor): Example cache position tensor. - dynamic_shapes (dict or None): Dynamic shape specifications for export. - strict (bool): Whether to use strict export mode. - """ - # Prepare inputs with dynamic shapes - seq_length = 3 # Sequence length > 1 to avoid specialization issues - example_input_ids = torch.zeros((1, seq_length), dtype=torch.long) - example_cache_position = torch.arange(seq_length, dtype=torch.long) - max_dim = min(max_seq_len, sliding_window) - 1 - seq_len_dim = torch.export.Dim("seq_length_dim", max=max_dim) - dynamic_shapes = { - "input_ids": {1: seq_len_dim}, - "cache_position": {0: seq_len_dim}, - } - - return example_input_ids, example_cache_position, dynamic_shapes - - -def export(args) -> None: - torch.manual_seed(0) - - if args.context_length == "4k": - model_name = "microsoft/Phi-3-mini-4k-instruct" - elif args.context_length == "128k": - model_name = "microsoft/Phi-3-mini-128k-instruct" - else: - raise Exception( - f"Invalid context length {args.context_length}. 
Should be either 4k or 128k" - ) - - with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad(): - model = Phi3ForCausalLM.from_pretrained(model_name) - model.generation_config.cache_implementation = "static" - model.generation_config.cache_config = StaticCacheConfig( - batch_size=1, max_cache_len=model.config.max_position_embeddings - ) - - exportable_module = TorchExportableModuleForDecoderOnlyLM( - model, - max_batch_size=1, - max_cache_len=model.config.max_position_embeddings, - ) - input_ids, cache_position, dynamic_shapes = _prepare_export_inputs( - model.config.max_position_embeddings, model.config.sliding_window - ) - example_inputs = (input_ids, cache_position) - exported_program = exportable_module.export( - input_ids, cache_position, dynamic_shapes, strict=False - ) - # Apply RemoveTransposes pass to remove - # any back-to-back transpose ops that are not needed - # e.g. output of update_cache is transposed and - # input to custom_sdpa is transposed. - from executorch.extension.llm.export.export_passes import ( - RemoveRedundantTransposes, - ) - - mutated_gm = RemoveRedundantTransposes()(exported_program.module())[0] - - xnnpack_quant_config = get_symmetric_quantization_config( - is_per_channel=True, is_dynamic=True - ) - xnnpack_quantizer = XNNPACKQuantizer() - xnnpack_quantizer.set_global(xnnpack_quant_config) - - gm = prepare_pt2e(mutated_gm, xnnpack_quantizer) # pyre-fixme[6] - gm(*example_inputs) - gm = convert_pt2e(gm) - DuplicateDynamicQuantChainPass()(gm) - exported_program = torch_export( - gm, example_inputs, dynamic_shapes=dynamic_shapes, strict=False - ) - - edge_config = get_xnnpack_edge_compile_config() - edge_manager = to_edge_transform_and_lower( - exported_program, - partitioner=[XnnpackPartitioner()], - compile_config=edge_config, - constant_methods={ - "get_eos_ids": [32000], - "use_kv_cache": True, - "enable_dynamic_shape": True, - "get_max_seq_len": model.config.max_position_embeddings - 1, - }, - ) - edge_manager = edge_manager.to_backend(XnnpackPartitioner()) - et_program = edge_manager.to_executorch( - ExecutorchBackendConfig( - extract_delegate_segments=True, - do_quant_fusion_and_const_prop=True, - memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False), - sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(), - ) - ) - - with open(args.output_name, "wb") as file: - file.write(et_program.buffer) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "-c", - "--context_length", - type=str, - default="4k", - choices=["4k", "128k"], - help="Phi-3-mini provides two context length variants: 4k and 128k", - ) - parser.add_argument( - "-s", - "--seq_len", - type=int, - default=128, - help="Maximum number of tokens including prompt to generate", - ) - parser.add_argument( - "-o", - "--output_name", - default="phi-3-mini.pte", - help="Override the output filename of the saved pte model file.", - ) - export(parser.parse_args()) - - -if __name__ == "__main__": - main() diff --git a/examples/models/phi-3-mini/install_requirements.sh b/examples/models/phi-3-mini/install_requirements.sh index dabeab2ba66..92f91e8a58d 100644 --- a/examples/models/phi-3-mini/install_requirements.sh +++ b/examples/models/phi-3-mini/install_requirements.sh @@ -9,4 +9,9 @@ set -x pip install sentencepiece +EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." 
&& pwd)" + +OPTIMUM_ET_VERSION=$(cat "${EXECUTORCH_ROOT}/.ci/docker/ci_commit_pins/optimum-executorch.txt") +pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION} + pip list diff --git a/examples/models/phi-3-mini/main.cpp b/examples/models/phi-3-mini/main.cpp index cc500511624..a644fe2f81b 100644 --- a/examples/models/phi-3-mini/main.cpp +++ b/examples/models/phi-3-mini/main.cpp @@ -6,9 +6,9 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include -#include #include using executorch::extension::llm::TextLLMRunner; @@ -46,8 +46,7 @@ int main(int32_t argc, char** argv) { int32_t seq_len = FLAGS_seq_len; std::unique_ptr tokenizer = - std::make_unique(); - tokenizer->load(tokenizer_path); + executorch::extension::llm::load_tokenizer(tokenizer_path); auto runner = executorch::extension::llm::create_text_llm_runner( model_path, std::move(tokenizer)); diff --git a/examples/models/phi-3-mini/phi_3_mini.py b/examples/models/phi-3-mini/phi_3_mini.py deleted file mode 100644 index f355beb882a..00000000000 --- a/examples/models/phi-3-mini/phi_3_mini.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -import torch.nn -from transformers import Phi3ForCausalLM - -from .static_cache import ETStaticCache - - -class Phi3Mini(torch.nn.Module): - - def __init__(self, model: Phi3ForCausalLM, max_batch_size: int, max_seq_len: int): - super().__init__() - self.model = model - self.cache = ETStaticCache( - # pyre-fixme[16]: `Phi3ForCausalLM` has no attribute `config`. - config=model.config, - max_batch_size=max_batch_size, - max_cache_len=max_seq_len, - # pyre-fixme[16]: `Phi3ForCausalLM` has no attribute `device`. - device=self.model.device, - # pyre-fixme[16]: `Phi3ForCausalLM` has no attribute `dtype`. - dtype=self.model.dtype, - ) - - def forward( - self, - # pyre-fixme[9]: input_ids has type `LongTensor`; used as `None`. - input_ids: torch.LongTensor, - ) -> torch.FloatTensor: - # pyre-fixme[16]: `Phi3ForCausalLM` has no attribute `forward`. - return self.model.forward( - input_ids=input_ids, - use_cache=True, - return_dict=True, - past_key_values=self.cache, - ).logits[:, -1, :] diff --git a/examples/models/phi-3-mini/static_cache.py b/examples/models/phi-3-mini/static_cache.py deleted file mode 100644 index baf66ac2d17..00000000000 --- a/examples/models/phi-3-mini/static_cache.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - -from typing import Optional - -import torch -from transformers import PretrainedConfig, StaticCache - - -class ETStaticCache(StaticCache): - """ - A customized static cache implementation, which overrides a few methods to make it exportable to ExecuTorch. - This can be removed once transformers supports static cache for Phi3 properly. 
- """ - - def __init__( - self, - config: PretrainedConfig, - max_batch_size: int, - max_cache_len: int, - device, - dtype=torch.float32, - ) -> None: - super().__init__( - config=config, - max_batch_size=max_batch_size, - max_cache_len=max_cache_len, - device=device, - dtype=dtype, - ) - - def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: - # pyre-fixme[16]: `ETStaticCache` has no attribute `key_cache`. - return (self.key_cache[layer_idx][0, 0].any(dim=-1)).sum().item() - - def get_usable_length( - self, new_seq_length: int, layer_idx: Optional[int] = 0 - ) -> int: - return self.get_seq_length(layer_idx) From bbd12e6590e60a73e3708eb365d6c227fc85d479 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sun, 19 Oct 2025 13:43:53 -0700 Subject: [PATCH 3/8] Fix --- examples/models/phi-3-mini/install_requirements.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/models/phi-3-mini/install_requirements.sh b/examples/models/phi-3-mini/install_requirements.sh index 92f91e8a58d..696b21a247f 100644 --- a/examples/models/phi-3-mini/install_requirements.sh +++ b/examples/models/phi-3-mini/install_requirements.sh @@ -9,7 +9,7 @@ set -x pip install sentencepiece -EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" +EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../../.." && pwd)" OPTIMUM_ET_VERSION=$(cat "${EXECUTORCH_ROOT}/.ci/docker/ci_commit_pins/optimum-executorch.txt") pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION} From f1edb82a69c4327196a96cd25484d11eaddeb8c7 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sun, 19 Oct 2025 14:22:42 -0700 Subject: [PATCH 4/8] Bump optimum-executorch --- .ci/docker/ci_commit_pins/optimum-executorch.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/ci_commit_pins/optimum-executorch.txt b/.ci/docker/ci_commit_pins/optimum-executorch.txt index 3c085a7ef3a..574ccb745d0 100644 --- a/.ci/docker/ci_commit_pins/optimum-executorch.txt +++ b/.ci/docker/ci_commit_pins/optimum-executorch.txt @@ -1 +1 @@ -09fdbd0a0639b128f712a4f5202ed42ca4c60957 +467660923a5a25e4718e1d6697b93ff1bab4e807 From fefd006c9bf5f8b07e0b8f63a5cc17b8ae3b9e55 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sun, 19 Oct 2025 15:33:50 -0700 Subject: [PATCH 5/8] Bump optimum-executorch --- .ci/docker/ci_commit_pins/optimum-executorch.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/ci_commit_pins/optimum-executorch.txt b/.ci/docker/ci_commit_pins/optimum-executorch.txt index 574ccb745d0..e42ee83cab3 100644 --- a/.ci/docker/ci_commit_pins/optimum-executorch.txt +++ b/.ci/docker/ci_commit_pins/optimum-executorch.txt @@ -1 +1 @@ -467660923a5a25e4718e1d6697b93ff1bab4e807 +e8f76b4295584c4328e7fd7971c131cb341c7438 From fe82a7a2022794fd8064a917806f5b2bdef6c0f3 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sun, 19 Oct 2025 16:16:58 -0700 Subject: [PATCH 6/8] install accelerate --- examples/models/phi-3-mini/install_requirements.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/models/phi-3-mini/install_requirements.sh b/examples/models/phi-3-mini/install_requirements.sh index 696b21a247f..731a71a75d4 100644 --- a/examples/models/phi-3-mini/install_requirements.sh +++ b/examples/models/phi-3-mini/install_requirements.sh @@ -7,7 +7,7 @@ set -x -pip install sentencepiece +pip install sentencepiece accelerate EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../../.." 
&& pwd)" From 4169cd2a5d2ea4bc8aaaff02f47d4e37ac45ab97 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sun, 19 Oct 2025 16:47:37 -0700 Subject: [PATCH 7/8] Fix NXP test --- backends/nxp/tests/test_batch_norm_fusion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/nxp/tests/test_batch_norm_fusion.py b/backends/nxp/tests/test_batch_norm_fusion.py index 788d04c6dad..21e70fdbfbe 100644 --- a/backends/nxp/tests/test_batch_norm_fusion.py +++ b/backends/nxp/tests/test_batch_norm_fusion.py @@ -105,7 +105,7 @@ def test_batch_norm_conv_fusing(bias: bool, input_shape: list[int]): og_nodes = list(program.graph.nodes) transformed_nodes = list(graph_module_out.graph.nodes) - assert len(og_nodes) == (11 if bias else 10) + assert len(og_nodes) == (10 if bias else 9) assert og_nodes[9 if bias else 8].target.__name__ == "batch_norm.default" assert len(transformed_nodes) == 5 @@ -139,7 +139,7 @@ def test_batch_norm_linear_fusing(bias: bool): og_nodes = list(og_module.graph.nodes) transformed_nodes = list(graph_module_out.graph.nodes) - assert len(og_nodes) == (11 if bias else 10) + assert len(og_nodes) == (10 if bias else 9) assert og_nodes[8 if bias else 7].target.__name__ == "linear.default" assert len(transformed_nodes) == 5 From 963ee234ab2d4af1491c280e90b555d01e200610 Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Sun, 19 Oct 2025 17:16:20 -0700 Subject: [PATCH 8/8] Revert nxp changes --- backends/nxp/tests/test_batch_norm_fusion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/nxp/tests/test_batch_norm_fusion.py b/backends/nxp/tests/test_batch_norm_fusion.py index 21e70fdbfbe..788d04c6dad 100644 --- a/backends/nxp/tests/test_batch_norm_fusion.py +++ b/backends/nxp/tests/test_batch_norm_fusion.py @@ -105,7 +105,7 @@ def test_batch_norm_conv_fusing(bias: bool, input_shape: list[int]): og_nodes = list(program.graph.nodes) transformed_nodes = list(graph_module_out.graph.nodes) - assert len(og_nodes) == (10 if bias else 9) + assert len(og_nodes) == (11 if bias else 10) assert og_nodes[9 if bias else 8].target.__name__ == "batch_norm.default" assert len(transformed_nodes) == 5 @@ -139,7 +139,7 @@ def test_batch_norm_linear_fusing(bias: bool): og_nodes = list(og_module.graph.nodes) transformed_nodes = list(graph_module_out.graph.nodes) - assert len(og_nodes) == (10 if bias else 9) + assert len(og_nodes) == (11 if bias else 10) assert og_nodes[8 if bias else 7].target.__name__ == "linear.default" assert len(transformed_nodes) == 5