2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/optimum-executorch.txt
@@ -1 +1 @@
-09fdbd0a0639b128f712a4f5202ed42ca4c60957
+e8f76b4295584c4328e7fd7971c131cb341c7438
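
The pin above is the commit of `optimum-executorch` that CI builds against. A minimal sketch of consuming the pin, mirroring the install command the README diff below adds (run from the repository root):

```
# Install optimum-executorch at the commit pinned for CI.
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
```
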
2 changes: 1 addition & 1 deletion .ci/docker/requirements-ci.txt
@@ -6,7 +6,7 @@ sympy==1.12
timm==0.6.13
tomli==2.0.1
torchsr==1.0.4
-transformers==4.47.1
+transformers==4.56.1
zstd==1.5.5.1
pandas>=2.2.2; python_version >= '3.10'
pytest==7.2.0
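
Both this CI requirements file and the README pin the same transformers version. A quick illustrative sanity check that an environment matches the pin (an assumption for local debugging, not part of CI):

```
pip show transformers | grep '^Version'   # expect: Version: 4.56.1
```
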
23 changes: 11 additions & 12 deletions .ci/scripts/test_phi_3_mini.sh
@@ -36,34 +36,33 @@ cmake_build_phi_3_mini() {
cmake --build ${BUILD_DIR}/${MODEL_DIR} -j${NPROC} --config ${BUILD_TYPE}
}

-# Download and convert tokenizer.model
+# Download tokenizer.model
prepare_tokenizer() {
-  echo "Downloading and converting tokenizer.model"
-  wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
-  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
+  echo "Downloading tokenizer.model"
+  wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.model?download=true"
}

# Export phi-3-mini model to pte
export_phi_3_mini () {
echo "Exporting phi-3-mini. This will take a few minutes"
-  $PYTHON_EXECUTABLE -m executorch.examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-mini.pte
+  optimum-cli export executorch --model microsoft/Phi-3-mini-4k-instruct --task text-generation --recipe xnnpack --output_dir ./
}

run_and_verify() {
NOW=$(date +"%H:%M:%S")
echo "Starting to run phi-3-mini runner at ${NOW}"
if [[ ! -f "phi-3-mini.pte" ]]; then
echo "Export failed. Abort"
if [[ ! -f "model.pte" ]]; then
echo "Missing model artifact. Abort"
exit 1
fi
if [[ ! -f "tokenizer.bin" ]]; then
echo "tokenizer.bin is missing."
if [[ ! -f "tokenizer.model" ]]; then
echo "tokenizer.model is missing."
exit 1
fi

${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner \
-    --model_path=phi-3-mini.pte \
-    --tokenizer_path=tokenizer.bin \
+    --model_path=model.pte \
+    --tokenizer_path=tokenizer.model \
--seq_len=60 \
--temperature=0 \
--prompt="<|system|>
@@ -92,7 +91,7 @@ What is the capital of France?<|end|>
cmake_install_executorch_libraries
cmake_build_phi_3_mini

-# Step 2. Export the tokenizer and model
+# Step 2. Download the tokenizer and export the model
prepare_tokenizer
export_phi_3_mini

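
Net effect of this script change: the tokenizer is no longer converted to `tokenizer.bin` (the runner now consumes `tokenizer.model` directly), and the export artifact is `model.pte`, the default name produced by `optimum-cli`. A sketch of exercising the updated script locally, matching the invocation in the workflow below (assumes an ExecuTorch checkout with build dependencies installed):

```
PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh Release
```
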
7 changes: 5 additions & 2 deletions .github/workflows/pull.yml
@@ -632,11 +632,14 @@ jobs:
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"

echo "::group::Setup ExecuTorch"
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"

echo "::endgroup::"

echo "::group::Setup requirements"
# install phi-3-mini requirements
bash examples/models/phi-3-mini/install_requirements.sh
echo "::endgroup::"

# run e2e (export, tokenizer and runner)
PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh Release
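
The added `::group::`/`::endgroup::` lines are GitHub Actions workflow commands that fold the enclosed output into collapsible sections of the job log. A minimal illustration (the step name and command are placeholders):

```
echo "::group::Install dependencies"   # start a collapsible log section
pip install -r requirements.txt        # any commands; their output is folded
echo "::endgroup::"                    # close the section
```
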
28 changes: 18 additions & 10 deletions examples/models/phi-3-mini/README.md
@@ -1,24 +1,32 @@
# Summary
-This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) 3.8B model via ExecuTorch. We use XNNPACK to accelarate the performance and XNNPACK symmetric per channel quantization.
+This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) 3.8B model via ExecuTorch. We use XNNPACK to accelerate performance, together with XNNPACK symmetric per-channel quantization.

# Instructions
## Step 1: Setup
1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh`
2. Currently, we support transformers v4.53.1. Install transformers with the following command:
2. Currently, we support transformers v4.56.1. Install transformers with the following command:
```
-pip uninstall -y transformers ; pip install transformers==4.53.1
+pip uninstall -y transformers ; pip install transformers==4.56.1
```
+3. Install `optimum-executorch`:
+
+```
+OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+```

## Step 2: Prepare and run the model
-1. Download the `tokenizer.model` from HuggingFace and create `tokenizer.bin`.
+1. Download the `tokenizer.model` from HuggingFace.
```
cd executorch
wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.model?download=true"
```
2. Export the model. This step will take a few minutes to finish.
```
-python -m executorch.examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-mini.pte
+optimum-cli export executorch --model microsoft/Phi-3-mini-4k-instruct --task text-generation --recipe xnnpack --qlinear 8da4w --qembedding 8w --output_dir ./
```
+The exported `model.pte` artifact is about 2.0 GB.

3. Build and run the model.
- Build executorch with LLM preset:
```
@@ -38,9 +46,9 @@ cmake --build cmake-out/examples/models/phi-3-mini -j16 --config Release
- Run model. Options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/phi-3-mini/main.cpp#L16-L33)
```
cmake-out/examples/models/phi-3-mini/phi_3_mini_runner \
-    --model_path=phi-3-mini.pte \
-    --tokenizer_path=tokenizer.bin \
-    --seq_len=128 \
+    --model_path=model.pte \
+    --tokenizer_path=tokenizer.model \
+    --seq_len=60 \
--temperature=0 \
--prompt="<|system|>
You are a helpful assistant.<|end|>
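
Taken together, the updated README collapses to a short end-to-end flow. A consolidated sketch, with paths and flags exactly as in the README diff above (the torchao-style names suggest `8da4w` is 8-bit dynamic-activation/4-bit-weight linear quantization and `8w` is 8-bit weight-only embedding quantization; the multi-line prompt is condensed to one line here):

```
# 1. Fetch the tokenizer.
wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.model?download=true"
# 2. Export the quantized model to model.pte (~2.0 GB).
optimum-cli export executorch --model microsoft/Phi-3-mini-4k-instruct \
  --task text-generation --recipe xnnpack --qlinear 8da4w --qembedding 8w --output_dir ./
# 3. Run the runner built in step 3 of the README.
cmake-out/examples/models/phi-3-mini/phi_3_mini_runner \
  --model_path=model.pte --tokenizer_path=tokenizer.model --seq_len=60 --temperature=0 \
  --prompt="<|system|>You are a helpful assistant.<|end|><|user|>What is the capital of France?<|end|><|assistant|>"
```
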
118 changes: 0 additions & 118 deletions examples/models/phi-3-mini/eager.py

This file was deleted.
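
The deleted `eager.py` presumably provided an eager-mode reference run of the model; its exact flags are not shown here. A rough hedged equivalent using plain `transformers` (an assumption, not a reconstruction of the deleted script):

```
python - <<'EOF'
# Eager-mode smoke test via transformers (downloads the ~3.8B-parameter model).
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "microsoft/Phi-3-mini-4k-instruct"
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
inputs = tok("What is the capital of France?", return_tensors="pt")
print(tok.decode(model.generate(**inputs, max_new_tokens=32)[0]))
EOF
```
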
