From 808360ba3b5aec8accaadffd8bf627e4563ef056 Mon Sep 17 00:00:00 2001
From: Guang Yang
Date: Mon, 12 May 2025 11:56:29 -0700
Subject: [PATCH] Update CI for HF Optimum models

---
 .github/workflows/trunk.yml | 78 ++++++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index f393e52aa1d..8d87c65f7a3 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
@@ -569,44 +569,102 @@ jobs:
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
       script: |
         echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        # Build executor_runner with ETDump enabled
+        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DEXECUTORCH_ENABLE_LOGGING=1 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_XNNPACK=ON \
+          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j16 --target install --config Release
         echo "::endgroup::"

         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         git clone https://github.com/huggingface/optimum-executorch
-        cd optimum-executorch
+        pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+        popd
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # Fixes for gemma-3 are not available in the released version
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"

-        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        echo "::group::Export to ExecuTorch"
         # Pass matrix variable as environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
+        pushd optimum-executorch
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using Python API"
+        pushd optimum-executorch
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer

         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)

         generated_text = model.text_generation(
-            tokenizer=tokenizer,
+            tokenizer=AutoTokenizer.from_pretrained(model_id),
             prompt='Simply put, the theory of relativity states that',
             max_seq_len=64
         )
         print(generated_text)
         "
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using executor_runner with ETDump"
+        ./cmake-out/executor_runner \
+          --model_path ${OUTPUT_DIR}/model.pte \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+        mkdir -p $(dirname "$TSV_PATH")
+        python3 -m devtools.inspector.inspector_cli \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+          --tsv_path ${TSV_PATH}
         echo "::endgroup::"
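
The uploaded TSV comes from inspector_cli, but the same ETDump can be examined
interactively with the devtools Inspector Python API. A minimal sketch, assuming
the executorch package is installed and the command is run from the directory
containing an export; the path below is illustrative, following the
${MODEL_ID}_custom_sdpa_8da4w convention used in the script above:

    # Load the ETDump emitted by executor_runner and print per-operator timings,
    # the same data that inspector_cli flattens into the uploaded TSV.
    from executorch.devtools import Inspector

    # Illustrative path; substitute the OUTPUT_DIR of the model under test.
    inspector = Inspector(etdump_path="HuggingFaceTB/SmolLM2-135M_custom_sdpa_8da4w/etdump.etdp")
    inspector.print_data_tabular()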