Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
370 changes: 370 additions & 0 deletions .github/workflows/mlx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ on:
- .github/workflows/mlx.yml
- backends/mlx/**
- extension/llm/export/**
- extension/audio/**
- examples/models/parakeet/**
- examples/models/voxtral_realtime/**
workflow_dispatch:

permissions: {}
Expand Down Expand Up @@ -104,3 +107,370 @@ jobs:
echo "::error::Too many test failures: $FAILED > $MAX_FAILURES"
exit 1
fi

# Export Parakeet-TDT with the MLX backend, build the C++ runner, and
# smoke-test transcription of a known audio clip on a macOS runner.
test-mlx-parakeet:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  with:
    job-name: test-mlx-parakeet
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On PRs, test the PR head commit rather than the synthetic merge commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Parakeet requirements"
      ${CONDA_RUN} pip install -r examples/models/parakeet/install_requirements.txt
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export Parakeet"
      ${CONDA_RUN} python -m executorch.examples.models.parakeet.export_parakeet_tdt \
        --backend mlx \
        --dtype bf16 \
        --qlinear_encoder 4w \
        --qlinear_encoder_group_size 128 \
        --qlinear 4w \
        --qlinear_group_size 128 \
        --output-dir /tmp/parakeet_mlx
      echo "::endgroup::"

      echo "::group::Build Parakeet MLX runner"
      ${CONDA_RUN} make parakeet-mlx
      echo "::endgroup::"

      echo "::group::Run Parakeet MLX runner"
      # --fail: abort on HTTP errors instead of silently saving an error
      # page as the .wav, which would only surface later as a confusing
      # runner/decoder failure.
      curl --fail -L https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav -o /tmp/test_audio.wav
      OUTPUT=$(./cmake-out/examples/models/parakeet/parakeet_runner \
        --model_path /tmp/parakeet_mlx/model.pte \
        --audio_path /tmp/test_audio.wav \
        --tokenizer_path /tmp/parakeet_mlx/tokenizer.model 2>&1)
      echo "Runner output:"
      echo "$OUTPUT"
      # Smoke check: the sample clip's transcript contains the name "Phoebe".
      if echo "$OUTPUT" | grep -iq "Phoebe"; then
        echo "Success: 'Phoebe' found in output"
      else
        echo "Failed: Expected 'Phoebe' not found in output"
        exit 1
      fi
      echo "::endgroup::"

# Export Voxtral-Mini (HF) with the MLX backend, build the C++ runner, and
# smoke-test audio understanding on a known clip. Needs the HF token secret
# to fetch the gated tokenizer.
test-mlx-voxtral:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    job-name: test-mlx-voxtral
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On PRs, test the PR head commit rather than the synthetic merge commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Voxtral requirements"
      ${CONDA_RUN} pip install mistral_common librosa soundfile datasets
      # Pin optimum-executorch to the commit the CI docker images use.
      OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
      ${CONDA_RUN} pip install "optimum-executorch @ git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}"
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export Voxtral"
      ${CONDA_RUN} python -m executorch.backends.mlx.examples.voxtral.export_voxtral_hf \
        --output-dir /tmp/voxtral_mlx \
        --dtype bf16 \
        --qlinear 4w
      echo "::endgroup::"

      echo "::group::Build Voxtral MLX runner"
      ${CONDA_RUN} make voxtral-mlx
      echo "::endgroup::"

      echo "::group::Run Voxtral MLX runner"
      # --fail: abort on HTTP errors instead of saving an error page as the
      # tokenizer/audio file and failing later with an unrelated error.
      curl --fail -L https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main/tekken.json -o /tmp/tekken.json
      curl --fail -L https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav -o /tmp/test_audio.wav
      OUTPUT=$(./cmake-out/examples/models/voxtral/voxtral_runner \
        --model_path /tmp/voxtral_mlx/model.pte \
        --tokenizer_path /tmp/tekken.json \
        --audio_path /tmp/test_audio.wav \
        --processor_path /tmp/voxtral_mlx/preprocessor.pte \
        --prompt "What is happening in this audio?" \
        --temperature 0 2>&1)
      echo "Runner output:"
      echo "$OUTPUT"
      # Smoke check: the model's answer for this clip mentions a poem.
      if echo "$OUTPUT" | grep -iq "poem"; then
        echo "Success: 'poem' found in output"
      else
        echo "Failed: Expected 'poem' not found in output"
        exit 1
      fi
      echo "::endgroup::"

# Export the streaming Voxtral Realtime model plus its mel-spectrogram
# preprocessor with the MLX backend, build the runner, and smoke-test a
# known clip. Needs the HF token secret for the gated model download.
test-mlx-voxtral-realtime:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    job-name: test-mlx-voxtral-realtime
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On PRs, test the PR head commit rather than the synthetic merge commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Voxtral Realtime requirements"
      ${CONDA_RUN} pip install safetensors
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Download model"
      # Download once up front so the command substitution below hits the
      # local cache and captures only the printed path, not download noise.
      HF_TOKEN=$SECRET_EXECUTORCH_HF_TOKEN ${CONDA_RUN} python -c "from huggingface_hub import snapshot_download; snapshot_download('mistralai/Voxtral-Mini-4B-Realtime-2602')"
      MODEL_PATH=$(HF_TOKEN=$SECRET_EXECUTORCH_HF_TOKEN ${CONDA_RUN} python -c "from huggingface_hub import snapshot_download; print(snapshot_download('mistralai/Voxtral-Mini-4B-Realtime-2602'))")
      echo "Model path: ${MODEL_PATH}"
      echo "::endgroup::"

      echo "::group::Export preprocessor"
      # Nothing has created the output directory at this point (the model
      # export below only creates it via --output-dir after this step), so
      # make it explicitly; --output_file presumably does not mkdir parents.
      mkdir -p /tmp/voxtral_rt_mlx
      ${CONDA_RUN} python -m executorch.extension.audio.mel_spectrogram \
        --feature_size 128 \
        --streaming \
        --backend mlx \
        --output_file /tmp/voxtral_rt_mlx/preprocessor.pte
      echo "::endgroup::"

      echo "::group::Export Voxtral Realtime (streaming)"
      ${CONDA_RUN} python -m executorch.examples.models.voxtral_realtime.export_voxtral_rt \
        --model-path "${MODEL_PATH}" \
        --backend mlx \
        --streaming \
        --output-dir /tmp/voxtral_rt_mlx \
        --qlinear-encoder 4w \
        --qlinear 4w \
        --qembedding 8w \
        --qembedding-group-size 128
      echo "::endgroup::"

      echo "::group::Build Voxtral Realtime MLX runner"
      ${CONDA_RUN} make voxtral_realtime-mlx
      echo "::endgroup::"

      echo "::group::Run Voxtral Realtime MLX runner"
      # --fail: abort on HTTP errors instead of saving an error page as the .wav.
      curl --fail -L https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav -o /tmp/test_audio.wav
      OUTPUT=$(./cmake-out/examples/models/voxtral_realtime/voxtral_realtime_runner \
        --model_path /tmp/voxtral_rt_mlx/model.pte \
        --tokenizer_path "${MODEL_PATH}/tekken.json" \
        --preprocessor_path /tmp/voxtral_rt_mlx/preprocessor.pte \
        --audio_path /tmp/test_audio.wav \
        --streaming 2>&1)
      echo "Runner output:"
      echo "$OUTPUT"
      # Smoke check: the sample clip's transcript contains the name "Phoebe".
      if echo "$OUTPUT" | grep -iq "Phoebe"; then
        echo "Success: 'Phoebe' found in output"
      else
        echo "Failed: Expected 'Phoebe' not found in output"
        exit 1
      fi
      echo "::endgroup::"

# Export whisper-tiny with the MLX backend and run a Python inference smoke
# test against a known sample. Needs the HF token secret for model download.
test-mlx-whisper:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    job-name: test-mlx-whisper
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On PRs, test the PR head commit rather than the synthetic merge commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      # Label fixed: this job has no separate MLX cmake-configure step
      # (unlike test-mlx-llm); the old "and configure MLX build" wording
      # was misleading in the CI logs.
      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Whisper requirements"
      ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0"
      # Quote the token so set -u / empty-secret cases fail loudly here
      # rather than passing a word-split argument list to the CLI.
      ${CONDA_RUN} huggingface-cli login --token "$SECRET_EXECUTORCH_HF_TOKEN"
      ${CONDA_RUN} pip install transformers soundfile datasets librosa
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export Whisper"
      ${CONDA_RUN} python -m executorch.backends.mlx.examples.whisper.export_whisper \
        --model-id "openai/whisper-tiny" \
        --output-dir /tmp/whisper_mlx \
        --dtype bf16 \
        --qlinear 4w
      echo "::endgroup::"

      echo "::group::Run Whisper inference"
      OUTPUT=$( ${CONDA_RUN} python -m executorch.backends.mlx.examples.whisper.run_whisper \
        --model-dir /tmp/whisper_mlx \
        --use-sample-audio 2>&1)
      echo "$OUTPUT"
      # Smoke check: the bundled sample audio transcribes to the LibriSpeech
      # "Mr. Quilter" passage.
      if echo "$OUTPUT" | grep -iq "Mr. Quilter"; then
        echo "Success: 'Mr. Quilter' found in transcription"
      else
        echo "Failed: Expected 'Mr. Quilter' not found in transcription"
        exit 1
      fi
      echo "::endgroup::"


# End-to-end check of the export_llama_lib path with the MLX backend:
# export stories110M, build the C++ llama runner, and run a short generation.
test-mlx-stories110m:
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  with:
    job-name: test-mlx-stories110m
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On PRs, test the PR head commit rather than the synthetic merge commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      set -eux

      echo "::group::Install ExecuTorch"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      echo "::endgroup::"

      echo "::group::Install Llama requirements"
      ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Build ExecuTorch with MLX delegate"
      ${CONDA_RUN} cmake --workflow --preset mlx-release
      echo "::endgroup::"

      echo "::group::Build Llama runner with MLX"
      pushd examples/models/llama
      ${CONDA_RUN} cmake --workflow --preset llama-release
      popd
      echo "::endgroup::"

      echo "::group::Download stories110M artifacts"
      # -f/--fail: abort on HTTP errors instead of saving an error page as
      # the checkpoint/tokenizer and failing later during export.
      curl -fLs "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" --output stories110M.pt
      curl -fLs "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" --output tokenizer.model
      echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
      echo "::endgroup::"

      echo "::group::Create tokenizer.bin"
      ${CONDA_RUN} python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
      echo "::endgroup::"

      echo "::group::Export stories110M with MLX backend via export_llama_lib"
      ${CONDA_RUN} python -m extension.llm.export.export_llm \
        base.checkpoint=stories110M.pt \
        base.params=params.json \
        model.use_kv_cache=true \
        model.dtype_override=fp32 \
        backend.mlx.enabled=true \
        quantization.qmode=4w \
        quantization.group_size=32 \
        export.output_name=/tmp/stories110m_mlx.pte
      echo "::endgroup::"

      echo "::group::Run inference with C++ llama runner"
      # Short deterministic generation (temperature=0, 10 tokens): we only
      # verify the runner executes the MLX-delegated .pte without crashing.
      ./cmake-out/examples/models/llama/llama_main \
        --model_path=/tmp/stories110m_mlx.pte \
        --tokenizer_path=tokenizer.bin \
        --prompt="Once upon a time," \
        --temperature=0 \
        --seq_len=10
      echo "::endgroup::"

# Matrix smoke test of the HF LLM export/run path on the MLX backend across
# several small models, custom-op on/off, and two quantization configs.
test-mlx-llm:
  strategy:
    fail-fast: false
    matrix:
      model:
        - id: "unsloth/Llama-3.2-1B-Instruct"
          name: "llama-1b"
        - id: "unsloth/Qwen3-0.6B"
          name: "qwen3-0.6b"
        - id: "unsloth/gemma-3-1b-it"
          name: "gemma3-1b"
      use-custom: [false, true]
      qconfig: ["4w", "nvfp4"]
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  secrets: inherit
  with:
    job-name: test-mlx-llm-${{ matrix.model.name }}${{ matrix.use-custom && '-custom' || '' }}-${{ matrix.qconfig }}
    runner: macos-14-xlarge
    python-version: "3.12"
    submodules: recursive
    # On PRs, test the PR head commit rather than the synthetic merge commit.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    secrets-env: EXECUTORCH_HF_TOKEN
    timeout: 90
    script: |
      set -eux

      MODEL_ID="${{ matrix.model.id }}"
      MODEL_NAME="${{ matrix.model.name }}"
      USE_CUSTOM="${{ matrix.use-custom }}"
      QCONFIG="${{ matrix.qconfig }}"

      # Extra exporter flags for the custom-op matrix leg.
      CUSTOM_ARGS=""
      if [ "${USE_CUSTOM}" = "true" ]; then
        CUSTOM_ARGS="--use-custom-sdpa --use-custom-kv-cache"
      fi

      echo "::group::Install ExecuTorch and configure MLX build"
      ${CONDA_RUN} python install_executorch.py > /dev/null
      ${CONDA_RUN} cmake --preset mlx-release
      echo "::endgroup::"

      echo "::group::Install LLM requirements"
      ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0"
      # Quote the token so an empty/odd secret fails loudly here rather than
      # word-splitting into the CLI's argument list.
      ${CONDA_RUN} huggingface-cli login --token "$SECRET_EXECUTORCH_HF_TOKEN"
      # Pin optimum-executorch to the commit the CI docker images use.
      OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
      ${CONDA_RUN} pip install transformers "optimum-executorch @ git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}"
      echo "::endgroup::"

      ${CONDA_RUN} pip list

      echo "::group::Export ${MODEL_NAME}"
      # CUSTOM_ARGS is intentionally unquoted: it must word-split into
      # zero or two separate flags.
      ${CONDA_RUN} python -m executorch.backends.mlx.examples.llm.export_llm_hf \
        --model-id "${MODEL_ID}" \
        --output "/tmp/${MODEL_NAME}.pte" \
        --qlinear "${QCONFIG}" \
        --qembedding "${QCONFIG}" \
        ${CUSTOM_ARGS}
      echo "::endgroup::"

      echo "::group::Run ${MODEL_NAME} inference"
      OUTPUT=$(${CONDA_RUN} python -m executorch.backends.mlx.examples.llm.run_llm_hf \
        --pte "/tmp/${MODEL_NAME}.pte" \
        --model-id "${MODEL_ID}" \
        --prompt "What is the capital of France?" \
        --max-new-tokens 50 2>&1)
      echo "$OUTPUT"
      # Smoke check: any sensible answer mentions Paris.
      if echo "$OUTPUT" | grep -iq "Paris"; then
        echo "Success: 'Paris' found in output"
      else
        echo "Failed: Expected 'Paris' not found in output"
        exit 1
      fi
      echo "::endgroup::"
Loading
Loading