diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 48a851338af..71f98c03196 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -159,7 +159,7 @@ jobs: repo: "google" name: "gemma-3-4b-it" quant: "quantized-int4-weight-only" - # Voxtral Realtime only supports int4-tile-packed on CUDA (offline mode) + # Voxtral Realtime only supports int4-tile-packed on CUDA - model: repo: "mistralai" name: "Voxtral-Mini-4B-Realtime-2602" @@ -197,12 +197,7 @@ jobs: echo "::endgroup::" fi - # Voxtral Realtime uses offline mode for CUDA CI (not streaming) - VR_MODE="" - if [ "${{ matrix.model.name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then - VR_MODE="vr-offline" - fi - source .ci/scripts/export_model_artifact.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" "$VR_MODE" + source .ci/scripts/export_model_artifact.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" test-model-cuda-e2e: name: test-model-cuda-e2e @@ -237,7 +232,7 @@ jobs: repo: "google" name: "gemma-3-4b-it" quant: "quantized-int4-weight-only" - # Voxtral Realtime only supports int4-tile-packed on CUDA (offline mode) + # Voxtral Realtime only supports int4-tile-packed on CUDA - model: repo: "mistralai" name: "Voxtral-Mini-4B-Realtime-2602" @@ -256,12 +251,7 @@ jobs: download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-${{ matrix.quant }} ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | - # Voxtral Realtime uses offline mode for CUDA CI (not streaming) - VR_MODE="" - if [ "${{ matrix.model.name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then - VR_MODE="vr-offline" - fi - source .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" "$VR_MODE" + source .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}" test-cuda-pybind: name: test-cuda-pybind