# Patch for .github/workflows/flash_attention.yml (job: benchmark-flash-attn).
#
# Changes carried by this patch:
#   1. Runner label:  runs-on: B200  ->  runs-on: linux.dgx.b200.8
#   2. Dependency pin: nvidia-cutlass-dsl==4.1.0.dev0  ->  nvidia-cutlass-dsl==4.1.0
#   3. Adds the missing newline at end of file after the final `popd`.
#
# NOTE(review): this patch was recovered from a copy whose line breaks had been
# collapsed to spaces. Tokens, ordering and hunk headers are preserved verbatim;
# the leading indentation of the YAML lines follows the conventional 2-space
# workflow layout and is an assumption - TODO confirm against the target file.
# NOTE(review): the third hunk header (-41,4) requires one blank context line
# that is not visible in the collapsed copy; it is presumed to sit directly
# before `popd` - verify, otherwise apply with whitespace tolerance.
# NOTE(review): the step name "Buid and Run ..." is a context line and must
# match the target file byte-for-byte, so its spelling is left untouched here.
diff --git a/.github/workflows/flash_attention.yml b/.github/workflows/flash_attention.yml
index 7711806c..bc2cceb0 100644
--- a/.github/workflows/flash_attention.yml
+++ b/.github/workflows/flash_attention.yml
@@ -15,7 +15,7 @@ on:
 jobs:
   benchmark-flash-attn:
     name: Flash Attention CuTe DSL Benchmark
-    runs-on: B200
+    runs-on: linux.dgx.b200.8
     container:
       # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/
       image: nvcr.io/nvidia/pytorch:25.06-py3
@@ -29,7 +29,7 @@ jobs:
         run: |
           set -x
           echo "Installing nvidia-cutlass-dsl"
-          pip install nvidia-cutlass-dsl==4.1.0.dev0
+          pip install nvidia-cutlass-dsl==4.1.0
       - name: Buid and Run FlashAttention CuTe DSL
         run: |
           set -x
@@ -41,4 +41,4 @@ jobs:
           export PYTHONPATH=$(pwd)
           python benchmarks/benchmark_attn.py >> $GITHUB_STEP_SUMMARY
 
-          popd
\ No newline at end of file
+          popd