From 33055946c2b75c1203a013bbba0c1ce9801147a7 Mon Sep 17 00:00:00 2001 From: Jean-Luc Duprat Date: Mon, 27 Oct 2025 12:06:54 -0700 Subject: [PATCH] FlashAttention Benchmark update FA4 now automatically picks up nvidia-cutlass-dsl from the project requirements. This fixes the failures from the last few days where we were installing an outdated package. Test output now clearly states the system power limit. Update Docker image version in workflow --- .github/workflows/flash_attention.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/flash_attention.yml b/.github/workflows/flash_attention.yml index ffdff587..70f54f39 100644 --- a/.github/workflows/flash_attention.yml +++ b/.github/workflows/flash_attention.yml @@ -34,7 +34,7 @@ jobs: - name: Run Flash Attention benchmark in Docker env: - DOCKER_IMAGE: nvcr.io/nvidia/pytorch:25.06-py3 + DOCKER_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3 run: | set -eux @@ -52,21 +52,19 @@ jobs: "${DOCKER_IMAGE}" ) - # Install CuTe DSL - docker exec -t "${container_name}" bash -c " - set -x - echo 'Installing nvidia-cutlass-dsl' - pip install nvidia-cutlass-dsl==4.1.0 - " - # Build and run FlashAttention CuTe DSL docker exec -t "${container_name}" bash -c " set -x pushd fa4 python setup.py install - - echo '

B200 1000W

' >> /tmp/workspace/fa4_output.txt + pip install -e flash_attn/cute/ + nvidia-smi + + echo '

B200' >> /tmp/workspace/fa4_output.txt + nvidia-smi -q -d POWER | grep 'Current Power Limit' | head -1 | cut -d : -f 2 >> /tmp/workspace/fa4_output.txt + echo '

' >> /tmp/workspace/fa4_output.txt + export PYTHONPATH=\$(pwd) python benchmarks/benchmark_attn.py >> /tmp/workspace/fa4_output.txt popd