From 33055946c2b75c1203a013bbba0c1ce9801147a7 Mon Sep 17 00:00:00 2001 From: Jean-Luc Duprat Date: Mon, 27 Oct 2025 12:06:54 -0700 Subject: [PATCH] FlashAttention Benchmark update FA4 now automatically picks up nvidia-cutlass-dsl from the project requirements. This fixes the failures from the last few days where we were installing an outdated package. Test output now clearly states the system power limit. Update Docker image version in workflow --- .github/workflows/flash_attention.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/flash_attention.yml b/.github/workflows/flash_attention.yml index ffdff587..70f54f39 100644 --- a/.github/workflows/flash_attention.yml +++ b/.github/workflows/flash_attention.yml @@ -34,7 +34,7 @@ jobs: - name: Run Flash Attention benchmark in Docker env: - DOCKER_IMAGE: nvcr.io/nvidia/pytorch:25.06-py3 + DOCKER_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3 run: | set -eux @@ -52,21 +52,19 @@ jobs: "${DOCKER_IMAGE}" ) - # Install CuTe DSL - docker exec -t "${container_name}" bash -c " - set -x - echo 'Installing nvidia-cutlass-dsl' - pip install nvidia-cutlass-dsl==4.1.0 - " - # Build and run FlashAttention CuTe DSL docker exec -t "${container_name}" bash -c " set -x pushd fa4 python setup.py install - - echo '

B200 1000W

' >> /tmp/workspace/fa4_output.txt + pip install -e flash_attn/cute/ + nvidia-smi + + echo '

B200' >> /tmp/workspace/fa4_output.txt + nvidia-smi -q -d POWER | grep 'Current Power Limit' | head -1 | cut -d : -f 2 >> /tmp/workspace/fa4_output.txt + echo '

' >> /tmp/workspace/fa4_output.txt + export PYTHONPATH=\$(pwd) python benchmarks/benchmark_attn.py >> /tmp/workspace/fa4_output.txt popd