[AMDGPU] Add target feature for waits before system scope stores. NFC. #164993

jayfoad · 2025-10-24T15:08:46Z

No description provided.

llvmbot · 2025-10-24T15:09:34Z

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/164993.diff

4 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+8-1)
(modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+5)
(modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+2-1)
(modified) llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll (+39-11)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8383c3a682d..f78a2c0029dc9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1466,6 +1466,12 @@ def FeatureClusters : SubtargetFeature< "clusters",
   "Has clusters of workgroups support"
 >;
 
+def FeatureWaitsBeforeSystemScopeStores
+    : SubtargetFeature<"waits-before-system-scope-stores",
+                       "RequiresWaitsBeforeSystemScopeStores", "true",
+                       "Target requires waits for loads and atomics before "
+                       "system scope stores">;
+
 // Dummy feature used to disable assembler instructions.
 def FeatureDisable : SubtargetFeature<"",
   "FeatureDisable","true",
@@ -2060,7 +2066,8 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureMaxHardClauseLength32,
    Feature1_5xVGPRs,
    FeatureMemoryAtomicFAddF32DenormalSupport,
-   FeatureBVHDualAndBVH8Insts
+   FeatureBVHDualAndBVH8Insts,
+   FeatureWaitsBeforeSystemScopeStores,
    ]>;
 
 def FeatureISAVersion12_50 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ac660d5fada79..f377b8aaf1333 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -290,6 +290,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool Has45BitNumRecordsBufferResource = false;
 
   bool HasClusters = false;
+  bool RequiresWaitsBeforeSystemScopeStores = false;
 
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable = false;
@@ -1861,6 +1862,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool has45BitNumRecordsBufferResource() const {
     return Has45BitNumRecordsBufferResource;
   }
+
+  bool requiresWaitsBeforeSystemScopeStores() const {
+    return RequiresWaitsBeforeSystemScopeStores;
+  }
 };
 
 class GCNUserSGPRUsageInfo {
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 07264d973648f..d9f51d7b6592a 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2673,7 +2673,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
   const unsigned Scope = CPol->getImm() & CPol::SCOPE;
 
   // GFX12.0 only: Extra waits needed before system scope stores.
-  if (!ST.hasGFX1250Insts() && !Atomic && Scope == CPol::SCOPE_SYS)
+  if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
+      Scope == CPol::SCOPE_SYS)
     Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());
 
   return Changed;
diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
index 2d7a91f0cd114..985bcbd6ff4f4 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
@@ -1,22 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
 
 define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-; GFX12-LABEL: intrinsic_store_system_scope:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
-; GFX12-NEXT:    s_endpgm
+; GFX1200-LABEL: intrinsic_store_system_scope:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: intrinsic_store_system_scope:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT:    buffer_store_b32 v0, v[2:3], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: intrinsic_store_system_scope:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT:    buffer_store_b32 v0, v[4:5], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT:    s_endpgm
   call void @llvm.amdgcn.struct.buffer.store.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 24)
   ret void
 }
 
 define amdgpu_ps void @generic_store_volatile(i32 %val, ptr addrspace(1) %out) {
-; GFX12-LABEL: generic_store_volatile:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
-; GFX12-NEXT:    s_wait_storecnt 0x0
-; GFX12-NEXT:    s_endpgm
+; GFX1200-LABEL: generic_store_volatile:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
+; GFX1200-NEXT:    s_wait_storecnt 0x0
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: generic_store_volatile:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT:    global_store_b32 v[2:3], v0, off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_storecnt 0x0
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: generic_store_volatile:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT:    global_store_b32 v[4:5], v0, off scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT:    s_wait_storecnt 0x0
+; GFX1250-GISEL-NEXT:    s_endpgm
   store volatile i32 %val, ptr addrspace(1) %out
   ret void
 }

jayfoad · 2025-10-24T15:10:07Z

llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll

@@ -1,22 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4


@petar-avramovic this test added in #82996 is strange because it never actually showed any waits being inserted.

I think the waits are optimized out of the final output of the .ll test; the proper test, where the waits are actually inserted, is the MIR version of the same test: llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.mir
IIRC, the .ll test shows how to generate scope:SCOPE_SYS: the options are a volatile store or an intrinsic with an argument that corresponds to the bitmask for the scope.

shiltian · 2025-10-24T15:37:28Z

llvm/lib/Target/AMDGPU/AMDGPU.td

 >;

+def FeatureWaitsBeforeSystemScopeStores
+    : SubtargetFeature<"waits-before-system-scope-stores",


nit: would be easier to read if one line per argument.

This was clang-format's doing.

Honestly, I don't think clang-format works well with TableGen files. Most of the time I feel the result is very ugly after formatting. Lol.

Fair enough. I reformatted it manually more in the style of the other SubtargetFeatures.

I think we need to turn off clang-format for TableGen. I don't think I've ever seen it make a good suggestion.

llvm#164993)

[AMDGPU] Add target feature for waits before system scope stores. NFC.

5184375

jayfoad requested review from Pierre-vh and petar-avramovic and removed request for Pierre-vh October 24, 2025 15:09

llvmbot added the backend:AMDGPU label Oct 24, 2025

jayfoad requested review from Pierre-vh and shiltian October 24, 2025 15:09

jayfoad commented Oct 24, 2025

View reviewed changes

shiltian approved these changes Oct 24, 2025

View reviewed changes

Manually reformat AMDGPU.td parts

474970d

Pierre-vh approved these changes Oct 27, 2025

View reviewed changes

jayfoad merged commit 60f20ea into llvm:main Oct 27, 2025
10 checks passed

jayfoad deleted the waits-before-system-scope-stores branch October 27, 2025 10:31

dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025

[AMDGPU] Add target feature for waits before system scope stores. NFC. (

9978751

llvm#164993)

Lukacma pushed a commit to Lukacma/llvm-project that referenced this pull request Oct 29, 2025

[AMDGPU] Add target feature for waits before system scope stores. NFC. (

a062f8d

llvm#164993)

aokblast pushed a commit to aokblast/llvm-project that referenced this pull request Oct 30, 2025

[AMDGPU] Add target feature for waits before system scope stores. NFC. (

9a93d9a

llvm#164993)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Add target feature for waits before system scope stores. NFC. #164993

[AMDGPU] Add target feature for waits before system scope stores. NFC. #164993

jayfoad commented Oct 24, 2025

Uh oh!

llvmbot commented Oct 24, 2025

Uh oh!

jayfoad Oct 24, 2025

Uh oh!

petar-avramovic Oct 24, 2025

Uh oh!

shiltian Oct 24, 2025

Uh oh!

jayfoad Oct 24, 2025

Uh oh!

shiltian Oct 24, 2025

Uh oh!

jayfoad Oct 24, 2025

Uh oh!

arsenm Oct 26, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

		@@ -1,22 +1,50 @@
		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4

[AMDGPU] Add target feature for waits before system scope stores. NFC. #164993

[AMDGPU] Add target feature for waits before system scope stores. NFC. #164993

Conversation

jayfoad commented Oct 24, 2025

Uh oh!

llvmbot commented Oct 24, 2025

Uh oh!

jayfoad Oct 24, 2025

Choose a reason for hiding this comment

Uh oh!

petar-avramovic Oct 24, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian Oct 24, 2025

Choose a reason for hiding this comment

Uh oh!

jayfoad Oct 24, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian Oct 24, 2025

Choose a reason for hiding this comment

Uh oh!

jayfoad Oct 24, 2025

Choose a reason for hiding this comment

Uh oh!

arsenm Oct 26, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants