Skip to content

Conversation

LU-JOHN
Copy link
Contributor

@LU-JOHN LU-JOHN commented Oct 1, 2025

Fix s_quadmask* instruction description so that it defines SCC.

Signed-off-by: John Lu <John.Lu@amd.com>
@llvmbot
Copy link
Member

llvmbot commented Oct 1, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: None (LU-JOHN)

Changes

Fix s_quadmask* instruction description so that it defines SCC.


Full diff: https://github.com/llvm/llvm-project/pull/161582.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+2)
  • (added) llvm/test/CodeGen/AMDGPU/scc_quadmask.ll (+57)
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index b3fd8c70dd045..84287b621fe78 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -352,10 +352,12 @@ def S_XNOR_SAVEEXEC_B64 : SOP1_64 <"s_xnor_saveexec_b64">;
 
 } // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
 
+let Defs = [SCC] in {
 def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32",
   [(set i32:$sdst, (int_amdgcn_s_quadmask i32:$src0))]>;
 def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64",
   [(set i64:$sdst, (int_amdgcn_s_quadmask i64:$src0))]>;
+}
 
 let Uses = [M0] in {
 def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
diff --git a/llvm/test/CodeGen/AMDGPU/scc_quadmask.ll b/llvm/test/CodeGen/AMDGPU/scc_quadmask.ll
new file mode 100644
index 0000000000000..34fb0a19f2ab4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scc_quadmask.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
+;; Ensure that AND/ICMP cannot be fused into an AND because s_quadmask_32 implicitly defines SCC.
+
+define amdgpu_kernel void @quadmask_32(i32 %val0, i32 %val1, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: quadmask_32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_and_b32 s0, s0, 1
+; CHECK-NEXT:    s_quadmask_b32 s1, s1
+; CHECK-NEXT:    s_cmp_eq_u32 s0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, s1
+; CHECK-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CHECK-NEXT:    global_store_dword v2, v3, s[2:3]
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off
+; CHECK-NEXT:    s_endpgm
+  %and = and i32 %val0, 1
+  %result = call i32 @llvm.amdgcn.s.quadmask.i32(i32 %val1) nounwind readnone
+  store i32 %result, ptr addrspace(1) %ptr
+  %cmp = icmp eq i32 %and, 0
+  %sel = select i1 %cmp, i32 1, i32 0
+  store i32 %sel, ptr addrspace(1) null, align 4
+  ret void
+}
+
+define amdgpu_kernel void @quadmask_64(i32 %val0, i64 %val1, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: quadmask_64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_load_dword s6, s[4:5], 0x0
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x8
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_and_b32 s4, s6, 1
+; CHECK-NEXT:    s_quadmask_b64 s[0:1], s[0:1]
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, s1
+; CHECK-NEXT:    s_cmp_eq_u32 s4, 0
+; CHECK-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off
+; CHECK-NEXT:    s_endpgm
+  %and = and i32 %val0, 1
+  %result = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %val1) nounwind readnone
+  store i64 %result, ptr addrspace(1) %ptr
+  %cmp = icmp eq i32 %and, 0
+  %sel = select i1 %cmp, i32 1, i32 0
+  store i32 %sel, ptr addrspace(1) null, align 4
+  ret void
+}

@LU-JOHN
Copy link
Contributor Author

LU-JOHN commented Oct 1, 2025

Prior to this PR the assembly generated for scc_quadmask.ll was:

        s_bitcmp0_b32 s0, 0
        s_quadmask_b32 s0, s1
        v_mov_b32_e32 v3, s0
        s_cselect_b64 s[0:1], -1, 0

@LU-JOHN LU-JOHN requested a review from OutOfCache October 1, 2025 20:48
Copy link
Contributor

@jayfoad jayfoad left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM thanks, just an optional suggestion inline.

@@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest just adding tests to the end of llvm.amdgcn.quadmask.ll instead of creating a new file.

Copy link
Contributor Author

@LU-JOHN LU-JOHN Oct 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jayfoad If I move the tests to llvm.amdgcn.quadmask.ll I will need to modify the RUN lines since -global-isel=1 produces:

        s_quadmask_b64 s[0:1], s[0:1]
        s_and_b32 s4, s4, 1
        v_mov_b32_e32 v0, s0
        s_cmp_eq_u32 s4, 0

but -global-isel=0 produces:

        s_and_b32 s4, s6, 1
        s_quadmask_b64 s[0:1], s[0:1]
        s_cmp_eq_u32 s4, 0

Is this okay?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, change them to -check-prefixes=GFX11,GFX11-GISEL and -check-prefixes=GFX11,GFX11-SDAG respectively.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests moved to llvm.amdgcn.quadmask.ll. RUN lines modified.

Signed-off-by: John Lu <John.Lu@amd.com>
@LU-JOHN LU-JOHN merged commit 235cd75 into llvm:main Oct 2, 2025
9 checks passed
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
Fix s_quadmask* instruction description so that it defines SCC.

---------

Signed-off-by: John Lu <John.Lu@amd.com>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants