-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] s_quadmask* implicitly defines SCC #161582
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Signed-off-by: John Lu <John.Lu@amd.com>
@llvm/pr-subscribers-backend-amdgpu Author: None (LU-JOHN) ChangesFix s_quadmask* instruction description so that it defines SCC. Full diff: https://github.com/llvm/llvm-project/pull/161582.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index b3fd8c70dd045..84287b621fe78 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -352,10 +352,12 @@ def S_XNOR_SAVEEXEC_B64 : SOP1_64 <"s_xnor_saveexec_b64">;
} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
+let Defs = [SCC] in {
def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32",
[(set i32:$sdst, (int_amdgcn_s_quadmask i32:$src0))]>;
def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64",
[(set i64:$sdst, (int_amdgcn_s_quadmask i64:$src0))]>;
+}
let Uses = [M0] in {
def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
diff --git a/llvm/test/CodeGen/AMDGPU/scc_quadmask.ll b/llvm/test/CodeGen/AMDGPU/scc_quadmask.ll
new file mode 100644
index 0000000000000..34fb0a19f2ab4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scc_quadmask.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
+;; Ensure that AND/ICMP cannot be fused into an AND because s_quadmask_32 implicitly defines SCC.
+
+define amdgpu_kernel void @quadmask_32(i32 %val0, i32 %val1, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: quadmask_32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_and_b32 s0, s0, 1
+; CHECK-NEXT: s_quadmask_b32 s1, s1
+; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: v_mov_b32_e32 v3, s1
+; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
+; CHECK-NEXT: global_store_dword v2, v3, s[2:3]
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; CHECK-NEXT: global_store_dword v[0:1], v2, off
+; CHECK-NEXT: s_endpgm
+ %and = and i32 %val0, 1
+ %result = call i32 @llvm.amdgcn.s.quadmask.i32(i32 %val1) nounwind readnone
+ store i32 %result, ptr addrspace(1) %ptr
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 1, i32 0
+ store i32 %sel, ptr addrspace(1) null, align 4
+ ret void
+}
+
+define amdgpu_kernel void @quadmask_64(i32 %val0, i64 %val1, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: quadmask_64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_dword s6, s[4:5], 0x0
+; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_and_b32 s4, s6, 1
+; CHECK-NEXT: s_quadmask_b64 s[0:1], s[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: v_mov_b32_e32 v1, s1
+; CHECK-NEXT: s_cmp_eq_u32 s4, 0
+; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; CHECK-NEXT: global_store_dword v[0:1], v2, off
+; CHECK-NEXT: s_endpgm
+ %and = and i32 %val0, 1
+ %result = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %val1) nounwind readnone
+ store i64 %result, ptr addrspace(1) %ptr
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 1, i32 0
+ store i32 %sel, ptr addrspace(1) null, align 4
+ ret void
+}
|
Prior to this PR the assembly generated for scc_quadmask.ll was:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM thanks, just an optional suggestion inline.
@@ -0,0 +1,57 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggest just adding tests to the end of llvm.amdgcn.quadmask.ll
instead of creating a new file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jayfoad If I move the tests to llvm.amdgcn.quadmask.ll
I will need to modify the RUN lines since -global-isel=1
produces:
s_quadmask_b64 s[0:1], s[0:1]
s_and_b32 s4, s4, 1
v_mov_b32_e32 v0, s0
s_cmp_eq_u32 s4, 0
but -global-isel=0
produces:
s_and_b32 s4, s6, 1
s_quadmask_b64 s[0:1], s[0:1]
s_cmp_eq_u32 s4, 0
Is this okay?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, change them to -check-prefixes=GFX11,GFX11-GISEL
and -check-prefixes=GFX11,GFX11-SDAG
respectively.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tests moved to llvm.amdgcn.quadmask.ll
. RUN lines modified.
Signed-off-by: John Lu <John.Lu@amd.com>
Fix s_quadmask* instruction description so that it defines SCC. --------- Signed-off-by: John Lu <John.Lu@amd.com>
Fix s_quadmask* instruction description so that it defines SCC.