-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Add test to show s_cselect generation from uniform select #79384
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository; in that case, you can instead tag reviewers by name in a comment. If you have received no comments on your PR for a week, you can request a review by pinging the PR with a comment. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-backend-amdgpu — Author: choikwa (choikwa). Changes: …SK_B32. Full diff: https://github.com/llvm/llvm-project/pull/79384.diff — 1 file affected:
diff --git a/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
new file mode 100644
index 000000000000000..16f499f060ac5e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @_Z8Kernel3DI3APE11GaugeAPEArgEvT0_(i32 %inc.i.i, i32 %dr.037.i.i) #0 {
+; GFX90A-LABEL: _Z8Kernel3DI3APE11GaugeAPEArgEvT0_:
+; GFX90A: ; %bb.0: ; %entry
+; GFX90A-NEXT: s_add_u32 flat_scratch_lo, s10, s15
+; GFX90A-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
+; GFX90A-NEXT: s_add_u32 s0, s0, s15
+; GFX90A-NEXT: s_addc_u32 s1, s1, 0
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX90A-NEXT: s_add_u32 s8, s6, 8
+; GFX90A-NEXT: s_addc_u32 s9, s7, 0
+; GFX90A-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GFX90A-NEXT: s_getpc_b64 s[6:7]
+; GFX90A-NEXT: s_add_u32 s6, s6, _ZN3__XcviEv@gotpcrel32@lo+4
+; GFX90A-NEXT: s_addc_u32 s7, s7, _ZN3__XcviEv@gotpcrel32@hi+12
+; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX90A-NEXT: v_mov_b32_e32 v31, v0
+; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX90A-NEXT: s_mov_b32 s4, 0
+; GFX90A-NEXT: s_mov_b32 s5, s35
+; GFX90A-NEXT: s_and_b64 vcc, exec, -1
+; GFX90A-NEXT: s_mov_b32 s6, 0
+; GFX90A-NEXT: s_mov_b32 s7, 0
+; GFX90A-NEXT: s_mov_b32 s8, 0
+; GFX90A-NEXT: .LBB0_1: ; %for.body.i.i
+; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT: s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT: s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT: s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT: s_cselect_b32 s9, s6, s4
+; GFX90A-NEXT: s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT: s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT: s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT: s_cselect_b32 s9, s7, s9
+; GFX90A-NEXT: s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT: s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT: s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT: s_cselect_b32 s9, s8, s9
+; GFX90A-NEXT: s_or_b32 s9, s9, s34
+; GFX90A-NEXT: s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT: s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT: s_and_b64 s[12:13], s[10:11], exec
+; GFX90A-NEXT: s_cselect_b32 s6, s9, s6
+; GFX90A-NEXT: s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT: s_cselect_b64 s[12:13], -1, 0
+; GFX90A-NEXT: s_and_b64 s[14:15], s[12:13], exec
+; GFX90A-NEXT: s_cselect_b32 s8, s9, s8
+; GFX90A-NEXT: s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT: s_cselect_b64 s[14:15], -1, 0
+; GFX90A-NEXT: s_and_b64 s[16:17], s[14:15], exec
+; GFX90A-NEXT: s_cselect_b32 s7, s9, s7
+; GFX90A-NEXT: s_cmp_eq_u32 s5, 0
+; GFX90A-NEXT: s_cselect_b32 s4, s9, s4
+; GFX90A-NEXT: s_or_b64 s[10:11], s[14:15], s[10:11]
+; GFX90A-NEXT: s_or_b64 s[10:11], s[12:13], s[10:11]
+; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[10:11]
+; GFX90A-NEXT: s_mov_b64 vcc, vcc
+; GFX90A-NEXT: s_cbranch_vccnz .LBB0_1
+; GFX90A-NEXT: ; %bb.2: ; %DummyReturnBlock
+; GFX90A-NEXT: s_endpgm
+entry: ; kernel body: extract/insert on <4 x i32> at a kernel-argument index, inside a loop with no exit edge
+ %call.i = call i32 @_ZN3__XcviEv() ; external call; its result becomes element 0 of %0
+ %0 = insertelement <4 x i32> zeroinitializer, i32 %call.i, i64 0 ; %0 = <%call.i, 0, 0, 0>
+ br label %for.body.i.i
+
+for.body.i.i: ; preds = %for.body.i.i, %entry
+ %x.sroa.0.036.i.i = phi <4 x i32> [ %0, %entry ], [ %4, %for.body.i.i ] ; loop-carried vector: starts as %0, then takes %4 each iteration
+ %X.sroa.0.035.i.i = phi <4 x i32> [ zeroinitializer, %entry ], [ %2, %for.body.i.i ] ; loop-carried vector: starts as all zeros, then takes %2 each iteration
+ %idxprom.i.i = zext i32 %dr.037.i.i to i64 ; element index comes from a kernel argument, so it is not a compile-time constant
+ %1 = extractelement <4 x i32> %X.sroa.0.035.i.i, i64 %idxprom.i.i ; read the element at the dynamic index
+ %add.i.i = or i32 %1, %inc.i.i ; OR that element with the %inc.i.i kernel argument
+ %2 = insertelement <4 x i32> %X.sroa.0.035.i.i, i32 %add.i.i, i64 %idxprom.i.i ; write the result back at the same dynamic index
+ %3 = extractelement <4 x i32> %x.sroa.0.036.i.i, i64 %idxprom.i.i ; read the other vector at the same dynamic index
+ %4 = insertelement <4 x i32> %x.sroa.0.036.i.i, i32 %3, i64 0 ; place that element into element 0
+ br label %for.body.i.i ; unconditional back-edge: the block's only successor is itself
+}
+
+declare i32 @_ZN3__XcviEv()
+
+attributes #0 = { "target-cpu"="gfx90a" }
|
Can you fix the commit title split, and also prepend the commit title with [AMDGPU]? |
Addressed comments and rebased with 816f14d |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test looks OK, but what is it testing, and what does it have to do with V_CNDMASK_B32?
The test was reduced from a QUDA application for which ROCm 5.5.1 was generating V_CNDMASK with an illegal operand type. This was found to come from a partial commit made in an attempt to reland another change. LLVM trunk no longer generates the instruction, but I still thought it would be a good test to have. |
Do you know what commit fixed it? |
fixed by fbdea5a |
9660c09
to
68999e6
Compare
Changed the title and renamed the test case to uniform-select.ll, per @jrbyrnes' suggestion. |
The latest update renames the numbered variables. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…lvm#79384) Change-Id: I7c55803e4284a5837e8bb80a54b2a72e97d934a1
No description provided.