[AMDGPU] Add test to show s_cselect generation from uniform select #79384

choikwa · 2024-01-24T22:50:03Z

No description provided.

github-actions · 2024-01-24T22:50:22Z

Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be
notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write
permissions for the repository. In which case you can instead tag reviewers by
name in a comment by using @ followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review
by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate
is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the LLVM GitHub User Guide.

You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.

llvmbot · 2024-01-24T22:50:53Z

@llvm/pr-subscribers-backend-amdgpu

Author: choikwa (choikwa)

Changes

…SK_B32

Full diff: https://github.com/llvm/llvm-project/pull/79384.diff

1 Files Affected:

(added) llvm/test/CodeGen/AMDGPU/insert_extract_element.ll (+87)

diff --git a/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
new file mode 100644
index 000000000000000..16f499f060ac5e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @_Z8Kernel3DI3APE11GaugeAPEArgEvT0_(i32 %inc.i.i, i32 %dr.037.i.i) #0 { ; reduced kernel: a never-exiting loop of dynamically indexed <4 x i32> extract/insert
+; GFX90A-LABEL: _Z8Kernel3DI3APE11GaugeAPEArgEvT0_:
+; GFX90A:       ; %bb.0: ; %entry
+; GFX90A-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
+; GFX90A-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
+; GFX90A-NEXT:    s_add_u32 s0, s0, s15
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX90A-NEXT:    s_add_u32 s8, s6, 8
+; GFX90A-NEXT:    s_addc_u32 s9, s7, 0
+; GFX90A-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GFX90A-NEXT:    s_getpc_b64 s[6:7]
+; GFX90A-NEXT:    s_add_u32 s6, s6, _ZN3__XcviEv@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s7, s7, _ZN3__XcviEv@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX90A-NEXT:    s_mov_b32 s4, 0
+; GFX90A-NEXT:    s_mov_b32 s5, s35
+; GFX90A-NEXT:    s_and_b64 vcc, exec, -1
+; GFX90A-NEXT:    s_mov_b32 s6, 0
+; GFX90A-NEXT:    s_mov_b32 s7, 0
+; GFX90A-NEXT:    s_mov_b32 s8, 0
+; GFX90A-NEXT:  .LBB0_1: ; %for.body.i.i
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s6, s4
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s7, s9
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s8, s9
+; GFX90A-NEXT:    s_or_b32 s9, s9, s34
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[12:13], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s6, s9, s6
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[14:15], s[12:13], exec
+; GFX90A-NEXT:    s_cselect_b32 s8, s9, s8
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT:    s_cselect_b64 s[14:15], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[16:17], s[14:15], exec
+; GFX90A-NEXT:    s_cselect_b32 s7, s9, s7
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 0
+; GFX90A-NEXT:    s_cselect_b32 s4, s9, s4
+; GFX90A-NEXT:    s_or_b64 s[10:11], s[14:15], s[10:11]
+; GFX90A-NEXT:    s_or_b64 s[10:11], s[12:13], s[10:11]
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[10:11]
+; GFX90A-NEXT:    s_mov_b64 vcc, vcc
+; GFX90A-NEXT:    s_cbranch_vccnz .LBB0_1
+; GFX90A-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; GFX90A-NEXT:    s_endpgm
+entry:
+  %call.i = call i32 @_ZN3__XcviEv() ; external function (declared below, no body in this file)
+  %0 = insertelement <4 x i32> zeroinitializer, i32 %call.i, i64 0 ; initial vector <%call.i, 0, 0, 0>
+  br label %for.body.i.i
+
+for.body.i.i:                                     ; preds = %for.body.i.i, %entry
+  %x.sroa.0.036.i.i = phi <4 x i32> [ %0, %entry ], [ %4, %for.body.i.i ] ; loop-carried vector, seeded with %0 and updated by %4
+  %X.sroa.0.035.i.i = phi <4 x i32> [ zeroinitializer, %entry ], [ %2, %for.body.i.i ] ; loop-carried vector, starts all-zero and updated by %2
+  %idxprom.i.i = zext i32 %dr.037.i.i to i64 ; element index comes straight from a kernel argument, so it is not a compile-time constant
+  %1 = extractelement <4 x i32> %X.sroa.0.035.i.i, i64 %idxprom.i.i ; dynamic-index read of the second vector
+  %add.i.i = or i32 %1, %inc.i.i ; OR the extracted element with the other kernel argument
+  %2 = insertelement <4 x i32> %X.sroa.0.035.i.i, i32 %add.i.i, i64 %idxprom.i.i ; write the result back at the same dynamic index
+  %3 = extractelement <4 x i32> %x.sroa.0.036.i.i, i64 %idxprom.i.i ; dynamic-index read of the first vector
+  %4 = insertelement <4 x i32> %x.sroa.0.036.i.i, i32 %3, i64 0 ; store that element into lane 0 (constant index)
+  br label %for.body.i.i ; unconditional back-edge: the IR has no exit from this loop
+}
+
+declare i32 @_ZN3__XcviEv()
+
+attributes #0 = { "target-cpu"="gfx90a" }

choikwa · 2024-01-24T22:55:04Z

@bcahoon @jrbyrnes Could you review this change please? Thank you

jrbyrnes · 2024-01-25T00:00:29Z

Can you fix the split commit title, and also prepend [AMDGPU] to it?

llvm/test/CodeGen/AMDGPU/insert_extract_element.ll

choikwa · 2024-01-25T03:28:03Z

Addressed comments and rebased with 816f14d

jayfoad

The test looks OK, but what is it testing, and what does it have to do with V_CNDMASK_B32?

llvm/test/CodeGen/AMDGPU/insert_extract_element.ll

choikwa · 2024-01-25T08:36:38Z

> The test looks OK, but what is it testing, and what does it have to do with V_CNDMASK_B32?

The test was reduced from a QUDA application for which ROCm 5.5.1 generated a V_CNDMASK with an illegal operand type. This turned out to be caused by a partial commit made in an attempt to reland another change. LLVM trunk no longer generates that illegal form of the instruction, but I still thought it would be a good test to have.

arsenm · 2024-01-25T15:27:44Z

> > The test looks OK, but what is it testing, and what does it have to do with V_CNDMASK_B32?
>
> The test was reduced from a QUDA application with which rocm 5.5.1 was generating V_CNDMASK with an illegal operand type. This was found to be from a partial commit in attempt to reland another change. LLVM trunk no longer generates the instruction but I still thought it would be a good test to have.

Do you know what commit fixed it?

choikwa · 2024-01-25T15:29:32Z

fixed by fbdea5a

choikwa · 2024-01-26T01:24:43Z

Changed title and testcase to uniform-select.ll per @jrbyrnes' suggestion

choikwa · 2024-01-29T15:20:23Z

The latest update renames the numbered variables.

jrbyrnes

LGTM

…lvm#79384) Change-Id: I7c55803e4284a5837e8bb80a54b2a72e97d934a1

llvmbot added the backend:AMDGPU label Jan 24, 2024

jrbyrnes self-requested a review January 24, 2024 22:59

jrbyrnes reviewed Jan 25, 2024

View reviewed changes

choikwa changed the title ~~Add insert/extract test to test the legal form of instruction V_CNDMA…~~ [AMDGPU] Add insert/extract test to test instruction V_CNDMASK_B32 Jan 25, 2024

choikwa force-pushed the main branch from c9aa6b1 to 816f14d Compare January 25, 2024 03:25

jayfoad reviewed Jan 25, 2024

View reviewed changes

llvm/test/CodeGen/AMDGPU/insert_extract_element.ll Outdated Show resolved Hide resolved

arsenm reviewed Jan 25, 2024

View reviewed changes

llvm/test/CodeGen/AMDGPU/insert_extract_element.ll Outdated Show resolved Hide resolved

llvm/test/CodeGen/AMDGPU/insert_extract_element.ll Outdated Show resolved Hide resolved

llvm/test/CodeGen/AMDGPU/insert_extract_element.ll Outdated Show resolved Hide resolved

choikwa force-pushed the main branch from 816f14d to e68d12a Compare January 25, 2024 10:10

choikwa changed the title ~~[AMDGPU] Add insert/extract test to test instruction V_CNDMASK_B32~~ [AMDGPU] Add insert/extract element test for v4i32 Jan 25, 2024

choikwa force-pushed the main branch from e68d12a to 714cfc3 Compare January 25, 2024 12:53

choikwa force-pushed the main branch 3 times, most recently from 9660c09 to 68999e6 Compare January 26, 2024 01:23

choikwa changed the title ~~[AMDGPU] Add insert/extract element test for v4i32~~ [AMDGPU] Add test to show s_cselect generation from uniform select Jan 26, 2024

[AMDGPU] Add test to show s_cselect generation from uniform select

bc25016

choikwa force-pushed the main branch from 68999e6 to bc25016 Compare January 29, 2024 15:12

jrbyrnes approved these changes Jan 29, 2024

View reviewed changes

jrbyrnes merged commit 0b77b19 into llvm:main Feb 9, 2024
3 of 4 checks passed

searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request May 23, 2024

[AMDGPU] Add test to show s_cselect generation from uniform select (l…

d758eaf

…lvm#79384) Change-Id: I7c55803e4284a5837e8bb80a54b2a72e97d934a1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU] Add test to show s_cselect generation from uniform select #79384

[AMDGPU] Add test to show s_cselect generation from uniform select #79384

choikwa commented Jan 24, 2024 •

edited

github-actions bot commented Jan 24, 2024

llvmbot commented Jan 24, 2024

choikwa commented Jan 24, 2024

jrbyrnes commented Jan 25, 2024

choikwa commented Jan 25, 2024

jayfoad left a comment

choikwa commented Jan 25, 2024

arsenm commented Jan 25, 2024

choikwa commented Jan 25, 2024

choikwa commented Jan 26, 2024

choikwa commented Jan 29, 2024

jrbyrnes left a comment

[AMDGPU] Add test to show s_cselect generation from uniform select #79384

[AMDGPU] Add test to show s_cselect generation from uniform select #79384

Conversation

choikwa commented Jan 24, 2024 • edited

github-actions bot commented Jan 24, 2024

llvmbot commented Jan 24, 2024

choikwa commented Jan 24, 2024

jrbyrnes commented Jan 25, 2024

choikwa commented Jan 25, 2024

jayfoad left a comment

Choose a reason for hiding this comment

choikwa commented Jan 25, 2024

arsenm commented Jan 25, 2024

choikwa commented Jan 25, 2024

choikwa commented Jan 26, 2024

choikwa commented Jan 29, 2024

jrbyrnes left a comment

Choose a reason for hiding this comment

choikwa commented Jan 24, 2024 •

edited