Skip to content

Commit

Permalink
[AMDGPU] Fix typos in performCtlz_CttzCombine()
Browse files Browse the repository at this point in the history
Fix two obvious errors in the code and also update the test check.
Also add one test to catch the failure.

Patch by Ruiling Song!

Differential Revision: https://reviews.llvm.org/D83280
  • Loading branch information
jayfoad committed Jul 14, 2020
1 parent 959eaa5 commit 5ab2e14
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 12 deletions.
16 changes: 8 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Expand Up @@ -3462,24 +3462,24 @@ SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue C
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
SDValue CmpLHS = Cond.getOperand(0);

unsigned Opc = isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 :
AMDGPUISD::FFBH_U32;

// select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
// select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_b32 x
if (CCOpcode == ISD::SETEQ &&
(isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
RHS.getOperand(0) == CmpLHS &&
isNegativeOne(LHS)) {
RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) {
unsigned Opc =
isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}

// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
// select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_b32 x
if (CCOpcode == ISD::SETNE &&
(isCtlzOpc(LHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
LHS.getOperand(0) == CmpLHS &&
isNegativeOne(RHS)) {
(isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) {
unsigned Opc =
isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;

return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
Expand Up @@ -198,8 +198,8 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* n
}

; FUNC-LABEL: {{^}}v_cttz_i32_sel_eq_neg1:
; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]]
; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]]
; SI: v_ffbl_b32_e32 [[VAL:v[0-9]+]], v{{[0-9]+}}
; SI: buffer_store_dword [[VAL]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW
; EG: FFBL_INT
Expand All @@ -213,8 +213,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
}

; FUNC-LABEL: {{^}}v_cttz_i32_sel_ne_neg1:
; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]]
; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]]
; SI: v_ffbl_b32_e32 [[VAL:v[0-9]+]], v{{[0-9]+}}
; SI: buffer_store_dword [[VAL]],
; SI: s_endpgm
; EG: MEM_RAT_CACHELESS STORE_RAW
; EG: FFBL_INT
Expand Down
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
@@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s

declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone speculatable
; Test for a select of (-1, cttz) guarded by an "icmp ne %v, 0" compare.
; In this IR the -1 constant is the TRUE operand of the select and the cttz
; is the FALSE operand, and the cttz input (%sr) is a different value from
; the one being compared (%v). The expected output below therefore keeps the
; scalar s_ff1_i32_b32, the compare, and the v_cndmask instead of collapsing
; the select into a single find-first-bit instruction.
; NOTE(review): presumably this is the regression test for the operand mix-up
; fixed in the select/ctlz/cttz combine (D83280) -- confirm against the review.
define amdgpu_kernel void @select_constant_cttz(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
; GCN-LABEL: select_constant_cttz:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dword s8, s[2:3], 0x0
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s0, 1, s8
; GCN-NEXT: s_ff1_i32_b32 s0, s0
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], s8, 0
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[2:3]
; GCN-NEXT: v_ffbh_i32_e32 v1, v0
; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
; GCN-NEXT: v_sub_i32_e32 v0, vcc, 31, v1
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[0:1]
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT: s_endpgm
%v = load i32, i32 addrspace(1)* %arrayidx, align 4
%sr = lshr i32 1, %v
%cmp = icmp ne i32 %v, 0
%cttz = call i32 @llvm.cttz.i32(i32 %sr, i1 true), !range !0
; The select under test: -1 when %v != 0, otherwise cttz of %sr (not of %v).
%sel = select i1 %cmp, i32 -1, i32 %cttz
%ffbh = call i32 @llvm.amdgcn.sffbh.i32(i32 %sel)
%sub = sub i32 31, %ffbh
%cmp2 = icmp eq i32 %sel, 0
%or = or i1 %cmp, %cmp2
%sel2 = select i1 %or, i32 -1, i32 %sub
store i32 %sel2, i32 addrspace(1)* %out
ret void
}

!0 = !{i32 0, i32 33}

0 comments on commit 5ab2e14

Please sign in to comment.