Skip to content

Commit

Permalink
Allow binop C1, (select cc, CF, CT) -> select folding
Browse files Browse the repository at this point in the history
Previously this folding was done only if the select was the first operand.
However, for non-commutative operations the constant may come before the
select.

Differential Revision: https://reviews.llvm.org/D48223

llvm-svn: 335167
  • Loading branch information
rampitec committed Jun 20, 2018
1 parent 7d796ec commit 20279dc
Show file tree
Hide file tree
Showing 4 changed files with 353 additions and 189 deletions.
38 changes: 28 additions & 10 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1877,16 +1877,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
"Unexpected binary operator");

// Bail out if any constants are opaque because we can't constant fold those.
SDValue C1 = BO->getOperand(1);
if (!isConstantOrConstantVector(C1, true) &&
!isConstantFPBuildVectorOrConstantFP(C1))
return SDValue();

// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
// TODO: Handle ISD::SELECT_CC.
unsigned SelOpNo = 0;
SDValue Sel = BO->getOperand(0);
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
SelOpNo = 1;
Sel = BO->getOperand(1);
}

if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
return SDValue();

Expand All @@ -1900,18 +1900,36 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();

// Bail out if any constants are opaque because we can't constant fold those.
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!isConstantOrConstantVector(CBO, true) &&
!isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();

EVT VT = Sel.getValueType();

// For shifts, the shifted value and the shift amount may have different VTs.
// For instance, on x86 the shift amount is i8 regardless of the LHS type. Bail
// out if we have swapped operands and the value types do not match. NB: x86 is
// fine if the operands are not swapped and the shift amount VT is not bigger
// than the VT of the shifted value.
// TODO: It is possible to check for a shift operation, correct the VTs and
// still perform the optimization on x86 if needed.
if (SelOpNo && VT != CBO.getValueType())
return SDValue();

// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
// Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
EVT VT = Sel.getValueType();
// Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
SDLoc DL(Sel);
SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
if (!NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();

SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
if (!NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCF))
Expand Down
182 changes: 182 additions & 0 deletions llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
; i32 sub with the constant as the first operand: 5 - (select %cond, -4, 3).
; Folding the sub into both select arms gives select %cond, 9, 2
; (5 - (-4) = 9 and 5 - 3 = 2), which are the two immediates expected on the
; v_cndmask line above.
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 -4, i32 3
%bo = sub i32 5, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; TODO: shrink i16 constant. This is correct but suboptimal.
; GCN: v_mov_b32_e32 [[T:v[0-9]+]], 0xffff0009
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[T]],
; i16 variant of the sub test: 5 - (select %cond, -4, 3) folds to
; select %cond, 9, 2. The expected output materializes the 9 through a
; v_mov of 0xffff0009 (low 16 bits are 9), which the TODO above flags as
; correct but suboptimal.
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i16 -4, i16 3
%bo = sub i16 5, %sel
store i16 %bo, i16 addrspace(1)* %p, align 2
ret void
}

; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: v_mov_b32_e32 [[F:v[0-9]+]], 0xfffff449
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, [[F]], -3,
; i16 sub where both folded results are negative:
; 1 - (select %cond, 4, 3000) folds to select %cond, -3, -2999.
; -2999 sign-extended to 32 bits is 0xfffff449, the literal expected in the
; v_mov above; -3 is expected directly as a v_cndmask operand.
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(i16 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i16 4, i16 3000
%bo = sub i16 1, %sel
store i16 %bo, i16 addrspace(1)* %p, align 2
ret void
}

; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0x60002
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x50009
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
; <2 x i16> sub with a constant vector as the first operand:
; <5, 7> - (select %cond, <-4, 2>, <3, 1>) folds to
; select %cond, <9, 5>, <2, 6>. Packed into 32 bits (element 0 in the low
; half) these are 0x50009 for the true value and 0x60002 for the false value,
; matching the two v_mov literals expected above.
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i16> addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
%bo = sub <2 x i16> <i16 5, i16 7>, %sel
store <2 x i16> %bo, <2 x i16> addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7,
; <4 x i32> sub with a constant vector as the first operand:
; <5, 7, 9, 11> - (select %cond, <-4, 2, 3, 4>, <3, 1, -1, -3>) folds to
; select %cond, <9, 5, 6, 7>, <2, 6, 10, 14>. Each lane corresponds to one of
; the four expected v_cndmask lines above (pairs 2/9, 6/5, 10/6, 14/7).
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
%bo = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %sel
store <4 x i32> %bo, <4 x i32> addrspace(1)* %p, align 32
ret void
}

; GCN-LABEL: {{^}}sdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
; Signed division with the constant as the dividend:
; 120 sdiv (select %cond, 121, 23) folds to select %cond, 0, 5
; (120 / 121 = 0 and 120 / 23 = 5), the two immediates expected above.
define amdgpu_kernel void @sdiv_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 121, i32 23
%bo = sdiv i32 120, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}udiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
; Unsigned division with the constant as the dividend:
; 120 udiv (select %cond, -4, 23) folds to select %cond, 0, 5.
; As an unsigned i32, -4 is 0xfffffffc, so 120 udiv -4 is 0; 120 udiv 23 is 5.
define amdgpu_kernel void @udiv_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 -4, i32 23
%bo = udiv i32 120, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
; Signed remainder with the constant as the dividend:
; 33 srem (select %cond, 34, 15) folds to select %cond, 33, 3
; (33 rem 34 = 33 and 33 rem 15 = 3), the two immediates expected above.
define amdgpu_kernel void @srem_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 34, i32 15
%bo = srem i32 33, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
; Unsigned remainder with the constant as the dividend:
; 33 urem (select %cond, 34, 15) folds to select %cond, 33, 3
; (33 rem 34 = 33 and 33 rem 15 = 3), the two immediates expected above.
define amdgpu_kernel void @urem_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 34, i32 15
%bo = urem i32 33, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 4,
; Left shift where the select supplies the shift amount:
; 1 shl (select %cond, 2, 3) folds to select %cond, 4, 8
; (1 << 2 = 4 and 1 << 3 = 8), the two immediates expected above.
define amdgpu_kernel void @shl_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 2, i32 3
%bo = shl i32 1, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 16,
; Logical right shift where the select supplies the shift amount:
; 64 lshr (select %cond, 2, 3) folds to select %cond, 16, 8
; (64 >> 2 = 16 and 64 >> 3 = 8), the two immediates expected above.
define amdgpu_kernel void @lshr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 2, i32 3
%bo = lshr i32 64, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 16, 32,
; Arithmetic right shift where the select supplies the shift amount:
; 128 ashr (select %cond, 2, 3) folds to select %cond, 32, 16
; (128 >> 2 = 32 and 128 >> 3 = 16; the shifted value is non-negative, so no
; sign bits are shifted in).
define amdgpu_kernel void @ashr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, i32 2, i32 3
%bo = ashr i32 128, %sel
store i32 %bo, i32 addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
; f32 subtraction with the constant as the first operand:
; -1.0 - (select %cond, -2.0, 3.0) folds to select %cond, 1.0, -4.0
; (-1.0 - (-2.0) = 1.0 and -1.0 - 3.0 = -4.0), the two immediates expected
; above.
define amdgpu_kernel void @fsub_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, float -2.0, float 3.0
%bo = fsub float -1.0, %sel
store float %bo, float addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: it should be possible to fold constants with OpSel
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
; f16 variant of the fsub test: -1.0 - (select %cond, -2.0, 3.0) folds to
; select %cond, 1.0, -4.0. In IEEE half precision 1.0 is 0x3c00 and -4.0 is
; 0xc400, the two v_mov literals expected above.
define amdgpu_kernel void @fsub_constant_sel_constants_f16(half addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, half -2.0, half 3.0
%bo = fsub half -1.0, %sel
store half %bo, half addrspace(1)* %p, align 2
ret void
}

; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x45003c00
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -2.0, [[T]],
; <2 x half> fsub with a constant vector as the first operand:
; <-1.0, 2.0> - (select %cond, <-2.0, -3.0>, <-1.0, 4.0>) folds to
; select %cond, <1.0, 5.0>, <0.0, -2.0>. The true value packs to 0x45003c00
; (5.0 = 0x4500 in the high half, 1.0 = 0x3c00 in the low half). The false
; value packs to 0xc0000000, which has the same bits as f32 -2.0 -- presumably
; why the expected v_cndmask operand is printed as -2.0 (NOTE(review): confirm).
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
%bo = fsub <2 x half> <half -1.0, half 2.0>, %sel
store <2 x half> %bo, <2 x half> addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000
; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000
; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0,
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]],
; <4 x float> fsub with a constant vector as the first operand:
; <-1, 2, 5, 8> - (select %cond, <-2, -3, -4, -5>, <-1, 0, 1, 2>) folds to
; select %cond, <1, 5, 9, 13>, <0, 2, 4, 6>. The v_mov literals expected above
; are the f32 encodings of 5.0 (0x40a00000), 9.0 (0x41100000),
; 13.0 (0x41500000) and 6.0 (0x40c00000); the remaining values (0, 1.0, 2.0,
; 4.0) are expected directly as v_cndmask operands.
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
%bo = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %sel
store <4 x float> %bo, <4 x float> addrspace(1)* %p, align 32
ret void
}

; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
; f32 division with the constant as the dividend:
; 8.0 / (select %cond, -4.0, 2.0) folds to select %cond, -2.0, 4.0
; (8.0 / -4.0 = -2.0 and 8.0 / 2.0 = 4.0), the two immediates expected above.
define amdgpu_kernel void @fdiv_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, float -4.0, float 2.0
%bo = fdiv float 8.0, %sel
store float %bo, float addrspace(1)* %p, align 4
ret void
}

; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
; f32 remainder with the constant as the dividend:
; 5.0 frem (select %cond, -4.0, 3.0) folds to select %cond, 1.0, 2.0.
; frem yields a result with the sign of the dividend, so 5.0 frem -4.0 is 1.0;
; 5.0 frem 3.0 is 2.0.
define amdgpu_kernel void @frem_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
%sel = select i1 %cond, float -4.0, float 3.0
%bo = frem float 5.0, %sel
store float %bo, float addrspace(1)* %p, align 4
ret void
}
Loading

0 comments on commit 20279dc

Please sign in to comment.