Allow binop C1, (select cc, CF, CT) -> select folding

Previously this folding was done only if the select was the first operand. However, for non-commutative operations the constant may come before the select. Differential Revision: https://reviews.llvm.org/D48223 llvm-svn: 335167
llvm · Jun 20, 2018 · 20279dc · 20279dc
1 parent 7d796ec
commit 20279dc
Show file tree

Hide file tree

Showing 4 changed files with 353 additions and 189 deletions.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1877,16 +1877,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
          "Unexpected binary operator");
 
-  // Bail out if any constants are opaque because we can't constant fold those.
-  SDValue C1 = BO->getOperand(1);
-  if (!isConstantOrConstantVector(C1, true) &&
-      !isConstantFPBuildVectorOrConstantFP(C1))
-    return SDValue();
-
   // Don't do this unless the old select is going away. We want to eliminate the
   // binary operator, not replace a binop with a select.
   // TODO: Handle ISD::SELECT_CC.
+  unsigned SelOpNo = 0;
   SDValue Sel = BO->getOperand(0);
+  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+    SelOpNo = 1;
+    Sel = BO->getOperand(1);
+  }
+
   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
     return SDValue();
 
@@ -1900,18 +1900,36 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
       !isConstantFPBuildVectorOrConstantFP(CF))
     return SDValue();
 
+  // Bail out if any constants are opaque because we can't constant fold those.
+  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+  if (!isConstantOrConstantVector(CBO, true) &&
+      !isConstantFPBuildVectorOrConstantFP(CBO))
+    return SDValue();
+
+  EVT VT = Sel.getValueType();
+
+  // In case of shift value and shift amount may have different VT. For instance
+  // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
+  // swapped operands and value types do not match. NB: x86 is fine if operands
+  // are not swapped with shift amount VT being not bigger than shifted value.
+  // TODO: it is possible to check for a shift operation, correct VTs and
+  // still perform optimization on x86 if needed.
+  if (SelOpNo && VT != CBO.getValueType())
+    return SDValue();
+
   // We have a select-of-constants followed by a binary operator with a
   // constant. Eliminate the binop by pulling the constant math into the select.
-  // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
-  EVT VT = Sel.getValueType();
+  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
   SDLoc DL(Sel);
-  SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
+  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
   if (!NewCT.isUndef() &&
       !isConstantOrConstantVector(NewCT, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCT))
     return SDValue();
 
-  SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
+  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
   if (!NewCF.isUndef() &&
       !isConstantOrConstantVector(NewCF, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCF))

diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -0,0 +1,182 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
+define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 -4, i32 3
+  %bo = sub i32 5, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
+; TODO: shrink i16 constant. This is correct but suboptimal.
+; GCN: v_mov_b32_e32 [[T:v[0-9]+]], 0xffff0009
+; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[T]],
+define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i16 -4, i16 3
+  %bo = sub i16 5, %sel
+  store i16 %bo, i16 addrspace(1)* %p, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
+; GCN: v_mov_b32_e32 [[F:v[0-9]+]], 0xfffff449
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, [[F]], -3,
+define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(i16 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i16 4, i16 3000
+  %bo = sub i16 1, %sel
+  store i16 %bo, i16 addrspace(1)* %p, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
+; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0x60002
+; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x50009
+; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
+define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i16> addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
+  %bo = sub <2 x i16> <i16 5, i16 7>, %sel
+  store <2 x i16> %bo, <2 x i16> addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
+; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
+; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5,
+; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6,
+; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7,
+define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
+  %bo = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %sel
+  store <4 x i32> %bo, <4 x i32> addrspace(1)* %p, align 32
+  ret void
+}
+
+; GCN-LABEL: {{^}}sdiv_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
+define amdgpu_kernel void @sdiv_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 121, i32 23
+  %bo = sdiv i32 120, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}udiv_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
+define amdgpu_kernel void @udiv_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 -4, i32 23
+  %bo = udiv i32 120, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}srem_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
+define amdgpu_kernel void @srem_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 34, i32 15
+  %bo = srem i32 33, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}urem_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
+define amdgpu_kernel void @urem_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 34, i32 15
+  %bo = urem i32 33, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}shl_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 4,
+define amdgpu_kernel void @shl_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 2, i32 3
+  %bo = shl i32 1, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}lshr_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 16,
+define amdgpu_kernel void @lshr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 2, i32 3
+  %bo = lshr i32 64, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}ashr_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 16, 32,
+define amdgpu_kernel void @ashr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, i32 2, i32 3
+  %bo = ashr i32 128, %sel
+  store i32 %bo, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}fsub_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
+define amdgpu_kernel void @fsub_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, float -2.0, float 3.0
+  %bo = fsub float -1.0, %sel
+  store float %bo, float addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
+; TODO: it should be possible to fold constants with OpSel
+; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
+; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
+; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
+define amdgpu_kernel void @fsub_constant_sel_constants_f16(half addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, half -2.0, half 3.0
+  %bo = fsub half -1.0, %sel
+  store half %bo, half addrspace(1)* %p, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
+; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x45003c00
+; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, -2.0, [[T]],
+define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
+  %bo = fsub <2 x half> <half -1.0, half 2.0>, %sel
+  store <2 x half> %bo, <2 x half> addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
+; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000
+; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000
+; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000
+; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000
+; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0,
+; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]],
+; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]],
+; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]],
+define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
+  %bo = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %sel
+  store <4 x float> %bo, <4 x float> addrspace(1)* %p, align 32
+  ret void
+}
+
+; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
+define amdgpu_kernel void @fdiv_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, float -4.0, float 2.0
+  %bo = fdiv float 8.0, %sel
+  store float %bo, float addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}frem_constant_sel_constants:
+; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
+define amdgpu_kernel void @frem_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
+  %sel = select i1 %cond, float -4.0, float 3.0
+  %bo = frem float 5.0, %sel
+  store float %bo, float addrspace(1)* %p, align 4
+  ret void
+}