Skip to content

Commit

Permalink
DAG combine "and|or (select c, -1, 0), x" -> "select c, x, 0|-1"
Browse files Browse the repository at this point in the history
Allow folding of "and"/"or" binops with a non-constant operand when both
arms of the select are 0 or -1 constants.

Normally this pattern with the "and" opcode does not reach the DAG combiner
because it has already been simplified in InstCombine. However, AMDGPU
produces it during lowering, where InstCombine has no chance to optimize it out.

By contrast, the same pattern with the "or" opcode can reach the DAG combiner.

Differential Revision: https://reviews.llvm.org/D48301

llvm-svn: 335250
  • Loading branch information
rampitec committed Jun 21, 2018
1 parent 21a2973 commit 22ee191
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 40 deletions.
17 changes: 14 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -1901,8 +1901,19 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
return SDValue();

// Bail out if any constants are opaque because we can't constant fold those.
// The exception is "and" and "or" with either 0 or -1 in which case we can
// propagate non constant operands into select. I.e.:
// and (select Cond, 0, -1), X --> select Cond, 0, X
// or X, (select Cond, -1, 0) --> select Cond, -1, X
bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
(isNullConstantOrNullSplatConstant(CT) ||
isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
(isNullConstantOrNullSplatConstant(CF) ||
isAllOnesConstantOrAllOnesSplatConstant(CF));

SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!isConstantOrConstantVector(CBO, true) &&
if (!CanFoldNonConst &&
!isConstantOrConstantVector(CBO, true) &&
!isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();

Expand All @@ -1923,14 +1934,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
SDLoc DL(Sel);
SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
if (!NewCT.isUndef() &&
if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();

SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
if (!NewCF.isUndef() &&
if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
Expand Down
102 changes: 102 additions & 0 deletions llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,5 +1,107 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Test: "and %y, (select %c, 0, -1)" with the select as the second operand.
; The checks expect a single conditional move that has 0 as one source, no
; separate v_and_b32, and the selected register being stored directly.
; GCN-LABEL: {{^}}select_and1:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = and i32 %y, %s
store i32 %a, i32 addrspace(1)* %p, align 4
ret void
}

; Test: "and (select %c, 0, -1), %y" with the select as the first operand
; (commuted form of the 0/-1 "and" case). The checks expect a single
; conditional move with 0 as one source and no separate v_and_b32.
; GCN-LABEL: {{^}}select_and2:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = and i32 %s, %y
store i32 %a, i32 addrspace(1)* %p, align 4
ret void
}

; Test: "and %y, (select %c, -1, 0)", i.e. the select arms are swapped so the
; true arm is all-ones and the false arm is zero. The checks expect a single
; conditional move with 0 as one source and no separate v_and_b32.
; GCN-LABEL: {{^}}select_and3:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 -1, i32 0
%a = and i32 %y, %s
store i32 %a, i32 addrspace(1)* %p, align 4
ret void
}

; Test: <4 x i32> variant of the "and" case, where a scalar i1 condition picks
; between an all-zeros vector and an all-ones splat that is then and-ed with %y.
; The checks expect four conditional moves (each with 0 as one source) before
; the store, and no v_and_b32.
; GCN-LABEL: {{^}}select_and_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword
define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
%a = and <4 x i32> %s, %y
store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
ret void
}

; Test: "or %y, (select %c, 0, -1)" with the select as the second operand.
; The checks expect a single conditional move that has -1 as one source, no
; separate v_or_b32, and the selected register being stored directly.
; GCN-LABEL: {{^}}select_or1:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = or i32 %y, %s
store i32 %a, i32 addrspace(1)* %p, align 4
ret void
}

; Test: "or (select %c, 0, -1), %y" with the select as the first operand
; (commuted form of the 0/-1 "or" case). The checks expect a single
; conditional move with -1 as one source and no separate v_or_b32.
; GCN-LABEL: {{^}}select_or2:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = or i32 %s, %y
store i32 %a, i32 addrspace(1)* %p, align 4
ret void
}

; Test: "or %y, (select %c, -1, 0)", i.e. the select arms are swapped so the
; true arm is all-ones and the false arm is zero. The checks expect a single
; conditional move with -1 as one source and no separate v_or_b32.
; GCN-LABEL: {{^}}select_or3:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 -1, i32 0
%a = or i32 %y, %s
store i32 %a, i32 addrspace(1)* %p, align 4
ret void
}

; Test: <4 x i32> variant of the "or" case, where a scalar i1 condition picks
; between an all-zeros vector and an all-ones splat that is then or-ed with %y.
; The checks expect four conditional moves (each with -1 as one source) before
; the store, and no v_or_b32.
; GCN-LABEL: {{^}}select_or_v4:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword
define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
%c = icmp slt i32 %x, 11
%s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
%a = or <4 x i32> %s, %y
store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
ret void
}

; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
Expand Down
26 changes: 9 additions & 17 deletions llvm/test/CodeGen/AMDGPU/udivrem.ll
Expand Up @@ -31,25 +31,25 @@
; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
; SI: v_cmp_eq_u32_e64 [[CC1:s\[[0-9:]+\]]], 0, [[RCP_HI]]
; SI: v_cndmask_b32_e64 [[CND1:v[0-9]+]], [[RCP_LO]], [[NEG_RCP_LO]], [[CC1]]
; SI: v_mul_hi_u32 [[E:v[0-9]+]], [[CND1]], [[RCP]]
; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
; SI: v_cndmask_b32_e64 [[CND2:v[0-9]+]], [[RCP_S_E]], [[RCP_A_E]], [[CC1]]
; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]], [[CND2]],
; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]], [[CND2]]
; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]],
; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]],
; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]],
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-NOT: v_and_b32
; SI: s_endpgm
define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) {
%result0 = udiv i32 %x, %y
Expand Down Expand Up @@ -124,8 +124,6 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1
; SI-DAG: v_mul_lo_i32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
; SI-DAG: v_add_{{[iu]}}32_e32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
Expand All @@ -147,8 +145,6 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1
; SI-DAG: v_mul_lo_i32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
; SI-DAG: v_add_{{[iu]}}32_e32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
Expand All @@ -157,6 +153,7 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-NOT: v_and_b32
; SI: s_endpgm
define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
%result0 = udiv <2 x i32> %x, %y
Expand Down Expand Up @@ -274,8 +271,6 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3
; SI-DAG: v_mul_lo_i32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
; SI-DAG: v_add_{{[iu]}}32_e32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
Expand All @@ -297,8 +292,6 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3
; SI-DAG: v_mul_lo_i32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
; SI-DAG: v_add_{{[iu]}}32_e32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
Expand All @@ -320,8 +313,6 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3
; SI-DAG: v_mul_lo_i32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
; SI-DAG: v_add_{{[iu]}}32_e32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
Expand All @@ -339,6 +330,7 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3
; SI-DAG: v_add_{{[iu]}}32_e32
; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-NOT: v_and_b32
; SI: s_endpgm
define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
%result0 = udiv <4 x i32> %x, %y
Expand Down
90 changes: 70 additions & 20 deletions llvm/test/CodeGen/X86/dagcombine-select.ll
Expand Up @@ -6,9 +6,7 @@ define i32 @select_and1(i32 %x, i32 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: setl %al
; CHECK-NEXT: decl %eax
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: cmovgel %esi, %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
Expand All @@ -21,24 +19,50 @@ define i32 @select_and2(i32 %x, i32 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: setl %al
; CHECK-NEXT: decl %eax
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: cmovgel %esi, %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = and i32 %s, %y
ret i32 %a
}

; Test: "and %y, (select %c, -1, 0)" on X86. The expected sequence zeroes the
; result register, compares %x against 11 and conditionally moves %y in when
; the signed less-than condition holds; no andl is expected.
define i32 @select_and3(i32 %x, i32 %y) {
; CHECK-LABEL: select_and3:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: cmovll %esi, %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 -1, i32 0
%a = and i32 %y, %s
ret i32 %a
}

; Test: <4 x i32> variant of the "and" case on X86, with a scalar i1 condition
; selecting between zeroinitializer and an all-ones splat that is and-ed with
; %y. The expected sequence branches on the compare and either keeps a zeroed
; register or copies %y into it; no vector "and" instruction is expected.
define <4 x i32> @select_and_v4(i32 %x, <4 x i32> %y) {
; CHECK-LABEL: select_and_v4:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: jl .LBB3_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32><i32 -1, i32 -1, i32 -1, i32 -1>
%a = and <4 x i32> %s, %y
ret <4 x i32> %a
}

define i32 @select_or1(i32 %x, i32 %y) {
; CHECK-LABEL: select_or1:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: setl %al
; CHECK-NEXT: decl %eax
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovll %esi, %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
Expand All @@ -49,18 +73,44 @@ define i32 @select_or1(i32 %x, i32 %y) {
define i32 @select_or2(i32 %x, i32 %y) {
; CHECK-LABEL: select_or2:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: setl %al
; CHECK-NEXT: decl %eax
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovll %esi, %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = or i32 %s, %y
ret i32 %a
}

; Test: "or %y, (select %c, -1, 0)" on X86. The expected sequence compares %x
; against 11, materializes -1 in the result register and conditionally moves
; %y in when the signed greater-or-equal condition holds; no orl is expected.
define i32 @select_or3(i32 %x, i32 %y) {
; CHECK-LABEL: select_or3:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovgel %esi, %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 -1, i32 0
%a = or i32 %y, %s
ret i32 %a
}

; Test: <4 x i32> variant of the "or" case on X86, with a scalar i1 condition
; selecting between zeroinitializer and an all-ones splat that is or-ed with
; %y. The expected sequence branches on the compare: the taken path returns
; %y unchanged in %xmm0, the fall-through path overwrites %xmm0 via pcmpeqd;
; no vector "or" instruction is expected.
define <4 x i32> @select_or_v4(i32 %x, <4 x i32> %y) {
; CHECK-LABEL: select_or_v4:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $11, %edi
; CHECK-NEXT: jl .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 11
%s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32><i32 -1, i32 -1, i32 -1, i32 -1>
%a = or <4 x i32> %s, %y
ret <4 x i32> %a
}

define i32 @sel_constants_sub_constant_sel_constants(i1 %cond) {
; CHECK-LABEL: sel_constants_sub_constant_sel_constants:
; CHECK: # %bb.0:
Expand Down Expand Up @@ -186,11 +236,11 @@ define double @fsub_constant_sel_constants(i1 %cond) {
; CHECK-LABEL: fsub_constant_sel_constants:
; CHECK: # %bb.0:
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB13_1
; CHECK-NEXT: jne .LBB17_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB13_1:
; CHECK-NEXT: .LBB17_1:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
%sel = select i1 %cond, double -4.0, double 23.3
Expand All @@ -202,11 +252,11 @@ define double @fdiv_constant_sel_constants(i1 %cond) {
; CHECK-LABEL: fdiv_constant_sel_constants:
; CHECK: # %bb.0:
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB14_1
; CHECK-NEXT: jne .LBB18_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB14_1:
; CHECK-NEXT: .LBB18_1:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
%sel = select i1 %cond, double -4.0, double 23.3
Expand All @@ -218,11 +268,11 @@ define double @frem_constant_sel_constants(i1 %cond) {
; CHECK-LABEL: frem_constant_sel_constants:
; CHECK: # %bb.0:
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB15_1
; CHECK-NEXT: jne .LBB19_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB15_1:
; CHECK-NEXT: .LBB19_1:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: retq
%sel = select i1 %cond, double -4.0, double 23.3
Expand Down

0 comments on commit 22ee191

Please sign in to comment.