Skip to content

Commit

Permalink
DAGCombiner: Canonicalize select(and/or,x,y) depending on target.
Browse files Browse the repository at this point in the history
This is based on the following equivalences:
select(C0 & C1, X, Y) <=> select(C0, select(C1, X, Y), Y)
select(C0 | C1, X, Y) <=> select(C0, X, select(C1, X, Y))

Many targets cannot perform and/or on the CPU flags and therefore the
right side should be chosen to avoid materializing the i1 flags in an
integer register. If the target can perform this operation efficiently
we normalize to the left form.

Differential Revision: http://reviews.llvm.org/D7622

llvm-svn: 231507
  • Loading branch information
MatzeB committed Mar 6, 2015
1 parent 3ecb557 commit 898d11e
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 14 deletions.
19 changes: 19 additions & 0 deletions llvm/include/llvm/Target/TargetLowering.h
Expand Up @@ -1097,6 +1097,25 @@ class TargetLoweringBase {
virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
return nullptr;
}

/// Decides whether a select whose condition is an and/or should be
/// canonicalized into a chain of selects:
///   select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y)
///   select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
/// Returns true if the chained form is likely to save us from materializing
/// N0 and N1 in an integer register. Targets that are able to perform and/or
/// on flags should return false here.
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
                                             EVT VT) const {
  // A target with multiple condition registers likely has logical operations
  // on those registers, so the and/or form is kept for it.
  if (hasMultipleConditionRegisters())
    return false;
  // The transform is only wanted when the value stays in one register, i.e.
  // when type legalization will not expand or split it.
  const LegalizeTypeAction Action = getTypeAction(Context, VT);
  const bool IsSplitIntoMultipleRegs = Action == TypeExpandInteger ||
                                       Action == TypeExpandFloat ||
                                       Action == TypeSplitVector;
  return !IsSplitIntoMultipleRegs;
}

//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
Expand Down
63 changes: 63 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -4819,6 +4819,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SimplifySelect(SDLoc(N), N0, N1, N2);
}

if (VT0 == MVT::i1) {
if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
// select (and Cond0, Cond1), X, Y
// -> select Cond0, (select Cond1, X, Y), Y
if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
N1.getValueType(), Cond1, N1, N2);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
InnerSelect, N2);
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
N1.getValueType(), Cond1, N1, N2);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
InnerSelect);
}
}

// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
if (N1->getOpcode() == ISD::SELECT) {
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
if (N1_2 == N2) {
// Create the actual and node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
N0, N1_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
N1_1, N2);
}
// Otherwise see if we can optimize the "and" to a better pattern.
if (SDValue Combined = visitANDLike(N0, N1_0, N))
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
N1_1, N2);
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
if (N2->getOpcode() == ISD::SELECT) {
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
if (N2_1 == N1) {
// Create the actual or node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
N0, N2_0);
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
N1, N2_2);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
N1, N2_2);
}
}
}

return SDValue();
}

Expand Down
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/ARM/movcc-double.ll
@@ -0,0 +1,50 @@
; RUN: llc -o - %s | FileCheck %s
target triple = "arm-unknown-unknown"

; select with and i1/or i1 condition should be implemented as a series of 2
; cmovs, not by producing two conditions and using and on them.

; Selects between %a4 and %a5 on the conjunction of two unsigned compares
; (%a0 < %a1 and %a2 < %a3). The directives below reject tst and movne and
; expect two conditional moves (movlo/movhs).
define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
; CHECK-LABEL: select_and
; CHECK-NOT: tst
; CHECK-NOT: movne
; CHECK: mov{{lo|hs}}
; CHECK: mov{{lo|hs}}
%cmp0 = icmp ult i32 %a0, %a1
%cmp1 = icmp ult i32 %a2, %a3
%and = and i1 %cmp0, %cmp1
%res = select i1 %and, i32 %a4, i32 %a5
ret i32 %res
}

; Selects between %a4 and %a5 on the disjunction of two unsigned compares
; (%a0 < %a1 or %a2 < %a3).
define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
; select with or i1 condition should be implemented as a series of 2 cmovs, not
; by producing two conditions and using or on them.
; CHECK-LABEL: select_or
; CHECK-NOT: orrs
; CHECK-NOT: tst
; CHECK: mov{{lo|hs}}
; CHECK: mov{{lo|hs}}
%cmp0 = icmp ult i32 %a0, %a1
%cmp1 = icmp ult i32 %a2, %a3
%or = or i1 %cmp0, %cmp1
%res = select i1 %or, i32 %a4, i32 %a5
ret i32 %res
}

; If one of the conditions is materialized as a 0/1 value anyway, then the
; sequence of 2 cmovs should not be used.

@var32 = global i32 0
; The or result has a second use here: it is zero-extended and stored to @var32
; with a volatile store, in addition to feeding the select. The directives
; below expect the flag-setting orrs followed by a single movne.
define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
; CHECK-LABEL: select_noopt
; CHECK: orrs
; CHECK: movne
%cmp0 = icmp ult i32 %a0, %a1
%cmp1 = icmp ult i32 %a1, %a2
%or = or i1 %cmp0, %cmp1
%zero_one = zext i1 %or to i32
store volatile i32 %zero_one, i32* @var32
%res = select i1 %or, i32 %a3, i32 %a4
ret i32 %res
}
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/R600/or.ll
Expand Up @@ -156,14 +156,14 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}

; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float, float addrspace(1)* %in0
%b = load float, float addrspace(1)* %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 0.000000e+00
%or = or i1 %acmp, %bcmp
%result = select i1 %or, float %a, float %b
store float %result, float addrspace(1)* %out
%result = zext i1 %or to i32
store i32 %result, i32 addrspace(1)* %out
ret void
}

Expand Down
52 changes: 52 additions & 0 deletions llvm/test/CodeGen/X86/cmov-double.ll
@@ -0,0 +1,52 @@
; RUN: llc -o - %s | FileCheck %s
target triple = "x86_64-unknown-unknown"

; select with and i1/or i1 condition should be implemented as a series of 2
; cmovs, not by producing two conditions and using and on them.

; Selects between %a4 and %a5 on the conjunction of an unsigned integer compare
; (%a0 < %a1) and an ordered float compare (%a2 < %a3). The directives below
; reject set*, andl/andb and test, and expect two cmov instructions.
define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
; CHECK-LABEL: select_and
; CHECK-NOT: set
; CHECK-NOT: and[lb]
; CHECK-NOT: test
; CHECK: cmov
; CHECK: cmov
%cmp0 = icmp ult i32 %a0, %a1
%cmp1 = fcmp olt float %a2, %a3
%and = and i1 %cmp0, %cmp1
%res = select i1 %and, i32 %a4, i32 %a5
ret i32 %res
}

; Selects between %a4 and %a5 on the disjunction of an unsigned integer compare
; (%a0 < %a1) and an ordered float compare (%a2 < %a3).
define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
; select with or i1 condition should be implemented as a series of 2 cmovs, not
; by producing two conditions and using or on them.
; CHECK-LABEL: select_or
; CHECK-NOT: set
; CHECK-NOT: or[lb]
; CHECK-NOT: test
; CHECK: cmov
; CHECK: cmov
%cmp0 = icmp ult i32 %a0, %a1
%cmp1 = fcmp olt float %a2, %a3
; NOTE: despite its name, %and holds the result of an or.
%and = or i1 %cmp0, %cmp1
%res = select i1 %and, i32 %a4, i32 %a5
ret i32 %res
}

; If one of the conditions is materialized as a 0/1 value anyway, then the
; sequence of 2 cmovs should not be used.

@var32 = global i32 0
; The or result has a second use here: it is zero-extended and stored to @var32
; with a volatile store, in addition to feeding the select. The directives
; below expect exactly one cmov and no further cmov after it.
define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
; CHECK-LABEL: select_noopt
; CHECK: cmov
; CHECK-NOT: cmov
%cmp0 = icmp ult i32 %a0, %a1
%cmp1 = icmp ult i32 %a1, %a2
%or = or i1 %cmp0, %cmp1
%zero_one = zext i1 %or to i32
store volatile i32 %zero_one, i32* @var32
%res = select i1 %or, i32 %a3, i32 %a4
ret i32 %res
}
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/X86/jump_sign.ll
Expand Up @@ -217,17 +217,15 @@ entry:
; PR13475
; If we have sub a, b and cmp b, a and the result of cmp is used
; by sbb, we should not optimize cmp away.
define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: func_q:
; CHECK: cmp
; CHECK-NEXT: sbb
%tmp532 = add i32 %j.4, %w
%tmp533 = icmp ugt i32 %tmp532, %el
%tmp534 = icmp ult i32 %w, %el
%or.cond = and i1 %tmp533, %tmp534
%tmp535 = sub i32 %el, %w
%j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
ret i32 %j.5
%1 = icmp ult i32 %a0, %a1
%2 = sub i32 %a1, %a0
%3 = select i1 %1, i32 -1, i32 0
%4 = xor i32 %2, %3
ret i32 %4
}
; rdar://11873276
define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/X86/zext-sext.ll
Expand Up @@ -34,11 +34,12 @@ entry:
%tmp12 = add i64 %tmp11, 5089792279245435153

; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK-NOT: [[REGISTER_zext]]
; CHECK-DAG: testl %e[[REGISTER_zext]]
; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
; CHECK-DAG: cmpl $2138875573, %e[[REGISTER_zext]]
; CHECK: movq [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext2]]

%tmp13 = sub i64 %tmp12, 2138875574
%tmp14 = zext i32 %tmp4 to i64
Expand Down

0 comments on commit 898d11e

Please sign in to comment.