Skip to content

Commit

Permalink
[CodeGen][SelectionDAG] Flip Booleans More Often
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D77201
  • Loading branch information
Pierre-vh committed Apr 7, 2020
1 parent 39e9149 commit 23342bd
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 165 deletions.
26 changes: 15 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -2592,23 +2592,27 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
if (V.getOpcode() != ISD::XOR)
return SDValue();

ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1),
/*AllowUndefs*/ false,
/*AllowTruncation*/ true);
if (!Const)
return SDValue();

EVT VT = V.getValueType();

bool IsFlip = false;
switch(TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
IsFlip = Const->isAllOnesValue();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
break;
APInt ConstValue =
Const->getAPIntValue().sextOrTrunc(VT.getScalarSizeInBits());
switch (TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
IsFlip = ConstValue.isOneValue();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
IsFlip = ConstValue.isAllOnesValue();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (ConstValue & 0x01) == 1;
break;
}

if (IsFlip)
Expand Down
51 changes: 17 additions & 34 deletions llvm/test/CodeGen/Thumb2/mve-pred-or.ll
Expand Up @@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 ne, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -22,8 +21,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -38,8 +36,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 ge, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -54,8 +51,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -70,8 +66,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 gt, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -86,8 +81,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand Down Expand Up @@ -116,8 +110,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpugtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand Down Expand Up @@ -165,8 +158,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeq_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 ne, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -181,8 +173,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpne_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -197,8 +188,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q2, q1
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -213,8 +203,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -229,8 +218,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsle_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q2, q1
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -245,8 +233,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand Down Expand Up @@ -340,8 +327,7 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i16 ne, q0, zr
; CHECK-NEXT: vcmpt.i16 ne, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <8 x i16> %a, zeroinitializer
Expand All @@ -356,8 +342,7 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeq_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i16 ne, q0, zr
; CHECK-NEXT: vcmpt.i16 ne, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <8 x i16> %a, zeroinitializer
Expand All @@ -373,8 +358,7 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i8 ne, q0, zr
; CHECK-NEXT: vcmpt.i8 ne, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <16 x i8> %a, zeroinitializer
Expand All @@ -389,8 +373,7 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeq_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i8 ne, q0, zr
; CHECK-NEXT: vcmpt.i8 ne, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <16 x i8> %a, zeroinitializer
Expand Down
36 changes: 12 additions & 24 deletions llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
Expand Up @@ -109,8 +109,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <4 x float> %src, %src2
Expand Down Expand Up @@ -485,8 +484,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <4 x float> %src, %src2
Expand Down Expand Up @@ -538,8 +536,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <4 x float> %src, %src2
Expand Down Expand Up @@ -591,8 +588,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <4 x float> %src, %src2
Expand Down Expand Up @@ -644,8 +640,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <4 x float> %src, %src2
Expand Down Expand Up @@ -698,8 +693,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <4 x float> %src, %src2
Expand Down Expand Up @@ -1019,8 +1013,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <8 x half> %src, %src2
Expand Down Expand Up @@ -1905,8 +1898,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <8 x half> %src, %src2
Expand Down Expand Up @@ -2030,8 +2022,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <8 x half> %src, %src2
Expand Down Expand Up @@ -2155,8 +2146,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <8 x half> %src, %src2
Expand Down Expand Up @@ -2280,8 +2270,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <8 x half> %src, %src2
Expand Down Expand Up @@ -2406,8 +2395,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <8 x half> %src, %src2
Expand Down

0 comments on commit 23342bd

Please sign in to comment.