Skip to content

Commit

Permalink
[Target][ARM] Add PerformVSELECTCombine for MVE Integer Ops
Browse files Browse the repository at this point in the history
This patch adds an implementation of PerformVSELECTCombine in the
ARM DAG Combiner that transforms vselect(not(cond), lhs, rhs) into
vselect(cond, rhs, lhs).

Normally, this should be done by the target-independent DAG Combiner,
but it doesn't handle the kind of constants that we generate, so we
have to reimplement it here.

Differential Revision: https://reviews.llvm.org/D77712
  • Loading branch information
Pierre-vh committed May 5, 2020
1 parent 48aebfc commit ffdda49
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 154 deletions.
40 changes: 40 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -1460,6 +1460,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);

if (Subtarget->hasMVEIntegerOps())
setTargetDAGCombine(ISD::VSELECT);

if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
Expand Down Expand Up @@ -11719,6 +11722,42 @@ static SDValue PerformAddeSubeCombine(SDNode *N,
return SDValue();
}

/// Target DAG combine for ISD::VSELECT nodes.
///
/// Folds vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs), where the
/// "not" is spelled as an XOR of the condition with a constant 1 (or a splat
/// of 1s).
///
/// \param N         The VSELECT node being combined.
/// \param DCI       Combiner state; its DAG is used to build the new node.
/// \param Subtarget Queried for MVE integer support.
/// \returns The rewritten VSELECT, or an empty SDValue if the pattern does not
///          match.
static SDValue PerformVSELECTCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {
  // This fold is duplicated from the target-independent DAGCombiner because
  // that version calls isConstOrConstSplat with AllowTruncation set to false,
  // so it never recognises the constants we produce. (Turning truncation on
  // over there is not an option: it would break other targets.)
  //
  // Only MVE gains anything from the fold for now (a VPNOT+VPSEL pair turns
  // into a lone VPSEL), so bail out on every other subtarget.
  if (!Subtarget->hasMVEIntegerOps())
    return SDValue();

  // The select's condition has to be produced by an XOR.
  SDValue Mask = N->getOperand(0);
  if (Mask.getOpcode() != ISD::XOR)
    return SDValue();

  // The XOR only counts as a "not" when its right-hand operand is the constant
  // 1 or a BUILD_VECTOR splat of 1s. Truncation must be allowed in this query
  // because the BUILD_VECTORs generated in these situations truncate their
  // operands.
  ConstantSDNode *SplatC =
      isConstOrConstSplat(Mask->getOperand(1), /*AllowUndefs*/ false,
                          /*AllowTruncation*/ true);
  const bool IsLogicalNot = SplatC && SplatC->isOne();
  if (!IsLogicalNot)
    return SDValue();

  // Build vselect(cond, rhs, lhs): drop the inversion from the condition and
  // exchange the two selected values to compensate.
  SDValue TrueVal = N->getOperand(1);
  SDValue FalseVal = N->getOperand(2);
  return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
                         Mask->getOperand(0), FalseVal, TrueVal);
}

static SDValue PerformABSCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
Expand Down Expand Up @@ -15225,6 +15264,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
Expand Down
51 changes: 17 additions & 34 deletions llvm/test/CodeGen/Thumb2/mve-pred-or.ll
Expand Up @@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 ne, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -22,8 +21,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -38,8 +36,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 ge, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -54,8 +51,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -70,8 +66,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 gt, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -86,8 +81,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand Down Expand Up @@ -116,8 +110,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpugtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand Down Expand Up @@ -165,8 +158,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpeq_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 ne, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -181,8 +173,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpne_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -197,8 +188,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpslt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q2, q1
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -213,8 +203,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsgt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -229,8 +218,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsle_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q2, q1
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand All @@ -245,8 +233,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpsge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
Expand Down Expand Up @@ -340,8 +327,7 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i16 ne, q0, zr
; CHECK-NEXT: vcmpt.i16 ne, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <8 x i16> %a, zeroinitializer
Expand All @@ -356,8 +342,7 @@ define arm_aapcs_vfpcc <8 x i16> @cmpeq_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i16 ne, q0, zr
; CHECK-NEXT: vcmpt.i16 ne, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <8 x i16> %a, zeroinitializer
Expand All @@ -373,8 +358,7 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i8 ne, q0, zr
; CHECK-NEXT: vcmpt.i8 ne, q1, zr
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <16 x i8> %a, zeroinitializer
Expand All @@ -389,8 +373,7 @@ define arm_aapcs_vfpcc <16 x i8> @cmpeq_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i8 ne, q0, zr
; CHECK-NEXT: vcmpt.i8 ne, q1, q2
; CHECK-NEXT: vpnot
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <16 x i8> %a, zeroinitializer
Expand Down
36 changes: 12 additions & 24 deletions llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
Expand Up @@ -109,8 +109,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <4 x float> %src, %src2
Expand Down Expand Up @@ -485,8 +484,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <4 x float> %src, %src2
Expand Down Expand Up @@ -538,8 +536,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <4 x float> %src, %src2
Expand Down Expand Up @@ -591,8 +588,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <4 x float> %src, %src2
Expand Down Expand Up @@ -644,8 +640,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <4 x float> %src, %src2
Expand Down Expand Up @@ -698,8 +693,7 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float>
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <4 x float> %src, %src2
Expand Down Expand Up @@ -1019,8 +1013,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_one_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <8 x half> %src, %src2
Expand Down Expand Up @@ -1905,8 +1898,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ugt_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <8 x half> %src, %src2
Expand Down Expand Up @@ -2030,8 +2022,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_uge_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <8 x half> %src, %src2
Expand Down Expand Up @@ -2155,8 +2146,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ult_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <8 x half> %src, %src2
Expand Down Expand Up @@ -2280,8 +2270,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ule_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <8 x half> %src, %src2
Expand Down Expand Up @@ -2406,8 +2395,7 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_ord_v8f16(<8 x half> %src, <8 x half> %s
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
; CHECK-MVEFP-NEXT: vpnot
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <8 x half> %src, %src2
Expand Down

0 comments on commit ffdda49

Please sign in to comment.