Skip to content

Commit

Permalink
[ARM] OrCombineToBFI function
Browse files Browse the repository at this point in the history
Extract the functionality to combine OR to BFI into its own function.

Differential Revision: https://reviews.llvm.org/D39001

llvm-svn: 316563
  • Loading branch information
sparker-arm committed Oct 25, 2017
1 parent ccb209b commit 1f74211
Showing 1 changed file with 109 additions and 92 deletions.
201 changes: 109 additions & 92 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -10357,95 +10357,17 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR,
return SDValue(OR, 0);
}

/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VORR
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;

if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();

APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN && Subtarget->hasNEON() &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VorrVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VorrVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
}
}
}

if (!Subtarget->isThumb1Only()) {
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
return Result;
}

// The code below optimizes (or (and X, Y), Z).
// The AND operand needs to have a single user to make these optimizations
// profitable.
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);

// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;

APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
// Ensure that the second operand of both ands are constants
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
// Ensure that the bit width of the constants are the same and that
// the splat arguments are logical inverses as per the pattern we
// are trying to simplify.
if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
SplatBits0 == ~SplatBits1) {
// Canonicalize the vector type to make instruction selection
// simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
N0->getOperand(1),
N0->getOperand(0),
N1->getOperand(0));
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
}
}

// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.

static SDValue PerformORCombineToBFI(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// BFI is only available on V6T2+
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();

EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & mask) == val
Expand Down Expand Up @@ -10487,9 +10409,10 @@ static SDValue PerformORCombine(SDNode *N,
DAG.getConstant(Val, DL, MVT::i32),
DAG.getConstant(Mask, DL, MVT::i32));

// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
// Return value from the original node to inform the combiner than N is
// now dead.
return SDValue(N, 0);
}
} else if (N1.getOpcode() == ISD::AND) {
// case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
Expand All @@ -10513,9 +10436,10 @@ static SDValue PerformORCombine(SDNode *N,
DAG.getConstant(amt, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
DAG.getConstant(Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
// Return value from the original node to inform the combiner than N is
// now dead.
return SDValue(N, 0);
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
(~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
Expand All @@ -10529,9 +10453,10 @@ static SDValue PerformORCombine(SDNode *N,
DAG.getConstant(lsb, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
DAG.getConstant(Mask2, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
// Return value from the original node to inform the combiner than N is
// now dead.
return SDValue(N, 0);
}
}

Expand All @@ -10549,10 +10474,102 @@ static SDValue PerformORCombine(SDNode *N,
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
DAG.getConstant(~Mask, DL, MVT::i32));

// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
// Return value from the original node to inform the combiner than N is
// now dead.
return SDValue(N, 0);
}

return SDValue();
}

/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VORR
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;

if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();

APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN && Subtarget->hasNEON() &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VorrVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VorrVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
}
}
}

if (!Subtarget->isThumb1Only()) {
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
return Result;
}

// The code below optimizes (or (and X, Y), Z).
// The AND operand needs to have a single user to make these optimizations
// profitable.
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);

// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;

APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
// Ensure that the second operand of both ands are constants
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
// Ensure that the bit width of the constants are the same and that
// the splat arguments are logical inverses as per the pattern we
// are trying to simplify.
if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
SplatBits0 == ~SplatBits1) {
// Canonicalize the vector type to make instruction selection
// simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
N0->getOperand(1),
N0->getOperand(0),
N1->getOperand(0));
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
}
}

// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
return Res;

return SDValue();
}

Expand Down

0 comments on commit 1f74211

Please sign in to comment.