[x86] make mayFold* helpers visible to more files; NFC
The first function is needed for D112464, but we might
as well keep these together in case the others can be
used someday.
rotateright committed Oct 29, 2021
1 parent 8f786b4 commit 837518d
Showing 2 changed files with 48 additions and 28 deletions.
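For context: exporting these helpers means code outside of X86ISelLowering.cpp can query x86 load-folding legality through the X86 namespace. A minimal sketch of such a caller (hypothetical file and function, not part of this commit; only X86ISelLowering.h and X86::mayFoldLoad are real):

// Hypothetical caller elsewhere in the X86 backend -- a sketch, not part of
// this commit. With the declaration exported from X86ISelLowering.h, any
// file in the target that includes the header can ask whether a load would
// fold into another instruction's memory operand.
#include "X86ISelLowering.h"

static bool preferMemoryForm(SDValue Op, const X86Subtarget &Subtarget) {
  // The helper checks that Op is a single-use normal load of a size the
  // subtarget can fold directly as a memory operand.
  return X86::mayFoldLoad(Op, Subtarget, /*AssumeSingleUse=*/false);
}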
57 changes: 29 additions & 28 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5039,8 +5039,8 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

-static bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
-                        bool AssumeSingleUse = false) {
+bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+                      bool AssumeSingleUse) {
if (!AssumeSingleUse && !Op.hasOneUse())
return false;
if (!ISD::isNormalLoad(Op.getNode()))
@@ -5058,11 +5058,11 @@ static bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
return true;
}

-static bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
-                                            const X86Subtarget &Subtarget,
-                                            bool AssumeSingleUse = false) {
+bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+                                          const X86Subtarget &Subtarget,
+                                          bool AssumeSingleUse) {
assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
-  if (!mayFoldLoad(Op, Subtarget, AssumeSingleUse))
+  if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
return false;

// We can not replace a wide volatile load with a broadcast-from-memory,
@@ -5072,11 +5072,11 @@ static bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
}

-static bool MayFoldIntoStore(SDValue Op) {
+bool X86::mayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}

-static bool MayFoldIntoZeroExtend(SDValue Op) {
+bool X86::mayFoldIntoZeroExtend(SDValue Op) {
if (Op.hasOneUse()) {
unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
return (ISD::ZERO_EXTEND == Opcode);
@@ -9013,7 +9013,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL);
} else {
if (!Subtarget.hasAVX2() &&
-          !mayFoldLoadIntoBroadcastFromMem(
+          !X86::mayFoldLoadIntoBroadcastFromMem(
RepeatLoad, RepeatVT.getScalarType().getSimpleVT(),
Subtarget,
/*AssumeSingleUse=*/true))
@@ -12746,7 +12746,7 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
MutableArrayRef<int> InputMask) {
unsigned EltSizeInBits = Input.getScalarValueSizeInBits();
if (!Subtarget.hasAVX2() && (!Subtarget.hasAVX() || EltSizeInBits < 32 ||
-                                !mayFoldLoad(Input, Subtarget)))
+                                !X86::mayFoldLoad(Input, Subtarget)))
return;
if (isNoopShuffleMask(InputMask))
return;
@@ -16431,7 +16431,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
-      mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) {
+      X86::mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) {
auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
if (!Ld->isNonTemporal()) {
MVT MemVT = VT.getHalfNumVectorElementsVT();
@@ -19068,8 +19068,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
if (VT.getSizeInBits() == 8) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
// we're going to zero extend the register or fold the store.
-    if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
-        !MayFoldIntoStore(Op))
+    if (llvm::isNullConstant(Idx) && !X86::mayFoldIntoZeroExtend(Op) &&
+        !X86::mayFoldIntoStore(Op))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
@@ -19225,8 +19225,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (VT == MVT::i16) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
// we're going to zero extend the register or fold the store (SSE41 only).
-    if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
-        !(Subtarget.hasSSE41() && MayFoldIntoStore(Op))) {
+    if (IdxVal == 0 && !X86::mayFoldIntoZeroExtend(Op) &&
+        !(Subtarget.hasSSE41() && X86::mayFoldIntoStore(Op))) {
if (Subtarget.hasFP16())
return Op;

@@ -19432,7 +19432,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (!VT.is128BitVector() && IdxVal >= NumEltsIn128 &&
((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
(Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
-        mayFoldLoad(N1, Subtarget)))) {
+        X86::mayFoldLoad(N1, Subtarget)))) {
SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);
SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
@@ -19505,7 +19505,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// combine either bitwise AND or insert of float 0.0 to set these bits.

bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
-  if (IdxVal == 0 && (!MinSize || !mayFoldLoad(N1, Subtarget))) {
+  if (IdxVal == 0 && (!MinSize || !X86::mayFoldLoad(N1, Subtarget))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
// than an insertps. Blends are simpler operations in hardware and so
@@ -24645,8 +24645,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// being inserted between two CMOV's. (in i16 case too TBN)
// https://bugs.llvm.org/show_bug.cgi?id=40974
if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
-      (Op.getValueType() == MVT::i16 && !mayFoldLoad(Op1, Subtarget) &&
-       !mayFoldLoad(Op2, Subtarget))) {
+      (Op.getValueType() == MVT::i16 && !X86::mayFoldLoad(Op1, Subtarget) &&
+       !X86::mayFoldLoad(Op2, Subtarget))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
SDValue Ops[] = { Op2, Op1, CC, Cond };
@@ -36997,7 +36997,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (isUndefOrEqual(Mask, 0)) {
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-          mayFoldLoad(V1.getOperand(0), Subtarget)) {
+          X86::mayFoldLoad(V1.getOperand(0), Subtarget)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = V1.getOperand(0);
@@ -38440,8 +38440,8 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
unsigned Imm = V.getConstantOperandVal(2);
const X86Subtarget &Subtarget =
static_cast<const X86Subtarget &>(DAG.getSubtarget());
-    if (!mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
-        mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
+    if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
+        X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
return SDValue();
Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
@@ -51713,8 +51713,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
(Subtarget.hasAVX2() ||
-        mayFoldLoadIntoBroadcastFromMem(Op0.getOperand(0), VT.getScalarType(),
-                                        Subtarget)))
+        X86::mayFoldLoadIntoBroadcastFromMem(Op0.getOperand(0),
+                                             VT.getScalarType(), Subtarget)))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
Op0.getOperand(0),
@@ -51723,7 +51723,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Subtarget.hasAVX2() ||
-       (EltSizeInBits >= 32 && mayFoldLoad(Op0.getOperand(0), Subtarget))) &&
+       (EltSizeInBits >= 32 &&
+        X86::mayFoldLoad(Op0.getOperand(0), Subtarget))) &&
Op0.getOperand(0).getValueType() == VT.getScalarType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));

@@ -53056,7 +53057,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
-    if (mayFoldLoad(N0, Subtarget) && IsFoldableRMW(N0, Op))
+    if (X86::mayFoldLoad(N0, Subtarget) && IsFoldableRMW(N0, Op))
return false;
break;
}
@@ -53071,11 +53072,11 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
// Avoid disabling potential load folding opportunities.
-    if (mayFoldLoad(N1, Subtarget) &&
+    if (X86::mayFoldLoad(N1, Subtarget) &&
(!Commute || !isa<ConstantSDNode>(N0) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
return false;
-    if (mayFoldLoad(N0, Subtarget) &&
+    if (X86::mayFoldLoad(N0, Subtarget) &&
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
19 changes: 19 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -911,6 +911,25 @@ namespace llvm {
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
bool AllowPartialUndefs = true);

+/// Check if Op is a load operation that could be folded into some other x86
+/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
+bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+                 bool AssumeSingleUse = false);
+
+/// Check if Op is a load operation that could be folded into a vector splat
+/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
+bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+                                     const X86Subtarget &Subtarget,
+                                     bool AssumeSingleUse = false);
+
+/// Check if Op is a value that could be used to fold a store into some
+/// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
+bool mayFoldIntoStore(SDValue Op);
+
+/// Check if Op is an operation that could be folded into a zero extend x86
+/// instruction.
+bool mayFoldIntoZeroExtend(SDValue Op);
} // end namespace X86

//===--------------------------------------------------------------------===//
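Taken together, the new declarations let any file that includes X86ISelLowering.h ask all four folding questions documented above. A rough illustration of the intended call sites (a hypothetical helper, not part of this commit; the hasAVX() guard mirrors the assert inside mayFoldLoadIntoBroadcastFromMem):

// Hypothetical sketch only -- one call per exported predicate.
static bool anyMemoryFoldPossible(SDValue V, MVT EltVT,
                                  const X86Subtarget &Subtarget) {
  return X86::mayFoldLoad(V, Subtarget) ||   // load folds into its one user
         (Subtarget.hasAVX() &&              // broadcast-from-mem needs AVX
          X86::mayFoldLoadIntoBroadcastFromMem(V, EltVT, Subtarget)) ||
         X86::mayFoldIntoStore(V) ||         // single use is a normal store
         X86::mayFoldIntoZeroExtend(V);      // single use is a zero extend
}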
