Skip to content

Commit

Permalink
[X86] LowerBUILD_VECTOR - share the same SDLoc argument instead of re…
Browse files Browse the repository at this point in the history
…creating it over and over again.
  • Loading branch information
RKSimon committed Feb 8, 2024
1 parent a643ab8 commit 7d4733a
Showing 1 changed file with 42 additions and 45 deletions.
87 changes: 42 additions & 45 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7135,6 +7135,7 @@ static bool isFoldableUseOfShuffle(SDNode *N) {
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
const SDLoc &dl,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// VBROADCAST requires AVX.
Expand All @@ -7145,8 +7146,6 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,

MVT VT = BVOp->getSimpleValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDLoc dl(BVOp);

assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");

Expand Down Expand Up @@ -7492,14 +7491,13 @@ static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG,
}

// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {

MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");

SDLoc dl(Op);
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorAllOnes(Op.getNode()))
return Op;
Expand Down Expand Up @@ -7618,7 +7616,7 @@ LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) {
/// See the corrected implementation in isHopBuildVector(). Can we reduce this
/// code because it is only used for partial h-op matching now?
static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
SelectionDAG &DAG,
const SDLoc &DL, SelectionDAG &DAG,
unsigned BaseIdx, unsigned LastIdx,
SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
Expand Down Expand Up @@ -7928,6 +7926,7 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
/// 'fsubadd' operation accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB or
/// X86ISD::FMSUBADD node.
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
Expand All @@ -7938,7 +7937,6 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
return SDValue();

MVT VT = BV->getSimpleValueType(0);
SDLoc DL(BV);

// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
Expand Down Expand Up @@ -8057,22 +8055,22 @@ static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
}

static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
SelectionDAG &DAG, unsigned HOpcode,
SDValue V0, SDValue V1) {
const SDLoc &DL, SelectionDAG &DAG,
unsigned HOpcode, SDValue V0, SDValue V1) {
// If either input vector is not the same size as the build vector,
// extract/insert the low bits to the correct size.
// This is free (examples: zmm --> xmm, xmm --> ymm).
MVT VT = BV->getSimpleValueType(0);
unsigned Width = VT.getSizeInBits();
if (V0.getValueSizeInBits() > Width)
V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width);
V0 = extractSubVector(V0, 0, DAG, DL, Width);
else if (V0.getValueSizeInBits() < Width)
V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width);
V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, DL, Width);

if (V1.getValueSizeInBits() > Width)
V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width);
V1 = extractSubVector(V1, 0, DAG, DL, Width);
else if (V1.getValueSizeInBits() < Width)
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, DL, Width);

unsigned NumElts = VT.getVectorNumElements();
APInt DemandedElts = APInt::getAllOnes(NumElts);
Expand All @@ -8084,17 +8082,17 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
unsigned HalfNumElts = NumElts / 2;
if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
MVT HalfVT = VT.getHalfNumVectorElementsVT();
V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256);
V0 = extractSubVector(V0, 0, DAG, DL, 128);
V1 = extractSubVector(V1, 0, DAG, DL, 128);
SDValue Half = DAG.getNode(HOpcode, DL, HalfVT, V0, V1);
return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, DL, 256);
}

return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
return DAG.getNode(HOpcode, DL, VT, V0, V1);
}

/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// We need at least 2 non-undef elements to make this worthwhile by default.
Expand All @@ -8114,7 +8112,7 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
unsigned HOpcode;
SDValue V0, V1;
if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1);
}

// Try harder to match 256-bit ops by using extract/concat.
Expand All @@ -8134,22 +8132,21 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
if (BV->getOperand(i)->isUndef())
NumUndefsHI++;

SDLoc DL(BV);
SDValue InVec0, InVec1;
if (VT == MVT::v8i32 || VT == MVT::v16i16) {
SDValue InVec2, InVec3;
unsigned X86Opcode;
bool CanFold = true;

if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2,
InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0,
InVec1) &&
isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2,
InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
Expand Down Expand Up @@ -8179,15 +8176,16 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
VT == MVT::v16i16) {
unsigned X86Opcode;
if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0,
InVec1))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0,
InVec1))
X86Opcode = X86ISD::HSUB;
else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0,
InVec1))
X86Opcode = X86ISD::FHADD;
else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0,
InVec1))
X86Opcode = X86ISD::FHSUB;
else
Expand Down Expand Up @@ -8218,10 +8216,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
/// NOTE: Its not in our interest to start make a general purpose vectorizer
/// from this, but enough scalar bit operations are created from the later
/// legalization + scalarization stages to need basic support.
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Expand Down Expand Up @@ -8296,9 +8293,9 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
/// functionality to do this, so it's all zeros, all ones, or some derivation
/// that is cheap to calculate.
static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
static SDValue materializeVectorConstant(SDValue Op, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();

// Vectors containing all zeros can be matched by pxor and xorps.
Expand All @@ -8322,7 +8319,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
/// from a vector of source values and a vector of extraction indices.
/// The vectors might be manipulated to match the type of the permute op.
static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
SDLoc &DL, SelectionDAG &DAG,
const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT ShuffleVT = VT;
EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
Expand Down Expand Up @@ -8590,7 +8587,8 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
// TODO: Utilize pshufb and zero mask blending to support more efficient
// construction of vectors with constant-0 elements.
static SDValue
LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue SrcVec, IndicesVec;
// Check for a match of the permute source vector and permute index elements.
Expand Down Expand Up @@ -8629,7 +8627,6 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
return SDValue();
}

SDLoc DL(V);
MVT VT = V.getSimpleValueType();
return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
}
Expand All @@ -8645,14 +8642,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {

// Generate vectors for predicate vectors.
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);
return LowerBUILD_VECTORvXi1(Op, dl, DAG, Subtarget);

if (VT.getVectorElementType() == MVT::bf16 &&
(Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16()))
return LowerBUILD_VECTORvXbf16(Op, DAG, Subtarget);

if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
if (SDValue VectorCst = materializeVectorConstant(Op, dl, DAG, Subtarget))
return VectorCst;

unsigned EVTBits = EltVT.getSizeInBits();
APInt UndefMask = APInt::getZero(NumElems);
Expand Down Expand Up @@ -8747,13 +8744,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}

if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG))
return AddSub;
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG))
return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG))
return Broadcast;
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
return BitOp;

unsigned NumZero = ZeroMask.popcount();
Expand Down Expand Up @@ -8901,8 +8898,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (IsAllConstants)
return SDValue();

if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget))
return V;
if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, dl, DAG, Subtarget))
return V;

// See if we can use a vector load to get all of the elements.
{
Expand Down

0 comments on commit 7d4733a

Please sign in to comment.