95 changes: 50 additions & 45 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4070,10 +4070,9 @@ static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
}

/// Break an operation into 2 half-sized ops and then concatenate the results.
- static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG) {
+ static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) {
unsigned NumOps = Op.getNumOperands();
EVT VT = Op.getValueType();
- SDLoc dl(Op);

// Extract the LHS Lo/Hi vectors
SmallVector<SDValue> LoOps(NumOps, SDValue());
@@ -4096,7 +4095,8 @@ static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG) {

/// Break a unary integer operation into 2 half-sized ops and then
/// concatenate the result back.
- static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
+ static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG,
+                                    const SDLoc &dl) {
// Make sure we only try to split 256/512-bit types to avoid creating
// narrow vectors.
EVT VT = Op.getValueType();
@@ -4107,19 +4107,20 @@ static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
VT.getVectorNumElements() &&
"Unexpected VTs!");
- return splitVectorOp(Op, DAG);
+ return splitVectorOp(Op, DAG, dl);
}

/// Break a binary integer operation into 2 half-sized ops and then
/// concatenate the result back.
- static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
+ static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG,
+                                     const SDLoc &dl) {
// Assert that all the types match.
EVT VT = Op.getValueType();
(void)VT;
assert(Op.getOperand(0).getValueType() == VT &&
Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
- return splitVectorOp(Op, DAG);
+ return splitVectorOp(Op, DAG, dl);
}
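Note: every hunk in this patch makes the same mechanical change — the split helpers now take the SDLoc from the caller instead of rebuilding it with `SDLoc(Op)` internally. A minimal sketch of the unary path under the new signature (hedged: the real `splitVectorOp` iterates over all operands, so it covers binary ops too):

```cpp
// Sketch only: split the input, re-emit the opcode on each half at the
// caller-provided location, then concatenate the halves back together.
static SDValue splitVectorUnarySketch(SDValue Op, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  EVT VT = Op.getValueType();
  auto [Lo, Hi] = splitVector(Op.getOperand(0), DAG, dl);
  SDValue LoRes = DAG.getNode(Op.getOpcode(), dl, Lo.getValueType(), Lo);
  SDValue HiRes = DAG.getNode(Op.getOpcode(), dl, Hi.getValueType(), Hi);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LoRes, HiRes);
}
```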

// Helper for splitting operands of an operation to legal target size and
@@ -20054,7 +20055,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,

if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
assert(InVT == MVT::v32i8 && "Unexpected VT!");
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, dl);
}

if (Subtarget.hasInt256())
@@ -20635,7 +20636,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.hasAVX512()) {
if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) {
assert(VT == MVT::v32i8 && "Unexpected VT!");
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);
}

// word to byte only under BWI. Otherwise we have to promote to v16i32
@@ -21615,7 +21616,8 @@ SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op,

/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
- static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
+ static SDValue lowerAddSubToHorizontalOp(SDValue Op, const SDLoc &DL,
+                                          SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// If both operands have other uses, this is probably not profitable.
SDValue LHS = Op.getOperand(0);
@@ -21671,7 +21673,6 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,

// Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
// equivalent, so extract the 256/512-bit source op to 128-bit if we can.
- SDLoc DL(Op);
if (BitWidth == 256 || BitWidth == 512) {
unsigned LaneIdx = LExtIndex / NumEltsPerLane;
X = extract128BitVector(X, LaneIdx * NumEltsPerLane, DAG, DL);
@@ -21692,7 +21693,7 @@
SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
"Only expecting float/double");
- return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+ return lowerAddSubToHorizontalOp(Op, SDLoc(Op), DAG, Subtarget);
}
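For context on the helper whose signature changed above: it rewrites a scalar add/sub of two extracts from one vector as a single horizontal op plus one extract. A hedged sketch of the emission step — `HOpc` and the other parameters are illustrative stand-ins for values the real code computes (X86ISD::HADD/HSUB/FHADD/FHSUB, the narrowed source, the lane index):

```cpp
// e.g. extractelt(X, 0) + extractelt(X, 1) --> extractelt(HADD(X, X), 0)
static SDValue horizontalPairSketch(unsigned HOpc, SDValue X, unsigned Idx,
                                    EVT ResVT, const SDLoc &DL,
                                    SelectionDAG &DAG) {
  SDValue HOp = DAG.getNode(HOpc, DL, X.getValueType(), X, X);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, HOp,
                     DAG.getVectorIdxConstant(Idx, DL));
}
```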

/// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
@@ -24449,7 +24450,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,

if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
assert(InVT == MVT::v32i8 && "Unexpected VT!");
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, dl);
}

if (Subtarget.hasInt256())
@@ -27812,7 +27813,7 @@ static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
// Split the vector; its Lo and Hi parts will be handled in the next iteration.
if (NumElems > 16 ||
(NumElems == 16 && !Subtarget.canExtendTo512DQ()))
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, dl);

MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
@@ -27922,11 +27923,11 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,

// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
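The in-register LUT fallback named here is the classic PSHUFB nibble trick. A scalar model of the per-byte math — hedged, since the vector version performs both lookups with PSHUFB and selects with a compare-against-zero mask rather than a branch:

```cpp
#include <cstdint>

// ctlz of one byte via two 16-entry nibble lookups.
static uint8_t ctlz8LutModel(uint8_t B) {
  // LUT[v] = number of leading zeros of v within 4 bits.
  static const uint8_t LUT[16] = {4, 3, 2, 2, 1, 1, 1, 1,
                                  0, 0, 0, 0, 0, 0, 0, 0};
  uint8_t Hi = B >> 4, Lo = B & 0xf;
  // If the high nibble is zero, all 4 of its bits lead; add the low count.
  return Hi ? LUT[Hi] : uint8_t(4 + LUT[Lo]);
}
```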
@@ -27999,16 +28000,18 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);

if (VT == MVT::i16 || VT == MVT::i32)
- return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+ return lowerAddSubToHorizontalOp(Op, DL, DAG, Subtarget);

if (VT == MVT::v32i16 || VT == MVT::v64i8)
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);

assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);
}
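The hoisted `SDLoc DL(Op);` above is the caller-side half of the refactor: the location is derived from Op once and reused on every path. A hedged note on what that object snapshots (summary, not the class definition):

```cpp
SDLoc DL(Op);                      // taken once per lowering call
unsigned Order = DL.getIROrder();  // ordering hint stamped on new nodes
DebugLoc Loc = DL.getDebugLoc();   // source location; may be empty
```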

static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
@@ -28022,7 +28025,7 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
(VT.is256BitVector() && !Subtarget.hasInt256())) {
assert(Op.getSimpleValueType().isInteger() &&
"Only handle AVX vector integer operation");
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);
}

// Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*.
@@ -28084,10 +28087,11 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);

if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
// Since X86 does not have CMOV for 8-bit integer, we don't convert
// 8-bit integer abs to NEG and CMOV.
- SDLoc DL(Op);
SDValue N0 = Op.getOperand(0);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, DL, VT), N0);
@@ -28098,7 +28102,6 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,

// ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X).
if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) {
- SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
SDValue Sub =
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
@@ -28108,11 +28111,11 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256()) {
assert(VT.isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);
}

if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

// Default to expand.
return SDValue();
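On the `VPBLENDVPD(X, 0-X, X)` comment above: BLENDVPD selects per lane on the mask's sign bit, so X itself can serve as the mask — negative lanes take `0-X`, the rest keep `X`. A generic, hedged rendering of that selection (the actual lowering emits X86ISD::BLENDV directly so the sign bit is used as-is):

```cpp
// abs(X): lanes where X < 0 take 0-X, all others keep X.
static SDValue absViaSelectSketch(SDValue X, MVT VT, const SDLoc &DL,
                                  SelectionDAG &DAG) {
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
  EVT MaskVT = VT.changeVectorElementType(MVT::i1);
  SDValue IsNeg = DAG.getSetCC(DL, MaskVT, X, Zero, ISD::SETLT);
  return DAG.getNode(ISD::VSELECT, DL, VT, IsNeg, Neg, X);
}
```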
@@ -28121,13 +28124,14 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);

// For AVX1 cases, split to use legal ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);

if (VT == MVT::v32i16 || VT == MVT::v64i8)
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);

// Default to expand.
return SDValue();
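For reference, the AVG node handled here corresponds to the PAVGB/PAVGW-style rounding average. A scalar model of the ceiling-average identity that avoids widening — hedged as a model of the semantics, not the emitted code:

```cpp
#include <cstdint>

// avgceilu(A, B) = ceil((A + B) / 2) without overflowing 8 bits.
// Since A + B == (A ^ B) + 2*(A & B), the rounded-up half is:
static uint8_t avgCeilU8Model(uint8_t A, uint8_t B) {
  return uint8_t((A | B) - ((A ^ B) >> 1));
}
```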
@@ -28136,13 +28140,14 @@ static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);

// For AVX1 cases, split to use legal ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);

if (VT == MVT::v32i16 || VT == MVT::v64i8)
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);

// Default to expand.
return SDValue();
@@ -28299,15 +28304,15 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
+ SDLoc dl(Op);

// For AVX1 cases, split to use legal ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.useBWIRegs())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

- SDLoc dl(Op);
bool IsSigned = Op.getOpcode() == ISD::ABDS;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
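For context on what LowerABD produces once the type is legal: one standard expansion is max minus min. A hedged sketch, assuming the min/max opcodes are legal for VT (the real code checks that through TLI before choosing a path):

```cpp
// abds/abdu(A, B) --> sub(smax/umax(A, B), smin/umin(A, B))
static SDValue abdViaMinMaxSketch(SDValue A, SDValue B, bool IsSigned, EVT VT,
                                  const SDLoc &dl, SelectionDAG &DAG) {
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  SDValue Max = DAG.getNode(MaxOpc, dl, VT, A, B);
  SDValue Min = DAG.getNode(MinOpc, dl, VT, A, B);
  return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
}
```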

@@ -28350,10 +28355,10 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,

// Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
@@ -28576,10 +28581,10 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,

// Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
@@ -29757,10 +29762,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,

// Decompose 256-bit shifts into 128-bit shifts.
if (VT.is256BitVector())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

if (VT == MVT::v32i16 || VT == MVT::v64i8)
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, dl);

return SDValue();
}
@@ -29837,7 +29842,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
EltSizeInBits < 32)) {
// Pre-mask the amount modulo using the wider vector.
Op = DAG.getNode(Op.getOpcode(), DL, VT, Op0, Op1, AmtMod);
- return splitVectorOp(Op, DAG);
+ return splitVectorOp(Op, DAG, DL);
}

// Attempt to fold scalar shift as unpack(y,x) << zext(splat(z))
Expand Down Expand Up @@ -29999,7 +30004,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,

// Split 256-bit integers on XOP/pre-AVX2 targets.
if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2()))
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);

// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
@@ -30035,7 +30040,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,

// Split 512-bit integers on non-512-bit BWI targets.
if (VT.is512BitVector() && !Subtarget.useBWIRegs())
- return splitVectorIntBinary(Op, DAG);
+ return splitVectorIntBinary(Op, DAG, DL);

assert(
(VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
@@ -31115,11 +31120,11 @@ static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL,

// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

// For element types greater than i8, do vXi8 pop counts and a bytesum.
if (VT.getScalarType() != MVT::i8) {
@@ -31243,7 +31248,7 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {

// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

assert(VT.is128BitVector() &&
"Only 128-bit vector bitreverse lowering supported.");
@@ -31282,11 +31287,11 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,

// Split 512-bit ops without BWI so that we can still use the PSHUFB lowering.
if (VT.is512BitVector() && !Subtarget.hasBWI())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG, DL);

// Lower vXi16/vXi32/vXi64 as BSWAP + vXi8 BITREVERSE.
if (VT.getScalarType() != MVT::i8) {
@@ -55933,7 +55938,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
if (isConcatenatedNot(InVecBC.getOperand(0)) ||
isConcatenatedNot(InVecBC.getOperand(1))) {
// extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
- SDValue Concat = splitVectorIntBinary(InVecBC, DAG);
+ SDValue Concat = splitVectorIntBinary(InVecBC, DAG, SDLoc(InVecBC));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,
DAG.getBitcast(InVecVT, Concat), N->getOperand(1));
}
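Why splitting helps in this fold: halving the v4i64 AND pairs each 128-bit half of X with the matching half of the concatenated NOT, and the subsequent extract keeps only one half, letting the not() fold into ANDNP. A hedged sketch of the intended end state for subvector 0 — X, Y0, and DL are illustrative names, and extractSubVector is this file's subvector-extract helper:

```cpp
// extract (and v4i64 X, (not (concat Y0, Y1))), 0 --> andnp v2i64 Y0, X0
SDValue X0 = extractSubVector(X, /*IdxVal=*/0, DAG, DL, /*vectorWidth=*/128);
// X86ISD::ANDNP(A, B) computes (~A) & B, so the NOT is absorbed:
SDValue Res = DAG.getNode(X86ISD::ANDNP, DL, MVT::v2i64, Y0, X0);
```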