[VectorUtils][X86] De-templatize scaleShuffleMask and 2 X86 shuffle mask helpers and move their implementation to cpp files

Summary: These were templated because SelectionDAG uses int masks for shuffles and IR uses unsigned masks. But now that D72467 has landed, we have an int mask version of IRBuilder::CreateShuffleVector, so just use int instead of a template.
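For reference, a minimal sketch of the int-mask CreateShuffleVector overload the summary refers to; the function name, builder, and mask values here are illustrative, not part of the patch.

#include "llvm/IR/IRBuilder.h"

// Illustrative only: V1/V2 are two <4 x i32> values already available to an
// IRBuilder `B`. A negative mask element requests an undef lane, matching the
// SelectionDAG convention, so no conversion to unsigned is needed.
static llvm::Value *shuffleExample(llvm::IRBuilder<> &B, llvm::Value *V1,
                                   llvm::Value *V2) {
  int Mask[] = {3, 2, -1, -1};
  return B.CreateShuffleVector(V1, V2, Mask);
}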

Reviewers: spatel, efriedma, RKSimon

Reviewed By: efriedma

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D77183
topperc committed Apr 1, 2020
1 parent 15f34ff commit f92563f
Showing 7 changed files with 84 additions and 74 deletions.
18 changes: 2 additions & 16 deletions llvm/include/llvm/Analysis/VectorUtils.h
@@ -339,22 +339,8 @@ bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
///
/// This is the reverse process of "canWidenShuffleElements", but can always
/// succeed.
-template <typename T>
-void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
-                      SmallVectorImpl<T> &ScaledMask) {
-  assert(Scale > 0 && "Unexpected scaling factor");
-
-  // Fast-path: if no scaling, then it is just a copy.
-  if (Scale == 1) {
-    ScaledMask.assign(Mask.begin(), Mask.end());
-    return;
-  }
-
-  ScaledMask.clear();
-  for (int MaskElt : Mask)
-    for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
-      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
-}
+void scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
+                      SmallVectorImpl<int> &ScaledMask);

/// Compute a map of integer instructions to their minimum legal type
/// size.
16 changes: 16 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
@@ -397,6 +397,22 @@ bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
return false;
}

+void llvm::scaleShuffleMask(size_t Scale, ArrayRef<int> Mask,
+                            SmallVectorImpl<int> &ScaledMask) {
+  assert(Scale > 0 && "Unexpected scaling factor");
+
+  // Fast-path: if no scaling, then it is just a copy.
+  if (Scale == 1) {
+    ScaledMask.assign(Mask.begin(), Mask.end());
+    return;
+  }
+
+  ScaledMask.clear();
+  for (int MaskElt : Mask)
+    for (int ScaleElt = 0; ScaleElt != (int)Scale; ++ScaleElt)
+      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + ScaleElt);
+}

MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {
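To make the behavior of llvm::scaleShuffleMask above concrete, here is a small usage sketch; the wrapper function and mask values are illustrative, but the result follows directly from the loop: each element i becomes Scale consecutive entries Scale*i .. Scale*i+Scale-1, and negative sentinels (undef) are simply replicated.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

// Illustrative usage: widen a 2-element shuffle mask to 4 elements.
static void scaleExample() {
  int Narrow[] = {1, -1};
  llvm::SmallVector<int, 8> Scaled;
  llvm::scaleShuffleMask(2, Narrow, Scaled); // Scaled == {2, 3, -1, -1}
}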
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19815,8 +19815,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
SmallVector<int, 8> InnerMask;
SmallVector<int, 8> OuterMask;
-scaleShuffleMask<int>(InnerScale, InnerSVN->getMask(), InnerMask);
-scaleShuffleMask<int>(OuterScale, SVN->getMask(), OuterMask);
+scaleShuffleMask(InnerScale, InnerSVN->getMask(), InnerMask);
+scaleShuffleMask(OuterScale, SVN->getMask(), OuterMask);

// Merge the shuffle masks.
SmallVector<int, 8> NewMask;
49 changes: 39 additions & 10 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6133,6 +6133,35 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
return SDValue();
}

+void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+                                   bool Lo, bool Unary) {
+  assert(Mask.empty() && "Expected an empty shuffle mask vector");
+  int NumElts = VT.getVectorNumElements();
+  int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+  for (int i = 0; i < NumElts; ++i) {
+    unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+    int Pos = (i % NumEltsInLane) / 2 + LaneStart;
+    Pos += (Unary ? 0 : NumElts * (i % 2));
+    Pos += (Lo ? 0 : NumEltsInLane / 2);
+    Mask.push_back(Pos);
+  }
+}
+
+/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
+/// imposed by AVX and specific to the unary pattern. Example:
+/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
+/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
+void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+                                   bool Lo) {
+  assert(Mask.empty() && "Expected an empty shuffle mask vector");
+  int NumElts = VT.getVectorNumElements();
+  for (int i = 0; i < NumElts; ++i) {
+    int Pos = i / 2;
+    Pos += (Lo ? 0 : NumElts / 2);
+    Mask.push_back(Pos);
+  }
+}

/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
@@ -7320,8 +7349,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,

size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
-scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
-scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
+scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
+scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
for (size_t i = 0; i != MaskSize; ++i) {
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
Mask.push_back(SM_SentinelUndef);
@@ -7379,7 +7408,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
if ((NumSubElts % SubMask.size()) == 0) {
int Scale = NumSubElts / SubMask.size();
SmallVector<int,64> ScaledSubMask;
-scaleShuffleMask<int>(Scale, SubMask, ScaledSubMask);
+scaleShuffleMask(Scale, SubMask, ScaledSubMask);
SubMask = ScaledSubMask;
} else {
int Scale = SubMask.size() / NumSubElts;
@@ -16279,7 +16308,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
SmallVector<int, 4> PSHUFDMask;
-scaleShuffleMask<int>(2, RepeatedMask, PSHUFDMask);
+scaleShuffleMask(2, RepeatedMask, PSHUFDMask);
return DAG.getBitcast(
MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
@@ -16928,7 +16957,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
SmallVector<int, 2> Widened256Mask;
if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
Widened128Mask.clear();
-llvm::scaleShuffleMask<int>(2, Widened256Mask, Widened128Mask);
+llvm::scaleShuffleMask(2, Widened256Mask, Widened128Mask);
}

// Try to lower to vshuf64x2/vshuf32x4.
@@ -17079,7 +17108,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 2> Repeated128Mask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
SmallVector<int, 4> PSHUFDMask;
-scaleShuffleMask<int>(2, Repeated128Mask, PSHUFDMask);
+scaleShuffleMask(2, Repeated128Mask, PSHUFDMask);
return DAG.getBitcast(
MVT::v8i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
@@ -20166,7 +20195,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
SmallVector<int, 64> Mask;
int Scale = 64 / OutVT.getScalarSizeInBits();
-scaleShuffleMask<int>(Scale, ArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+scaleShuffleMask(Scale, { 0, 2, 1, 3 }, Mask);
Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);

if (DstVT.is256BitVector())
@@ -33612,7 +33641,7 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Narrow the repeated mask to create 32-bit element permutes.
SmallVector<int, 4> WordMask = RepeatedMask;
if (MaskScalarSizeInBits == 64)
-scaleShuffleMask<int>(2, RepeatedMask, WordMask);
+scaleShuffleMask(2, RepeatedMask, WordMask);

Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
@@ -34065,7 +34094,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
-scaleShuffleMask<int>(MaskScale, BaseMask, Mask);
+scaleShuffleMask(MaskScale, BaseMask, Mask);
} else {
Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
}
@@ -38189,7 +38218,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if ((NumSrcElts % Mask.size()) == 0) {
SmallVector<int, 16> ScaledMask;
int Scale = NumSrcElts / Mask.size();
-scaleShuffleMask<int>(Scale, Mask, ScaledMask);
+scaleShuffleMask(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
// Simplify Mask based on demanded element.
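To illustrate the two X86 mask helpers this patch moves into X86ISelLowering.cpp, here is a small hypothetical caller; the wrapper function is illustrative, and the expected masks follow from the loops in the definitions above (a single-lane v8i16 case is used for clarity).

// Illustrative only: assumes this sits in an X86 target source file where the
// createUnpackShuffleMask/createSplat2ShuffleMask declarations from
// X86ISelLowering.h are visible.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MachineValueType.h"

static void buildExampleMasks() {
  llvm::SmallVector<int, 16> Unpack, Splat;

  // Binary unpack-low mask for v8i16 (punpcklwd pattern):
  //   <0, 8, 1, 9, 2, 10, 3, 11>
  llvm::createUnpackShuffleMask(llvm::MVT::v8i16, Unpack, /*Lo=*/true,
                                /*Unary=*/false);

  // Cross-lane splat2 low mask for v8i16, as in the doc comment:
  //   <0, 0, 1, 1, 2, 2, 3, 3>
  llvm::createSplat2ShuffleMask(llvm::MVT::v8i16, Splat, /*Lo=*/true);
}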
27 changes: 3 additions & 24 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1564,35 +1564,14 @@ namespace llvm {
};

/// Generate unpacklo/unpackhi shuffle mask.
-template <typename T = int>
-void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
-                             bool Unary) {
-  assert(Mask.empty() && "Expected an empty shuffle mask vector");
-  int NumElts = VT.getVectorNumElements();
-  int NumEltsInLane = 128 / VT.getScalarSizeInBits();
-  for (int i = 0; i < NumElts; ++i) {
-    unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
-    int Pos = (i % NumEltsInLane) / 2 + LaneStart;
-    Pos += (Unary ? 0 : NumElts * (i % 2));
-    Pos += (Lo ? 0 : NumEltsInLane / 2);
-    Mask.push_back(Pos);
-  }
-}
+void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+                             bool Unary);

/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
-template <typename T = int>
-void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo) {
-  assert(Mask.empty() && "Expected an empty shuffle mask vector");
-  int NumElts = VT.getVectorNumElements();
-  for (int i = 0; i < NumElts; ++i) {
-    int Pos = i / 2;
-    Pos += (Lo ? 0 : NumElts / 2);
-    Mask.push_back(Pos);
-  }
-}
+void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

36 changes: 18 additions & 18 deletions llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -325,19 +325,19 @@ void X86InterleavedAccessGroup::interleave8bitStride4VF8(

MVT VT = MVT::v8i16;
TransposedMatrix.resize(2);
-SmallVector<uint32_t, 16> MaskLow;
-SmallVector<uint32_t, 32> MaskLowTemp1, MaskLowWord;
-SmallVector<uint32_t, 32> MaskHighTemp1, MaskHighWord;
+SmallVector<int, 16> MaskLow;
+SmallVector<int, 32> MaskLowTemp1, MaskLowWord;
+SmallVector<int, 32> MaskHighTemp1, MaskHighWord;

for (unsigned i = 0; i < 8; ++i) {
MaskLow.push_back(i);
MaskLow.push_back(i + 8);
}

-createUnpackShuffleMask<uint32_t>(VT, MaskLowTemp1, true, false);
-createUnpackShuffleMask<uint32_t>(VT, MaskHighTemp1, false, false);
-scaleShuffleMask<uint32_t>(2, MaskHighTemp1, MaskHighWord);
-scaleShuffleMask<uint32_t>(2, MaskLowTemp1, MaskLowWord);
+createUnpackShuffleMask(VT, MaskLowTemp1, true, false);
+createUnpackShuffleMask(VT, MaskHighTemp1, false, false);
+scaleShuffleMask(2, MaskHighTemp1, MaskHighWord);
+scaleShuffleMask(2, MaskLowTemp1, MaskLowWord);
// IntrVec1Low = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7
// IntrVec2Low = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7
Value *IntrVec1Low =
@@ -367,25 +367,25 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
MVT HalfVT = scaleVectorType(VT);

TransposedMatrix.resize(4);
-SmallVector<uint32_t, 32> MaskHigh;
-SmallVector<uint32_t, 32> MaskLow;
-SmallVector<uint32_t, 32> LowHighMask[2];
-SmallVector<uint32_t, 32> MaskHighTemp;
-SmallVector<uint32_t, 32> MaskLowTemp;
+SmallVector<int, 32> MaskHigh;
+SmallVector<int, 32> MaskLow;
+SmallVector<int, 32> LowHighMask[2];
+SmallVector<int, 32> MaskHighTemp;
+SmallVector<int, 32> MaskLowTemp;

// MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86
// shuffle pattern.

-createUnpackShuffleMask<uint32_t>(VT, MaskLow, true, false);
-createUnpackShuffleMask<uint32_t>(VT, MaskHigh, false, false);
+createUnpackShuffleMask(VT, MaskLow, true, false);
+createUnpackShuffleMask(VT, MaskHigh, false, false);

// MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86
// shuffle pattern.

-createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp, true, false);
-createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp, false, false);
-scaleShuffleMask<uint32_t>(2, MaskLowTemp, LowHighMask[0]);
-scaleShuffleMask<uint32_t>(2, MaskHighTemp, LowHighMask[1]);
+createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false);
+createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false);
+scaleShuffleMask(2, MaskLowTemp, LowHighMask[0]);
+scaleShuffleMask(2, MaskHighTemp, LowHighMask[1]);

// IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23
// IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31
8 changes: 4 additions & 4 deletions llvm/unittests/Analysis/VectorUtilsTest.cpp
@@ -100,10 +100,10 @@ TEST_F(BasicTest, isSplat) {

TEST_F(BasicTest, scaleShuffleMask) {
SmallVector<int, 16> ScaledMask;
-scaleShuffleMask<int>(1, {3,2,0,-2}, ScaledMask);
-EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({3,2,0,-2}));
-scaleShuffleMask<int>(4, {3,2,0,-1}, ScaledMask);
-EXPECT_EQ(makeArrayRef<int>(ScaledMask), makeArrayRef<int>({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
+scaleShuffleMask(1, {3,2,0,-2}, ScaledMask);
+EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({3,2,0,-2}));
+scaleShuffleMask(4, {3,2,0,-1}, ScaledMask);
+EXPECT_EQ(makeArrayRef(ScaledMask), makeArrayRef({12,13,14,15,8,9,10,11,0,1,2,3,-1,-1,-1,-1}));
}

TEST_F(BasicTest, getSplatIndex) {
