Skip to content

Commit

Permalink
Pass shufflevector indices as int instead of unsigned.
Browse files Browse the repository at this point in the history
No functionality change intended.
  • Loading branch information
d0k committed Apr 15, 2020
1 parent cb1ee34 commit 316b49d
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 59 deletions.
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -11747,7 +11747,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
Imm = (Imm & 0xff) * 0x01010101;

uint32_t Indices[16];
int Indices[16];
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
unsigned Index = Imm % NumLaneElts;
Expand Down
50 changes: 24 additions & 26 deletions llvm/lib/IR/AutoUpgrade.cpp
Expand Up @@ -912,7 +912,7 @@ static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
uint32_t Idxs[64];
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
Expand Down Expand Up @@ -946,7 +946,7 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
uint32_t Idxs[64];
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
Expand All @@ -972,7 +972,7 @@ static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
// If we have less than 8 elements, then the starting mask was an i8 and
// we need to extract down to the right number of elements.
if (NumElts < 8) {
uint32_t Indices[4];
int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
Mask = Builder.CreateShuffleVector(Mask, Mask,
Expand Down Expand Up @@ -1041,7 +1041,7 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
Op0 = llvm::Constant::getNullValue(Op0->getType());
}

uint32_t Indices[64];
int Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
Expand Down Expand Up @@ -1352,7 +1352,7 @@ static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
}

if (NumElts < 8) {
uint32_t Indices[8];
int Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
Expand Down Expand Up @@ -1878,7 +1878,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned NumElts = CI->getType()->getScalarSizeInBits();
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
uint32_t Indices[64];
int Indices[64];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;

Expand Down Expand Up @@ -2127,8 +2127,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts < SrcTy->getNumElements()) {
assert(NumDstElts == 2 && "Unexpected vector size");
uint32_t ShuffleMask[2] = { 0, 1 };
Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
}

bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
Expand Down Expand Up @@ -2159,8 +2158,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts != SrcTy->getNumElements()) {
assert(NumDstElts == 4 && "Unexpected vector size");
uint32_t ShuffleMask[4] = {0, 1, 2, 3};
Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
}
Rep = Builder.CreateBitCast(
Rep, VectorType::get(Type::getHalfTy(C), NumDstElts));
Expand Down Expand Up @@ -2310,7 +2308,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned NumDstElts = DstTy->getNumElements();

// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i;

Expand Down Expand Up @@ -2356,7 +2354,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
unsigned ControlBitsMask = NumLanes - 1;
unsigned NumControlBits = NumLanes / 2;
SmallVector<uint32_t, 8> ShuffleMask(0);
SmallVector<int, 8> ShuffleMask(0);

for (unsigned l = 0; l != NumLanes; ++l) {
unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
Expand All @@ -2376,7 +2374,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
cast<VectorType>(CI->getArgOperand(0)->getType())->getNumElements();
unsigned NumDstElts = cast<VectorType>(CI->getType())->getNumElements();

SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i % NumSrcElts;

Expand Down Expand Up @@ -2466,7 +2464,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();

SmallVector<uint32_t, 16> Idxs(NumElts);
SmallVector<int, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;

Expand All @@ -2486,7 +2484,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {

// Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<uint32_t, 8> Idxs(DstNumElts);
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Expand Down Expand Up @@ -2529,7 +2527,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Imm = Imm % Scale;

// Get indexes for the subvector of the input vector.
SmallVector<uint32_t, 8> Idxs(DstNumElts);
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Expand All @@ -2548,7 +2546,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();

SmallVector<uint32_t, 8> Idxs(NumElts);
SmallVector<int, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

Expand All @@ -2571,7 +2569,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {

unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();
unsigned HalfSize = NumElts / 2;
SmallVector<uint32_t, 8> ShuffleMask(NumElts);
SmallVector<int, 8> ShuffleMask(NumElts);

// Determine which operand(s) are actually in use for this instruction.
Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
Expand Down Expand Up @@ -2605,7 +2603,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
unsigned IdxMask = ((1 << IdxSize) - 1);

SmallVector<uint32_t, 8> Idxs(NumElts);
SmallVector<int, 8> Idxs(NumElts);
// Lookup the bits for this element, wrapping around the immediate every
// 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
// to offset by the first index of each group.
Expand All @@ -2623,7 +2621,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();

SmallVector<uint32_t, 16> Idxs(NumElts);
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
Expand All @@ -2642,7 +2640,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements();

SmallVector<uint32_t, 16> Idxs(NumElts);
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = i + l;
Expand All @@ -2664,7 +2662,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned HalfLaneElts = NumLaneElts / 2;

SmallVector<uint32_t, 16> Idxs(NumElts);
SmallVector<int, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// Base index is the starting element of the lane.
Idxs[i] = i - (i % NumLaneElts);
Expand All @@ -2691,7 +2689,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (Name.startswith("avx512.mask.movshdup."))
Offset = 1;

SmallVector<uint32_t, 16> Idxs(NumElts);
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += NumLaneElts)
for (unsigned i = 0; i != NumLaneElts; i += 2) {
Idxs[i + l + 0] = i + l + Offset;
Expand All @@ -2709,7 +2707,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
int NumElts = cast<VectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

SmallVector<uint32_t, 64> Idxs(NumElts);
SmallVector<int, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
Expand All @@ -2725,7 +2723,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
int NumElts = cast<VectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

SmallVector<uint32_t, 64> Idxs(NumElts);
SmallVector<int, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
Expand Down Expand Up @@ -3304,7 +3302,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (IsSubAdd)
std::swap(Even, Odd);

SmallVector<uint32_t, 32> Idxs(NumElts);
SmallVector<int, 32> Idxs(NumElts);
for (int i = 0; i != NumElts; ++i)
Idxs[i] = i + (i % 2) * NumElts;

Expand Down
64 changes: 32 additions & 32 deletions llvm/lib/Target/X86/X86InterleavedAccess.cpp
Expand Up @@ -229,11 +229,11 @@ static MVT scaleVectorType(MVT VT) {
VT.getVectorNumElements() / 2);
}

static uint32_t Concat[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 };
static constexpr int Concat[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};

// genShuffleBland - Creates shuffle according to two vectors. This function
// only works on instructions with lane inside 256 registers. According to
Expand All @@ -251,9 +251,9 @@ static uint32_t Concat[] = {
// By computing the shuffle on a sequence of 16 elements (one lane) and adding
// the correct offset, we are creating a vpshufb + blend sequence between two
// shuffles.
static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
SmallVectorImpl<uint32_t> &Out, int LowOffset,
int HighOffset) {
static void genShuffleBland(MVT VT, ArrayRef<int> Mask,
SmallVectorImpl<int> &Out, int LowOffset,
int HighOffset) {
assert(VT.getSizeInBits() >= 256 &&
"This function doesn't accept width smaller then 256");
unsigned NumOfElm = VT.getVectorNumElements();
Expand Down Expand Up @@ -282,9 +282,9 @@ static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
// Invec[2] - |2|5|8|11| TransposedMatrix[2] - |8|9|10|11|

static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
ArrayRef<Value *> Vec, ArrayRef<uint32_t> VPShuf,
unsigned VecElems, unsigned Stride,
IRBuilder<> &Builder) {
ArrayRef<Value *> Vec, ArrayRef<int> VPShuf,
unsigned VecElems, unsigned Stride,
IRBuilder<> &Builder) {

if (VecElems == 16) {
for (unsigned i = 0; i < Stride; i++)
Expand All @@ -293,7 +293,7 @@ static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
return;
}

SmallVector<uint32_t, 32> OptimizeShuf;
SmallVector<int, 32> OptimizeShuf;
Value *Temp[8];

for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) {
Expand Down Expand Up @@ -433,7 +433,7 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
// For example shuffle pattern for VF 16 register size 256 -> lanes = 2
// {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}
static void createShuffleStride(MVT VT, int Stride,
SmallVectorImpl<uint32_t> &Mask) {
SmallVectorImpl<int> &Mask) {
int VectorSize = VT.getSizeInBits();
int VF = VT.getVectorNumElements();
int LaneCount = std::max(VectorSize / 128, 1);
Expand All @@ -446,7 +446,7 @@ static void createShuffleStride(MVT VT, int Stride,
// inside mask a shuffleMask. A mask contains exactly 3 groups, where
// each group is a monotonically increasing sequence with stride 3.
// For example shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2}
static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
static void setGroupSize(MVT VT, SmallVectorImpl<int> &SizeInfo) {
int VectorSize = VT.getSizeInBits();
int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
Expand All @@ -470,7 +470,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
// direction of the alignment. (false - align to the "right" side while true -
// align to the "left" side)
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<uint32_t> &ShuffleMask,
SmallVectorImpl<int> &ShuffleMask,
bool AlignDirection = true, bool Unary = false) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
Expand Down Expand Up @@ -547,11 +547,11 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
// Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7

TransposedMatrix.resize(3);
SmallVector<uint32_t, 32> VPShuf;
SmallVector<uint32_t, 32> VPAlign[2];
SmallVector<uint32_t, 32> VPAlign2;
SmallVector<uint32_t, 32> VPAlign3;
SmallVector<uint32_t, 3> GroupSize;
SmallVector<int, 32> VPShuf;
SmallVector<int, 32> VPAlign[2];
SmallVector<int, 32> VPAlign2;
SmallVector<int, 32> VPAlign3;
SmallVector<int, 3> GroupSize;
Value *Vec[6], *TempVector[3];

MVT VT = MVT::getVT(Shuffles[0]->getType());
Expand Down Expand Up @@ -605,8 +605,8 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
// group2Shuffle reorders the shuffle stride back into continuous order.
// For example, for VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13} =>
// MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
SmallVectorImpl<uint32_t> &Output) {
static void group2Shuffle(MVT VT, SmallVectorImpl<int> &Mask,
SmallVectorImpl<int> &Output) {
int IndexGroup[3] = {0, 0, 0};
int Index = 0;
int VectorWidth = VT.getSizeInBits();
Expand All @@ -633,11 +633,11 @@ void X86InterleavedAccessGroup::interleave8bitStride3(
// Matrix[2]= c0 c1 c2 c3 c3 a7 b7 c7

TransposedMatrix.resize(3);
SmallVector<uint32_t, 3> GroupSize;
SmallVector<uint32_t, 32> VPShuf;
SmallVector<uint32_t, 32> VPAlign[3];
SmallVector<uint32_t, 32> VPAlign2;
SmallVector<uint32_t, 32> VPAlign3;
SmallVector<int, 3> GroupSize;
SmallVector<int, 32> VPShuf;
SmallVector<int, 32> VPAlign[3];
SmallVector<int, 32> VPAlign2;
SmallVector<int, 32> VPAlign3;

Value *Vec[3], *TempVector[3];
MVT VT = MVT::getVectorVT(MVT::i8, VecElems);
Expand Down Expand Up @@ -692,25 +692,25 @@ void X86InterleavedAccessGroup::transpose_4x4(
TransposedMatrix.resize(4);

// dst = src1[0,1],src2[0,1]
uint32_t IntMask1[] = {0, 1, 4, 5};
ArrayRef<uint32_t> Mask = makeArrayRef(IntMask1, 4);
static constexpr int IntMask1[] = {0, 1, 4, 5};
ArrayRef<int> Mask = makeArrayRef(IntMask1, 4);
Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

// dst = src1[2,3],src2[2,3]
uint32_t IntMask2[] = {2, 3, 6, 7};
static constexpr int IntMask2[] = {2, 3, 6, 7};
Mask = makeArrayRef(IntMask2, 4);
Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

// dst = src1[0],src2[0],src1[2],src2[2]
uint32_t IntMask3[] = {0, 4, 2, 6};
static constexpr int IntMask3[] = {0, 4, 2, 6};
Mask = makeArrayRef(IntMask3, 4);
TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);

// dst = src1[1],src2[1],src1[3],src2[3]
uint32_t IntMask4[] = {1, 5, 3, 7};
static constexpr int IntMask4[] = {1, 5, 3, 7};
Mask = makeArrayRef(IntMask4, 4);
TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
Expand Down

0 comments on commit 316b49d

Please sign in to comment.