Skip to content

Commit

Permalink
[AArch64] Extending lowering of 'zext <Y x i8> %x to <Y x i8X>' to us…
Browse files Browse the repository at this point in the history
…e tbl instructions

Add support for ZExt lowering to destination types beyond the existing support for (8|16) x i32.

The patch in https://reviews.llvm.org/D120571, which lowers zext instructions to 'tbl' for (8|16) x i8 -> (8|16) x i32 conversions, is extended to support zext-to-'tbl' lowering from Y x i8 to Y x i8X where X > 2 and X < 8; that is, any number of vector elements is allowed, together with any destination element type whose size in bits is a multiple of 8 and lies strictly between 16 and 64.

Related microbenchmarks are in https://reviews.llvm.org/D136274 & https://reviews.llvm.org/D138059

Differential Revision: https://reviews.llvm.org/D136722
  • Loading branch information
nilanjana87 committed Dec 9, 2022
1 parent af42d80 commit 955c0f1
Show file tree
Hide file tree
Showing 2 changed files with 906 additions and 523 deletions.
56 changes: 34 additions & 22 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -13845,19 +13845,32 @@ bool AArch64TargetLowering::shouldSinkOperands(

static void createTblShuffleForZExt(ZExtInst *ZExt, bool IsLittleEndian) {
Value *Op = ZExt->getOperand(0);
auto *SrcTy = dyn_cast<FixedVectorType>(Op->getType());
auto *DstTy = dyn_cast<FixedVectorType>(ZExt->getType());
auto *SrcTy = cast<FixedVectorType>(Op->getType());
auto *DstTy = cast<FixedVectorType>(ZExt->getType());
auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
assert(DstWidth % SrcWidth == 0 &&
"TBL lowering is not supported for a ZExt instruction with this "
"source & destination element type.");
unsigned ZExtFactor = DstWidth / SrcWidth;
unsigned NumElts = SrcTy->getNumElements();
IRBuilder<> Builder(ZExt);
SmallVector<int> Mask(4 * NumElts, NumElts);
// Create a mask that selects <0,0,0,Op[i]> for each lane of vector of i32 to
// replace the original ZExt. This can later be lowered to a set of tbl
// instructions.
for (unsigned i = 0; i < NumElts; i++) {
if (IsLittleEndian)
Mask[i * 4] = i;
else
Mask[i * 4 + 3] = i;
SmallVector<int> Mask;
// Create a mask that selects <0,...,Op[i]> for each lane of the destination
// vector to replace the original ZExt. This can later be lowered to a set of
// tbl instructions.
for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
if (IsLittleEndian) {
if (i % ZExtFactor == 0)
Mask.push_back(i / ZExtFactor);
else
Mask.push_back(NumElts);
} else {
if ((i + 1) % ZExtFactor == 0)
Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
else
Mask.push_back(NumElts);
}
}

auto *FirstEltZero = Builder.CreateInsertElement(
Expand Down Expand Up @@ -13922,21 +13935,20 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
if (!SrcTy || !DstTy)
return false;

// Convert 'zext <(8|16) x i8> %x to <(8|16) x i32>' to a shuffle that can be
// lowered to either 2 or 4 tbl instructions to insert the original i8
// elements into i32 lanes.
// Convert 'zext <Y x i8> %x to <Y x i8X>' to a shuffle that can be
// lowered to tbl instructions to insert the original i8 elements
// into i8x lanes. This is enabled for cases where it is beneficial.
auto *ZExt = dyn_cast<ZExtInst>(I);
if (ZExt && (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
SrcTy->getElementType()->isIntegerTy(8) &&
DstTy->getElementType()->isIntegerTy(32)) {
createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
return true;
if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
if (DstWidth % 8 == 0 && DstWidth > 16 && DstWidth < 64) {
createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
return true;
}
}

auto *UIToFP = dyn_cast<UIToFPInst>(I);
if (UIToFP &&
(SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
SrcTy->getElementType()->isIntegerTy(8) &&
if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
DstTy->getElementType()->isFloatTy()) {
IRBuilder<> Builder(I);
auto *ZExt = cast<ZExtInst>(
Expand Down

0 comments on commit 955c0f1

Please sign in to comment.