Expand masked mem intrinsics correctly wrt big-endian
Need to take endianness into account when doing vector-to-scalar casts
such as %bc = bitcast <8 x i1> %v to i8.

Companion commit for https://reviews.llvm.org/D94867.
Uploaded in response to
https://lists.llvm.org/pipermail/llvm-dev/2021-January/147862.html.

Attempting to document the actual memory layout rules for vectors in
https://reviews.llvm.org/D94964.

Differential Revision: https://reviews.llvm.org/D94765
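
To make the layout point concrete: after a cast like
%bc = bitcast <8 x i1> %v to i8, vector lane Idx occupies bit Idx of the
i8 on little-endian targets but bit VectorWidth - 1 - Idx on big-endian
targets. A standalone C++ sketch of that rule (illustrative names, not
LLVM API):

    #include <cassert>

    // Bit position of vector lane Idx after a <N x i1> -> iN bitcast.
    unsigned maskBitForLane(bool IsBigEndian, unsigned VectorWidth,
                            unsigned Idx) {
      return IsBigEndian ? VectorWidth - 1 - Idx : Idx;
    }

    int main() {
      assert(maskBitForLane(false, 8, 0) == 0); // LE: lane 0 -> bit 0
      assert(maskBitForLane(true, 8, 0) == 7);  // BE: lane 0 -> bit 7
      assert(maskBitForLane(true, 8, 7) == 0);  // BE: lane 7 -> bit 0
      return 0;
    }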
markuslavin authored and davemgreen committed Feb 11, 2021
1 parent 1db7b9c commit 9498315
Showing 6 changed files with 505 additions and 393 deletions.
llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp (58 changes: 35 additions & 23 deletions)
@@ -105,6 +105,11 @@ static bool isConstantIntVector(Value *Mask) {
   return true;
 }
 
+static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
+                                unsigned Idx) {
+  return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
+}
+
 // Translate a masked load intrinsic like
 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
 //                                <16 x i1> %mask, <16 x i32> %passthru)
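
The adjustForEndian helper added above is consumed at every predicate
site below, where it picks the bit that APInt::getOneBitSet sets. A
minimal illustration of the resulting test constants for an 8-lane mask
(assumes LLVM's APInt header; the wrapper function is hypothetical):

    #include "llvm/ADT/APInt.h"

    // Single-bit test constant for lane 2 of an 8-lane mask:
    //   little-endian: bit 2 -> 0b00000100
    //   big-endian:    bit 5 -> 0b00100000
    llvm::APInt testConstantForLane2(bool IsBigEndian) {
      unsigned Bit = IsBigEndian ? 8 - 1 - 2 : 2;
      return llvm::APInt::getOneBitSet(/*numBits=*/8, Bit);
    }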
@@ -137,8 +142,8 @@ static bool isConstantIntVector(Value *Mask) {
 //   %10 = extractelement <16 x i1> %mask, i32 2
 //   br i1 %10, label %cond.load4, label %else5
 //
-static void scalarizeMaskedLoad(CallInst *CI, DomTreeUpdater *DTU,
-                                bool &ModifiedDT) {
+static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
+                                DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Ptr = CI->getArgOperand(0);
   Value *Alignment = CI->getArgOperand(1);
   Value *Mask = CI->getArgOperand(2);
@@ -207,7 +212,8 @@ static void scalarizeMaskedLoad(CallInst *CI, DomTreeUpdater *DTU,
     //
     Value *Predicate;
     if (VectorWidth != 1) {
-      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+      Value *Mask = Builder.getInt(APInt::getOneBitSet(
+          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                        Builder.getIntN(VectorWidth, 0));
     } else {
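
Each such hunk changes only which bit the per-lane guard tests; the
emitted IR is still an AND with a one-bit constant followed by an
icmp-ne-zero check. A plain-C++ model of that predicate (standalone
sketch, illustrative names):

    #include <cstdint>

    // Models the emitted sequence
    //   %t = and i8 %SclrMask, %Mask   ; %Mask has exactly one bit set
    //   %p = icmp ne i8 %t, 0
    // for an 8-lane mask that was bitcast to a scalar.
    bool laneIsActive(uint8_t SclrMask, bool IsBigEndian, unsigned Idx) {
      unsigned Bit = IsBigEndian ? 8 - 1 - Idx : Idx;
      return (SclrMask & (1u << Bit)) != 0;
    }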
@@ -278,8 +284,8 @@ static void scalarizeMaskedLoad(CallInst *CI, DomTreeUpdater *DTU,
 //   store i32 %6, i32* %7
 //   br label %else2
 //   . . .
-static void scalarizeMaskedStore(CallInst *CI, DomTreeUpdater *DTU,
-                                 bool &ModifiedDT) {
+static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
+                                 DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptr = CI->getArgOperand(1);
   Value *Alignment = CI->getArgOperand(2);
@@ -340,7 +346,8 @@ static void scalarizeMaskedStore(CallInst *CI, DomTreeUpdater *DTU,
     //
     Value *Predicate;
     if (VectorWidth != 1) {
-      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+      Value *Mask = Builder.getInt(APInt::getOneBitSet(
+          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                        Builder.getIntN(VectorWidth, 0));
     } else {
@@ -405,8 +412,8 @@ static void scalarizeMaskedStore(CallInst *CI, DomTreeUpdater *DTU,
 // . . .
 // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
 // ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI, DomTreeUpdater *DTU,
-                                  bool &ModifiedDT) {
+static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
+                                  DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Ptrs = CI->getArgOperand(0);
   Value *Alignment = CI->getArgOperand(1);
   Value *Mask = CI->getArgOperand(2);
@@ -461,7 +468,8 @@ static void scalarizeMaskedGather(CallInst *CI, DomTreeUpdater *DTU,
 
     Value *Predicate;
     if (VectorWidth != 1) {
-      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+      Value *Mask = Builder.getInt(APInt::getOneBitSet(
+          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                        Builder.getIntN(VectorWidth, 0));
     } else {
@@ -534,8 +542,8 @@ static void scalarizeMaskedGather(CallInst *CI, DomTreeUpdater *DTU,
 //   store i32 %Elt1, i32* %Ptr1, align 4
 //   br label %else2
 //   . . .
-static void scalarizeMaskedScatter(CallInst *CI, DomTreeUpdater *DTU,
-                                   bool &ModifiedDT) {
+static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
+                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptrs = CI->getArgOperand(1);
   Value *Alignment = CI->getArgOperand(2);
@@ -587,7 +595,8 @@ static void scalarizeMaskedScatter(CallInst *CI, DomTreeUpdater *DTU,
     //
     Value *Predicate;
     if (VectorWidth != 1) {
-      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+      Value *Mask = Builder.getInt(APInt::getOneBitSet(
+          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                        Builder.getIntN(VectorWidth, 0));
     } else {
@@ -623,8 +632,8 @@ static void scalarizeMaskedScatter(CallInst *CI, DomTreeUpdater *DTU,
   ModifiedDT = true;
 }
 
-static void scalarizeMaskedExpandLoad(CallInst *CI, DomTreeUpdater *DTU,
-                                      bool &ModifiedDT) {
+static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
+                                      DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Ptr = CI->getArgOperand(0);
   Value *Mask = CI->getArgOperand(1);
   Value *PassThru = CI->getArgOperand(2);
@@ -692,7 +701,8 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, DomTreeUpdater *DTU,
 
     Value *Predicate;
     if (VectorWidth != 1) {
-      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+      Value *Mask = Builder.getInt(APInt::getOneBitSet(
+          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                        Builder.getIntN(VectorWidth, 0));
     } else {
@@ -749,7 +759,8 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, DomTreeUpdater *DTU,
   ModifiedDT = true;
 }
 
-static void scalarizeMaskedCompressStore(CallInst *CI, DomTreeUpdater *DTU,
+static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
+                                         DomTreeUpdater *DTU,
                                          bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptr = CI->getArgOperand(1);
@@ -800,7 +811,8 @@ static void scalarizeMaskedCompressStore(CallInst *CI, DomTreeUpdater *DTU,
     //
     Value *Predicate;
     if (VectorWidth != 1) {
-      Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+      Value *Mask = Builder.getInt(APInt::getOneBitSet(
+          VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
       Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
                                        Builder.getIntN(VectorWidth, 0));
     } else {
@@ -934,14 +946,14 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
               CI->getType(),
               cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
         return false;
-      scalarizeMaskedLoad(CI, DTU, ModifiedDT);
+      scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
       return true;
     case Intrinsic::masked_store:
       if (TTI.isLegalMaskedStore(
               CI->getArgOperand(0)->getType(),
               cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
         return false;
-      scalarizeMaskedStore(CI, DTU, ModifiedDT);
+      scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
       return true;
     case Intrinsic::masked_gather: {
       unsigned AlignmentInt =
@@ -951,7 +963,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
           DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
       if (TTI.isLegalMaskedGather(LoadTy, Alignment))
         return false;
-      scalarizeMaskedGather(CI, DTU, ModifiedDT);
+      scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
       return true;
     }
     case Intrinsic::masked_scatter: {
@@ -962,18 +974,18 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
           DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
       if (TTI.isLegalMaskedScatter(StoreTy, Alignment))
         return false;
-      scalarizeMaskedScatter(CI, DTU, ModifiedDT);
+      scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
       return true;
     }
     case Intrinsic::masked_expandload:
       if (TTI.isLegalMaskedExpandLoad(CI->getType()))
         return false;
-      scalarizeMaskedExpandLoad(CI, DTU, ModifiedDT);
+      scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
       return true;
     case Intrinsic::masked_compressstore:
       if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
         return false;
-      scalarizeMaskedCompressStore(CI, DTU, ModifiedDT);
+      scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
       return true;
     }
   }
