Skip to content

Commit

Permalink
[LV] Preserve inbounds on created GEPs
Browse files Browse the repository at this point in the history
Summary:
This is a fix for PR23997.

The loop vectorizer is not preserving the inbounds property of GEPs that it creates.
This is inhibiting some optimizations. This patch preserves the inbounds property in
the case where a load/store is being fed by an inbounds GEP.

Reviewers: mkuper, javed.absar, hsaito

Reviewed By: hsaito

Subscribers: dcaballe, hsaito, llvm-commits

Differential Revision: https://reviews.llvm.org/D46191

llvm-svn: 331269
  • Loading branch information
Daniel Neilson committed May 1, 2018
1 parent 969d63e commit 9e4bbe8
Show file tree
Hide file tree
Showing 12 changed files with 382 additions and 254 deletions.
25 changes: 22 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -2254,6 +2254,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
if (Group->isReverse())
Index += (VF - 1) * Group->getFactor();

bool InBounds = false;
if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
InBounds = gep->isInBounds();

for (unsigned Part = 0; Part < UF; Part++) {
Value *NewPtr = getOrCreateScalarValue(Ptr, {Part, 0});

Expand All @@ -2269,6 +2273,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
// A[i+2] = c; // Member of index 2 (Current instruction)
// Current pointer is pointed to A[i+2], adjust it to A[i].
NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));
if (InBounds)
cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);

// Cast to the vector pointer type.
NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
Expand Down Expand Up @@ -2405,17 +2411,30 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
if (isMaskRequired)
Mask = *BlockInMask;

bool InBounds = false;
if (auto *gep = dyn_cast<GetElementPtrInst>(
getLoadStorePointerOperand(Instr)->stripPointerCasts()))
InBounds = gep->isInBounds();

const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
GetElementPtrInst *PartPtr = nullptr;

if (Reverse) {
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
PartPtr = cast<GetElementPtrInst>(
Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)));
PartPtr->setIsInBounds(InBounds);
PartPtr = cast<GetElementPtrInst>(
Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)));
PartPtr->setIsInBounds(InBounds);
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
Mask[Part] = reverseVector(Mask[Part]);
} else {
PartPtr = cast<GetElementPtrInst>(
Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)));
PartPtr->setIsInBounds(InBounds);
}

return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll
Expand Up @@ -78,11 +78,11 @@ define void @_Z1dv() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[CONV]], [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[TMP22]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP22]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1, !alias.scope !0
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, i8* [[TMP25]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to <4 x i8>*
; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP27]], align 1, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
Expand Up @@ -49,18 +49,18 @@ define i32 @test(float* nocapture readonly %x) {
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, float* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[T4]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, float* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, <2 x float>* [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double>
; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP9]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[T6]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, float* [[TMP11]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP12]] to <2 x float>*
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, <2 x float>* [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[WIDE_LOAD2]] to <2 x double>
Expand Down

0 comments on commit 9e4bbe8

Please sign in to comment.