-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LV] Preserve GEP nusw when widening memory #160885
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) ChangesFull diff: https://github.com/llvm/llvm-project/pull/160885.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a2d61d689397b..699eb0fb2abd1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7482,12 +7482,11 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VPSingleDefRecipe *VectorPtr;
if (Reverse) {
// When folding the tail, we may compute an address that we don't in the
- // original scalar loop and it may not be inbounds. Drop Inbounds in that
- // case.
+ // original scalar loop: use none in this case.
GEPNoWrapFlags Flags =
- (CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
+ CM.foldTailByMasking() || !GEP
? GEPNoWrapFlags::none()
- : GEPNoWrapFlags::inBounds();
+ : GEP->getNoWrapFlags().withoutNoUnsignedWrap();
VectorPtr =
new VPVectorEndPointerRecipe(Ptr, &Plan.getVF(), getLoadStoreType(I),
/*Stride*/ -1, Flags, I->getDebugLoc());
diff --git a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll
new file mode 100644
index 0000000000000..553e56926dcfa
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+define i32 @preserve_inbounds(i64 %start, ptr %ptr) {
+; CHECK-LABEL: define i32 @preserve_inbounds(
+; CHECK-SAME: i64 [[START:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[SCALAR_PH:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1
+; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[REV_IND_NEXT]]
+; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4
+; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %rev.ind = phi i64 [ %start, %entry ], [ %rev.ind.next, %loop ]
+ %redux = phi i32 [ 0, %entry ], [ %redux.next, %loop ]
+ %rev.ind.next = add i64 %rev.ind, -1
+ %gep.ptr.ind = getelementptr inbounds i32, ptr %ptr, i64 %rev.ind.next
+ %ld.ptr = load i32, ptr %gep.ptr.ind, align 4
+ %redux.next = add i32 %ld.ptr, %redux
+ %iv.next = add i32 %iv, 1
+ %exit.cond = icmp ne i32 %iv.next, 1024
+ br i1 %exit.cond, label %loop, label %end
+
+end:
+ ret i32 %redux.next
+}
+
+define i32 @preserve_nusw(i64 %start, ptr %ptr) {
+; CHECK-LABEL: define i32 @preserve_nusw(
+; CHECK-SAME: i64 [[START:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[SCALAR_PH:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1
+; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[REV_IND_NEXT]]
+; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4
+; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %rev.ind = phi i64 [ %start, %entry ], [ %rev.ind.next, %loop ]
+ %redux = phi i32 [ 0, %entry ], [ %redux.next, %loop ]
+ %rev.ind.next = add i64 %rev.ind, -1
+ %gep.ptr.ind = getelementptr nusw i32, ptr %ptr, i64 %rev.ind.next
+ %ld.ptr = load i32, ptr %gep.ptr.ind, align 4
+ %redux.next = add i32 %ld.ptr, %redux
+ %iv.next = add i32 %iv, 1
+ %exit.cond = icmp ne i32 %iv.next, 1024
+ br i1 %exit.cond, label %loop, label %end
+
+end:
+ ret i32 %redux.next
+}
+
+define i32 @drop_nuw(i64 %start, ptr %ptr) {
+; CHECK-LABEL: define i32 @drop_nuw(
+; CHECK-SAME: i64 [[START:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[SCALAR_PH:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1
+; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[REV_IND_NEXT]]
+; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4
+; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %rev.ind = phi i64 [ %start, %entry ], [ %rev.ind.next, %loop ]
+ %redux = phi i32 [ 0, %entry ], [ %redux.next, %loop ]
+ %rev.ind.next = add i64 %rev.ind, -1
+ %gep.ptr.ind = getelementptr nuw i32, ptr %ptr, i64 %rev.ind.next
+ %ld.ptr = load i32, ptr %gep.ptr.ind, align 4
+ %redux.next = add i32 %ld.ptr, %redux
+ %iv.next = add i32 %iv, 1
+ %exit.cond = icmp ne i32 %iv.next, 1024
+ br i1 %exit.cond, label %loop, label %end
+
+end:
+ ret i32 %redux.next
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+;.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, but I'm no GEP expert so definitely wait for a second review :)
llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll
Outdated
Show resolved
Hide resolved
ec76860
to
2a23fca
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
2a23fca
to
30f7797
Compare
No description provided.