-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[VPlan] Avoid sinking allocas in sinkScalarOperands #166135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Ramkumar Ramachandra (artagnon) Changes: Use cannotHoistOrSinkRecipe to forbid sinking allocas. Full diff: https://github.com/llvm/llvm-project/pull/166135.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b45536869c5af..b646fec53993f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -161,8 +161,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
return false;
- if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
- Candidate->mayReadOrWriteMemory())
+ if (Candidate->getParent() == SinkTo || cannotHoistOrSinkRecipe(*Candidate))
return false;
if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index a852b731ea13b..9e523be618b44 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -12,12 +12,15 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
+; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -26,7 +29,7 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
@@ -35,7 +38,7 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK: [[PRED_STORE_IF3]]:
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_CONTINUE4]]:
@@ -44,7 +47,7 @@ define i32 @test(ptr %vf1, i64 %n) {
; CHECK: [[PRED_STORE_IF5]]:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = alloca i8, i64 [[N]], align 16
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; CHECK: [[PRED_STORE_CONTINUE6]]:
|
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks, although it looks like there's a conflict that needs resolving first.
Use cannotHoistOrSinkRecipe to forbid sinking allocas.
5ad2b9f to
91475e3
Compare
Use cannotHoistOrSinkRecipe to forbid sinking allocas.