-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[VPlan] Handle scalar VPWidenPointerInd in convertToConcreteRecipes. #169338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Handle scalar VPWidenPointerInd in convertToConcreteRecipes. #169338
Conversation
…ipes. In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations. Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed. This fixes a crash after llvm#148274 in the added test case.
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) Changes: In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations. Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed. This fixes a crash after #148274 in the added test case. Full diff: https://github.com/llvm/llvm-project/pull/169338.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e7a8773be067b..5079fa488ec46 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -668,6 +668,23 @@ static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
return Users.takeVector();
}
+/// Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd
+/// (IndStart, ScalarIVSteps (0, Step)). This is used when the recipe only
+/// generates scalar values.
+static VPValue *
+scalarizeVPWidenPointerInduction(VPWidenPointerInductionRecipe *PtrIV,
+ VPlan &Plan, VPBuilder &Builder) {
+ const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
+ VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
+ VPValue *StepV = PtrIV->getOperand(1);
+ VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
+ Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
+ nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
+
+ return Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
+ PtrIV->getDebugLoc(), "next.gep");
+}
+
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
@@ -720,16 +737,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
continue;
- const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
- VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
- VPValue *StepV = PtrIV->getOperand(1);
- VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
- Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
- nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
-
- VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
- PtrIV->getDebugLoc(), "next.gep");
-
+ VPValue *PtrAdd = scalarizeVPWidenPointerInduction(PtrIV, Plan, Builder);
PtrIV->replaceAllUsesWith(PtrAdd);
continue;
}
@@ -3502,6 +3510,16 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
}
if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
+ // If the recipe only generates scalars, scalarize it instead of
+ // expanding it.
+ if (WidenIVR->onlyScalarsGenerated(Plan.hasScalableVF())) {
+ VPBuilder Builder(WidenIVR);
+ VPValue *PtrAdd =
+ scalarizeVPWidenPointerInduction(WidenIVR, Plan, Builder);
+ WidenIVR->replaceAllUsesWith(PtrAdd);
+ ToRemove.push_back(WidenIVR);
+ continue;
+ }
expandVPWidenPointerInduction(WidenIVR, TypeInfo);
ToRemove.push_back(WidenIVR);
continue;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
index fa710cb8d65b1..47bb6b469c8ad 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
@@ -61,4 +61,106 @@ exit:
ret void
}
+define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c) #1 {
+; CHECK-LABEL: define i1 @scalarize_ptr_induction(
+; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]], ptr noalias [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[START5:%.*]] = ptrtoint ptr [[START]] to i64
+; CHECK-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -12
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START5]]
+; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 12
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 1
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 15)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], [[UMAX]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[END1]], -12
+; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[START2]]
+; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 [[TMP7]], 12
+; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 12
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP3]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[N_VEC]], 12
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP15]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 12
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 6 x i32>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META3:![0-9]+]]
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv6i32(<vscale x 6 x i32> [[WIDE_VEC]])
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 2 x i32> [[TMP18]] to <vscale x 2 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = mul <vscale x 2 x i64> [[TMP19]], splat (i64 -7070675565921424023)
+; CHECK-NEXT: [[TMP21:%.*]] = add <vscale x 2 x i64> [[TMP20]], splat (i64 -4)
+; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i32 [[TMP22]], 2
+; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i64> [[TMP21]], i32 [[TMP24]]
+; CHECK-NEXT: store i64 [[TMP25]], ptr [[DST]], align 1, !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ], [ [[START]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 4
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[L]] to i64
+; CHECK-NEXT: [[UNUSED:%.*]] = load i32, ptr [[PTR_IV]], align 4
+; CHECK-NEXT: [[MUL1:%.*]] = mul i64 [[EXT]], -7070675565921424023
+; CHECK-NEXT: [[MUL2:%.*]] = add i64 [[MUL1]], -4
+; CHECK-NEXT: store i64 [[MUL2]], ptr [[DST]], align 1
+; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 12
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CMP_LCSSA:%.*]] = phi i1 [ [[CMP]], %[[LOOP]] ]
+; CHECK-NEXT: ret i1 [[CMP_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
+ %gep = getelementptr i8, ptr %ptr.iv, i64 4
+ %l = load i32, ptr %gep, align 4
+ %ext = zext i32 %l to i64
+ %unused = load i32, ptr %ptr.iv, align 4
+ %mul1 = mul i64 %ext, -7070675565921424023
+ %mul2 = add i64 %mul1, -4
+ store i64 %mul2, ptr %dst, align 1
+ %ptr.iv.next = getelementptr nusw i8, ptr %ptr.iv, i64 12
+ %cmp = icmp eq ptr %ptr.iv.next, %end
+ %or.cond = select i1 %cmp, i1 true, i1 false
+ br i1 %or.cond, label %exit, label %loop
+
+exit:
+ ret i1 %cmp
+}
+
attributes #0 = { "target-features"="+v" }
+attributes #1 = { "target-cpu"="sifive-p670" }
|
arcbbb
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Fixes #169780
🐧 Linux x64 Test Results
Failed Tests (click on a test name to see its output): LLVM — LLVM.Transforms/LoopVectorize/RISCV/pointer-induction.ll. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
lukel97
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does VPWidenIntOrFpInductionRecipe have the same problem where it could be replaced by scalar steps? Does running legalizeAndOptimizeInductions a second time before convertToConcreteRecipes help?
I don't think so, I think VPWidenPointerInductionRecipe is special in some sense here as it previously supported both wide/scalar codegen. |
In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations.
Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed.
This fixes a crash after #148274 in the added test case.
Fixes #169780