Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,23 @@ static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
return Users.takeVector();
}

/// Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)). This is used when the recipe only
/// generates scalar values.
static VPValue *
scalarizeVPWidenPointerInduction(VPWidenPointerInductionRecipe *PtrIV,
VPlan &Plan, VPBuilder &Builder) {
const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
VPValue *StepV = PtrIV->getOperand(1);
VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);

return Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
PtrIV->getDebugLoc(), "next.gep");
}

/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
Expand Down Expand Up @@ -755,16 +772,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
continue;

const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
VPValue *StepV = PtrIV->getOperand(1);
VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);

VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
PtrIV->getDebugLoc(), "next.gep");

VPValue *PtrAdd = scalarizeVPWidenPointerInduction(PtrIV, Plan, Builder);
PtrIV->replaceAllUsesWith(PtrAdd);
continue;
}
Expand Down Expand Up @@ -3575,6 +3583,16 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
}

if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
// If the recipe only generates scalars, scalarize it instead of
// expanding it.
if (WidenIVR->onlyScalarsGenerated(Plan.hasScalableVF())) {
VPBuilder Builder(WidenIVR);
VPValue *PtrAdd =
scalarizeVPWidenPointerInduction(WidenIVR, Plan, Builder);
WidenIVR->replaceAllUsesWith(PtrAdd);
ToRemove.push_back(WidenIVR);
continue;
}
expandVPWidenPointerInduction(WidenIVR, TypeInfo);
ToRemove.push_back(WidenIVR);
continue;
Expand Down
100 changes: 100 additions & 0 deletions llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,104 @@ exit:
ret void
}

define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c) #1 {
; CHECK-LABEL: define i1 @scalarize_ptr_induction(
; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]], ptr noalias [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[START5:%.*]] = ptrtoint ptr [[START]] to i64
; CHECK-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64
; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -12
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START5]]
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 12
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[END1]], -12
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[START2]]
; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 [[TMP7]], 12
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 12
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[END]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP3]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], splat (i64 12)
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, <vscale x 2 x ptr> [[VECTOR_GEP]], i64 4
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 2 x i32> [[TMP18]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP20:%.*]] = mul <vscale x 2 x i64> [[TMP19]], splat (i64 -7070675565921424023)
; CHECK-NEXT: [[TMP21:%.*]] = add <vscale x 2 x i64> [[TMP20]], splat (i64 -4)
; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP21]], <vscale x 2 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw i8, <vscale x 2 x ptr> [[VECTOR_GEP]], i64 12
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <vscale x 2 x ptr> [[TMP16]], [[BROADCAST_SPLAT7]]
; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]]
; CHECK-NEXT: [[TMP27:%.*]] = mul i64 12, [[TMP26]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP27]]
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP26]], 1
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2
; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i1> [[TMP17]], i64 [[TMP29]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 4
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[L]] to i64
; CHECK-NEXT: [[UNUSED:%.*]] = load i32, ptr [[PTR_IV]], align 4
; CHECK-NEXT: [[MUL1:%.*]] = mul i64 [[EXT]], -7070675565921424023
; CHECK-NEXT: [[MUL2:%.*]] = add i64 [[MUL1]], -4
; CHECK-NEXT: store i64 [[MUL2]], ptr [[DST]], align 1
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 12
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 false
; CHECK-NEXT: br i1 [[OR_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[CMP_LCSSA:%.*]] = phi i1 [ [[CMP]], %[[LOOP]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i1 [[CMP_LCSSA]]
;
entry:
br label %loop

loop:
%ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
%gep = getelementptr i8, ptr %ptr.iv, i64 4
%l = load i32, ptr %gep, align 4
%ext = zext i32 %l to i64
%unused = load i32, ptr %ptr.iv, align 4
%mul1 = mul i64 %ext, -7070675565921424023
%mul2 = add i64 %mul1, -4
store i64 %mul2, ptr %dst, align 1
%ptr.iv.next = getelementptr nusw i8, ptr %ptr.iv, i64 12
%cmp = icmp eq ptr %ptr.iv.next, %end
%or.cond = select i1 %cmp, i1 true, i1 false
br i1 %or.cond, label %exit, label %loop

exit:
ret i1 %cmp
}

attributes #0 = { "target-features"="+v" }
attributes #1 = { "target-cpu"="sifive-p670" }
Loading