Skip to content

Commit

Permalink
[VPlan] Introduce recipe to build scalar steps.
Browse files Browse the repository at this point in the history
This patch adds a new VPScalarIVStepsRecipe to handle building scalar
steps.

This first patch only handles the case where no vector induction variable
is needed.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D115953
  • Loading branch information
fhahn committed Feb 27, 2022
1 parent b6d7568 commit 49b23f4
Show file tree
Hide file tree
Showing 21 changed files with 312 additions and 141 deletions.
103 changes: 71 additions & 32 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -2642,7 +2642,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
TruncInst *Trunc = Def->getTruncInst();
IRBuilderBase &Builder = State.Builder;
assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
assert(!State.VF.isZero() && "VF must be non-zero");
assert(State.VF.isVector() && "must have vector VF");

// The value from the original loop to which we are mapping the new induction
// variable.
Expand Down Expand Up @@ -2695,37 +2695,11 @@ void InnerLoopVectorizer::widenIntOrFpInduction(

// Now do the actual transformations, and start with creating the step value.
Value *Step = CreateStepValue(ID.getStep());
if (State.VF.isScalar()) {
Value *ScalarIV = CreateScalarIV(Step);
Type *ScalarTy = IntegerType::get(ScalarIV->getContext(),
Step->getType()->getScalarSizeInBits());

Instruction::BinaryOps IncOp = ID.getInductionOpcode();
if (IncOp == Instruction::BinaryOpsEnd)
IncOp = Instruction::Add;
for (unsigned Part = 0; Part < UF; ++Part) {
Value *StartIdx = ConstantInt::get(ScalarTy, Part);
Instruction::BinaryOps MulOp = Instruction::Mul;
if (Step->getType()->isFloatingPointTy()) {
StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType());
MulOp = Instruction::FMul;
}

Value *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
Value *EntryPart = Builder.CreateBinOp(IncOp, ScalarIV, Mul, "induction");
State.set(Def, EntryPart, Part);
if (Trunc) {
assert(!Step->getType()->isFloatingPointTy() &&
"fp inductions shouldn't be truncated");
addMetadata(EntryPart, Trunc);
}
}
return;
}

// Create a new independent vector induction variable, if one is needed.
if (Def->needsVectorIV())
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
// Create a new independent vector induction variable. Later VPlan2VPlan
// optimizations will remove it, if it won't be needed, e.g. because all users
// of it access scalar values.
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);

if (Def->needsScalarIV()) {
// Create scalar steps that can be used by instructions we will later
Expand Down Expand Up @@ -9328,6 +9302,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// in ways that accessing values using original IR values is incorrect.
Plan->disableValue2VPValue();

VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
VPlanTransforms::sinkScalarOperands(*Plan);
VPlanTransforms::mergeReplicateRegions(*Plan);
VPlanTransforms::removeDeadRecipes(*Plan, *OrigLoop);
Expand Down Expand Up @@ -9754,6 +9729,69 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV);
}

/// Emit the scalar values of the induction for every unrolled part. For a
/// vector VF the per-lane steps are produced by buildScalarSteps(); for a
/// scalar VF one value per part is computed as ScalarIV + (VF * Part) * Step.
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "VPScalarIVStepsRecipe being replicated.");

  // Any fast-math flags of the original induction update are applied to the
  // builder for the duration of this function; the guard restores the previous
  // flags on exit.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  auto *InductionBinOp = IndDesc.getInductionBinOp();
  if (InductionBinOp && isa<FPMathOperator>(InductionBinOp))
    State.Builder.setFastMathFlags(InductionBinOp->getFastMathFlags());

  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  auto *Trunc = dyn_cast<TruncInst>(getUnderlyingValue());

  // Start from the scalar canonical IV (part 0, lane 0).
  Value *ScalarIV = State.get(getCanonicalIV(), VPIteration(0, 0));
  auto *PlanCanonicalIV =
      State.get(getParent()->getPlan()->getCanonicalIV(), 0);
  Type *IVTy = IV->getType();
  if (!isCanonical() || PlanCanonicalIV->getType() != IVTy) {
    // The induction is not the canonical one, or has a different type: convert
    // the canonical IV to the induction's type and derive the induction value
    // from it.
    if (IVTy->isIntegerTy())
      ScalarIV = State.Builder.CreateSExtOrTrunc(ScalarIV, IVTy);
    else
      ScalarIV =
          State.Builder.CreateCast(Instruction::SIToFP, ScalarIV, IVTy);
    ScalarIV = emitTransformedIndex(State.Builder, ScalarIV,
                                    getStartValue()->getLiveInIRValue(), Step,
                                    IndDesc);
    ScalarIV->setName("offset.idx");
  }

  if (Trunc) {
    // The recipe stands for a truncated induction: narrow both the IV and the
    // step to the type of the truncate.
    auto *NarrowTy = cast<IntegerType>(Trunc->getType());
    assert(Step->getType()->isIntegerTy() &&
           "Truncation requires an integer step");
    ScalarIV = State.Builder.CreateTrunc(ScalarIV, NarrowTy);
    Step = State.Builder.CreateTrunc(Step, NarrowTy);
  }

  if (State.VF.isVector()) {
    buildScalarSteps(ScalarIV, Step, IV, IndDesc, this, State);
    return;
  }

  // Scalar VF: produce exactly one value per unrolled part.
  const bool HasFPStep = Step->getType()->isFloatingPointTy();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
    Value *PartValue = nullptr;
    if (HasFPStep) {
      Value *PartIdx =
          getRuntimeVFAsFloat(State.Builder, Step->getType(), State.VF * Part);
      // Floating-point operations inherit FMF via the builder's flags.
      Value *Offset = State.Builder.CreateFMul(PartIdx, Step);
      PartValue = State.Builder.CreateBinOp(IndDesc.getInductionOpcode(),
                                            ScalarIV, Offset);
    } else {
      Value *PartIdx =
          getRuntimeVF(State.Builder, Step->getType(), State.VF * Part);
      Value *Offset = State.Builder.CreateMul(PartIdx, Step);
      PartValue = State.Builder.CreateAdd(ScalarIV, Offset, "induction");
    }
    State.set(this, PartValue, Part);
    if (Trunc)
      State.ILV->addMetadata(PartValue, Trunc);
  }
}

void VPWidenPHIRecipe::execute(VPTransformState &State) {
State.ILV->widenPHIInstruction(cast<PHINode>(getUnderlyingValue()), this,
State);
Expand Down Expand Up @@ -10161,7 +10199,8 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) {
// Check if there is a scalar value for the selected lane.
if (!hasScalarValue(Def, {Part, LastLane})) {
// At the moment, VPWidenIntOrFpInductionRecipes can also be uniform.
assert(isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) &&
assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) ||
isa<VPScalarIVStepsRecipe>(Def->getDef())) &&
"unexpected recipe found to be invariant");
IsUniform = true;
LastLane = 0;
Expand Down
45 changes: 40 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Expand Up @@ -583,7 +583,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC: {
case VPWidenSelectSC:
case VPScalarIVStepsSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
Expand All @@ -608,6 +609,14 @@ void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

/// Link this recipe, which must not belong to any block yet, into \p BB
/// immediately before the position \p I. \p I is either BB.end() or an
/// iterator to a recipe that already lives in \p BB.
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);

  // Record the new parent first, then splice the recipe into BB's list.
  Parent = &BB;
  auto &Recipes = BB.getRecipeList();
  Recipes.insert(I, this);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
Expand All @@ -634,10 +643,8 @@ void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {

/// Unlink this recipe from its current block and re-insert it into \p BB
/// immediately before the position \p I. \p I is either BB.end() or an
/// iterator to a recipe in \p BB.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  // Validate the insertion point before the recipe is unlinked.
  assert(I == BB.end() || I->getParent() == &BB);
  removeFromParent();
  // Linking is delegated entirely to insertBefore(), which sets Parent and
  // inserts into BB's recipe list. Doing either by hand here as well would
  // link the recipe twice and trip insertBefore()'s "Recipe already in some
  // VPBasicBlock" assertion.
  // NOTE(review): relies on removeFromParent() leaving Parent null - confirm.
  insertBefore(BB, I);
}

void VPInstruction::generateInstruction(VPTransformState &State,
Expand Down Expand Up @@ -875,13 +882,16 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
auto *IV = getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
if (isa<VPScalarIVStepsRecipe>(U))
return true;
auto *VPI = cast<VPInstruction>(U);
return VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrement ||
VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrementNUW;
}) &&
"the canonical IV should only be used by its increments when "
"the canonical IV should only be used by its increments or "
"ScalarIVSteps when "
"resetting the start value");
IV->setOperand(0, VPV);
}
Expand Down Expand Up @@ -1272,7 +1282,32 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return StartC && StartC->isZero() && StepC && StepC->isOne();
}

/// Return the canonical IV this recipe is based on, i.e. operand 0. The cast
/// asserts that the operand really is a VPCanonicalIVPHIRecipe.
VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
  VPValue *CanonicalIVOp = getOperand(0);
  return cast<VPCanonicalIVPHIRecipe>(CanonicalIVOp);
}

bool VPScalarIVStepsRecipe::isCanonical() const {
auto *CanIV = getCanonicalIV();
// The start value of the steps-recipe must match the start value of the
// canonical induction and it must step by 1.
if (CanIV->getStartValue() != getStartValue())
return false;
auto *StepVPV = getStepValue();
if (StepVPV->getDef())
return false;
auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
return StepC && StepC->isOne();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe as "<Indent><result><Indent>= SCALAR-STEPS <operands>".
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  // Result value, preceded by the indentation.
  O << Indent;
  printAsOperand(O, SlotTracker);

  // The indentation is emitted once more between the result and the '='.
  O << Indent;
  O << "= SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}

void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-GEP ";
Expand Down
58 changes: 58 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Expand Up @@ -700,6 +700,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
/// Insert an unlinked recipe into \p BB immediately before the insertion
/// point \p IP.
void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

/// Insert an unlinked Recipe into a basic block immediately after
/// the specified Recipe.
Expand Down Expand Up @@ -1103,6 +1106,8 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
}

PHINode *getPHINode() { return IV; }

/// Returns the induction descriptor for the recipe.
const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

Expand Down Expand Up @@ -1769,6 +1774,12 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
static inline bool classof(const VPDef *D) {
return D->getVPDefID() == VPCanonicalIVPHISC;
}
static inline bool classof(const VPHeaderPHIRecipe *D) {
return D->getVPDefID() == VPCanonicalIVPHISC;
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC;
}

/// Generate the canonical scalar induction phi of the vector loop.
void execute(VPTransformState &State) override;
Expand Down Expand Up @@ -1834,6 +1845,53 @@ class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
}
};

/// A recipe that produces the scalar values of an integer or floating-point
/// induction phi. Its operands are, in order: the canonical IV, the start
/// value of the induction and the step of the induction.
class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
  /// The induction phi this recipe was created for.
  PHINode *IV;
  /// Descriptor of the induction. Held by reference, so the descriptor has to
  /// outlive the recipe.
  const InductionDescriptor &IndDesc;

public:
  /// Create the recipe with operands {\p CanonicalIV, \p Start, \p Step}. The
  /// underlying value of the defined VPValue is \p Trunc if it is non-null and
  /// \p IV otherwise.
  VPScalarIVStepsRecipe(PHINode *IV, const InductionDescriptor &IndDesc,
                        VPValue *CanonicalIV, VPValue *Start, VPValue *Step,
                        Instruction *Trunc)
      : VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Start, Step}),
        VPValue(Trunc ? Trunc : IV, this), IV(IV), IndDesc(IndDesc) {}

  ~VPScalarIVStepsRecipe() override = default;

  /// Type inquiry support for isa, cast and dyn_cast, starting from a VPDef.
  static inline bool classof(const VPDef *D) {
    return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
  }
  /// Type inquiry starting from a VPUser: only users that are recipes with the
  /// matching ID qualify.
  static inline bool classof(const VPUser *U) {
    if (auto *R = dyn_cast<VPRecipeBase>(U))
      return R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
    return false;
  }
  /// Type inquiry starting from a VPRecipeBase.
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
  }

  /// Generate the scalar values of the induction as needed by its users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the induction is canonical, i.e. it starts at 0 and is
  /// incremented by UF * VF (= the original IV is incremented by 1).
  bool isCanonical() const;

  /// Operand accessors: canonical IV (operand 0), start value (operand 1) and
  /// step (operand 2).
  VPCanonicalIVPHIRecipe *getCanonicalIV() const;
  VPValue *getStartValue() const { return getOperand(1); }
  VPValue *getStepValue() const { return getOperand(2); }
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
Expand Down
36 changes: 36 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Expand Up @@ -378,3 +378,39 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) {
R.eraseFromParent();
}
}

/// For every widened integer/floating-point induction in the header of the
/// vector loop that does not need a vector IV, create a VPScalarIVStepsRecipe
/// based on the canonical IV of \p Plan and redirect all users of the widened
/// induction to it. \p SE is used to expand steps that are neither a constant
/// nor an existing IR value.
void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();

  // Wrap an IR value that already exists in a VPValue and register it with the
  // plan as an external definition.
  auto CreateLiveInStep = [&Plan](auto *IRStep) -> VPValue * {
    VPValue *LiveIn = new VPValue(IRStep);
    Plan.addExternalDef(LiveIn);
    return LiveIn;
  };

  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    // Only inductions whose vector value is not needed are handled.
    if (!IV || IV->needsVectorIV())
      continue;

    const InductionDescriptor &ID = IV->getInductionDescriptor();
    const SCEV *StepSCEV = ID.getStep();

    // Constant and unknown SCEVs wrap an existing IR value and become
    // live-ins; any other step expression is expanded by a dedicated recipe.
    VPValue *Step = nullptr;
    if (auto *E = dyn_cast<SCEVConstant>(StepSCEV))
      Step = CreateLiveInStep(E->getValue());
    else if (auto *E = dyn_cast<SCEVUnknown>(StepSCEV))
      Step = CreateLiveInStep(E->getValue());
    else
      Step = new VPExpandSCEVRecipe(StepSCEV, SE);

    VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
        IV->getPHINode(), ID, Plan.getCanonicalIV(), IV->getStartValue(), Step,
        IV->getTruncInst());

    HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi());
    if (Step->getDef()) {
      // The step is computed by a recipe: insert it at the first non-phi
      // position as well, which places it ahead of Steps, its user.
      // TODO: Place the step in the preheader, once it is explicitly modeled in
      // VPlan.
      HeaderVPBB->insert(cast<VPRecipeBase>(Step->getDef()),
                         HeaderVPBB->getFirstNonPhi());
    }
    IV->replaceAllUsesWith(Steps);
  }
}
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Expand Up @@ -54,6 +54,10 @@ struct VPlanTransforms {
/// Try to remove dead recipes. At the moment, only dead header recipes are
/// removed.
static void removeDeadRecipes(VPlan &Plan, Loop &OrigLoop);

/// If all users of a vector IV need scalar values, provide them by building
/// scalar steps off of the canonical scalar IV and redirect the users of the
/// vector IV to those steps.
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE);
};

} // namespace llvm
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Expand Up @@ -332,6 +332,7 @@ class VPDef {
VPInterleaveSC,
VPReductionSC,
VPReplicateSC,
VPScalarIVStepsSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenGEPSC,
Expand Down
Expand Up @@ -34,7 +34,7 @@ define void @outside_user_blocks_tail_folding(i8* nocapture readonly %ptr, i32 %
; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
Expand All @@ -50,7 +50,7 @@ define void @outside_user_blocks_tail_folding(i8* nocapture readonly %ptr, i32 %
; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1
; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2
; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: end:
; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
Expand Down

0 comments on commit 49b23f4

Please sign in to comment.