Skip to content

Commit

Permalink
[VPlan] Add VPCanonicalIVPHIRecipe, partly retire createInductionVari…
Browse files Browse the repository at this point in the history
…able.

At the moment, the primary induction variable for the vector loop is
created as part of the skeleton creation. This is tied to creating the
vector loop latch outside of VPlan. This prevents modeling the
*whole* vector loop in VPlan, which in turn is required to model
preheader and exit blocks in VPlan as well.

This patch introduces a new recipe VPCanonicalIVPHIRecipe to represent the
primary IV in VPlan and CanonicalIVIncrement{NUW} opcodes for
VPInstruction to model the increment.

This allows us to partly retire createInductionVariable. At the moment,
a bit of patching up is done after executing all blocks in the plan.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D113223
  • Loading branch information
fhahn committed Jan 5, 2022
1 parent 015e08c commit 65c4d61
Show file tree
Hide file tree
Showing 29 changed files with 491 additions and 287 deletions.
254 changes: 124 additions & 130 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Large diffs are not rendered by default.

128 changes: 104 additions & 24 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Expand Up @@ -711,6 +711,25 @@ void VPInstruction::generateInstruction(VPTransformState &State,
}
break;
}

case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementNUW: {
Value *Next = nullptr;
if (Part == 0) {
bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
auto *Phi = State.get(getOperand(0), 0);
// The loop step is equal to the vectorization factor (num of SIMD
// elements) times the unroll factor (num of SIMD instructions).
Value *Step =
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
} else {
Next = State.get(this, 0);
}

State.set(this, Next, Part);
break;
}
default:
llvm_unreachable("Unsupported opcode for instruction");
}
Expand Down Expand Up @@ -758,6 +777,12 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::FirstOrderRecurrenceSplice:
O << "first-order splice";
break;
case VPInstruction::CanonicalIVIncrement:
O << "VF * UF + ";
break;
case VPInstruction::CanonicalIVIncrementNUW:
O << "VF * UF +(nuw) ";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
Expand Down Expand Up @@ -786,7 +811,9 @@ void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
FMF = FMFNew;
}

void VPlan::prepareToExecute(Value *TripCountV, VPTransformState &State) {
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
Value *CanonicalIVStartValue,
VPTransformState &State) {
// Check if the trip count is needed, and if so build it.
if (TripCount && TripCount->getNumUsers()) {
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
Expand All @@ -805,6 +832,18 @@ void VPlan::prepareToExecute(Value *TripCountV, VPTransformState &State) {
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(BackedgeTakenCount, VTCMO, Part);
}

for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(&VectorTripCount, VectorTripCountV, Part);

// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
if (CanonicalIVStartValue) {
VPValue *VPV = new VPValue(CanonicalIVStartValue);
addExternalDef(VPV);
auto *IV = getCanonicalIV();
IV->setOperand(0, VPV);
}
}

/// Generate the code inside the body of the vectorized loop. Assumes a single
Expand Down Expand Up @@ -842,28 +881,6 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);

// Fix the latch value of reduction and first-order recurrences phis in the
// vector loop.
VPBasicBlock *Header = Entry->getEntryBasicBlock();
for (VPRecipeBase &R : Header->phis()) {
auto *PhiR = dyn_cast<VPHeaderPHIRecipe>(&R);
if (!PhiR || !(isa<VPFirstOrderRecurrencePHIRecipe>(&R) ||
isa<VPReductionPHIRecipe>(&R)))
continue;
// For first-order recurrences and in-order reduction phis, only a single
// part is generated, which provides the last part from the previous
// iteration. Otherwise all UF parts are generated.
bool SinglePartNeeded = isa<VPFirstOrderRecurrencePHIRecipe>(&R) ||
cast<VPReductionPHIRecipe>(&R)->isOrdered();
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *VecPhi = State->get(PhiR, Part);
Value *Val = State->get(PhiR->getBackedgeValue(),
SinglePartNeeded ? State->UF - 1 : Part);
cast<PHINode>(VecPhi)->addIncoming(Val, VectorLatchBB);
}
}

// Setup branch terminator successors for VPBBs in VPBBsToFix based on
// VPBB's successors.
for (auto VPBB : State->CFG.VPBBsToFix) {
Expand Down Expand Up @@ -899,6 +916,47 @@ void VPlan::execute(VPTransformState *State) {
assert(Merged && "Could not merge last basic block with latch.");
VectorLatchBB = LastBB;

// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
VPBasicBlock *Header = Entry->getEntryBasicBlock();
if (Header->empty()) {
assert(EnableVPlanNativePath);
Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
}
for (VPRecipeBase &R : Header->phis()) {
// Skip phi-like recipes that generate their backedge values themselves.
// TODO: Model their backedge values explicitly.
if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R))
continue;

auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
// For canonical IV, first-order recurrences and in-order reduction phis,
// only a single part is generated, which provides the last part from the
// previous iteration. For non-ordered reductions all UF parts are
// generated.
bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;

for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *Phi = State->get(PhiR, Part);
Value *Val = State->get(PhiR->getBackedgeValue(),
SinglePartNeeded ? State->UF - 1 : Part);
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
}

// Add the loop exit condition and branch based on the canonical induction.
auto *CanonicalIV = getCanonicalIV();
// TODO: Model compare and branch explicitly in VPlan as recipes.
auto *Next = State->get(CanonicalIV->getBackedgeValue(), 0);
auto *TermBr = cast<BranchInst>(VectorLatchBB->getTerminator());
State->Builder.SetInsertPoint(TermBr);
auto *ICmp =
State->Builder.CreateICmpEQ(Next, State->get(&getVectorTripCount(), 0));
TermBr->setCondition(ICmp);

// We do not attempt to preserve DT for outer loop vectorization currently.
if (!EnableVPlanNativePath)
updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
Expand All @@ -912,6 +970,8 @@ void VPlan::print(raw_ostream &O) const {

O << "VPlan '" << Name << "' {";

assert(VectorTripCount.getNumUsers() == 0 &&
"should not be used yet in VPlan");
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
O << "\nLive-in ";
BackedgeTakenCount->printAsOperand(O, SlotTracker);
Expand Down Expand Up @@ -1272,8 +1332,27 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif

/// Emit the scalar phi for the canonical induction variable.
///
/// A two-entry phi named "index" is created at the first insertion point of
/// State.CFG.PrevBB. Only the incoming value from State.CFG.VectorPreHeader
/// (the recipe's live-in start value) is added here. The same phi is then
/// recorded in \p State for every unroll part.
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *StartV = getStartValue()->getLiveInIRValue();
  auto *HeaderBB = State.CFG.PrevBB;

  PHINode *IndexPhi = PHINode::Create(StartV->getType(), 2, "index",
                                      &*HeaderBB->getFirstInsertionPt());
  IndexPhi->addIncoming(StartV, State.CFG.VectorPreHeader);
  IndexPhi->setDebugLoc(DL);

  // The canonical IV is a single scalar, so all parts share one phi.
  const unsigned NumParts = State.UF;
  for (unsigned Part = 0; Part != NumParts; ++Part)
    State.set(this, IndexPhi, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe as "<Indent>EMIT <defined value> = CANONICAL-INDUCTION".
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "EMIT ";
  // The recipe defines a single value; print it in operand form.
  auto *DefinedValue = getVPSingleValue();
  DefinedValue->printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
Value *CanonicalIV = State.CanonicalIV;
Value *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0);
Type *STy = CanonicalIV->getType();
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
ElementCount VF = State.VF;
Expand Down Expand Up @@ -1514,6 +1593,7 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
for (const VPValue *V : Plan.VPExternalDefs)
assignSlot(V);

assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
assignSlot(Plan.BackedgeTakenCount);

Expand Down
84 changes: 74 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Expand Up @@ -69,6 +69,9 @@ class VPlanSlp;
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, int64_t Step);

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 9) = {1, 2, 4, 8}
Expand Down Expand Up @@ -790,6 +793,8 @@ class VPInstruction : public VPRecipeBase, public VPValue {
SLPLoad,
SLPStore,
ActiveLaneMask,
CanonicalIVIncrement,
CanonicalIVIncrementNUW,
};

private:
Expand Down Expand Up @@ -1074,14 +1079,18 @@ class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {

/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
return B->getVPDefID() == VPRecipeBase::VPWidenPHISC ||
return B->getVPDefID() == VPRecipeBase::VPCanonicalIVPHISC ||
B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC ||
B->getVPDefID() == VPRecipeBase::VPReductionPHISC;
B->getVPDefID() == VPRecipeBase::VPReductionPHISC ||
B->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
B->getVPDefID() == VPRecipeBase::VPWidenPHISC;
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVWidenPHISC ||
return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC ||
V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC ||
V->getVPValueID() == VPValue::VPVReductionPHISC;
V->getVPValueID() == VPValue::VPVReductionPHISC ||
V->getVPValueID() == VPValue::VPVWidenIntOrFpInductionSC ||
V->getVPValueID() == VPValue::VPVWidenPHISC;
}

/// Generate the phi nodes.
Expand Down Expand Up @@ -1131,6 +1140,9 @@ class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
/// Type inquiry support (isa, cast, dyn_cast) starting from a VPRecipeBase:
/// true iff \p B has the VPWidenPHISC def ID.
static inline bool classof(const VPRecipeBase *B) {
return B->getVPDefID() == VPRecipeBase::VPWidenPHISC;
}
/// Type inquiry support (isa, cast, dyn_cast) starting from a
/// VPHeaderPHIRecipe: true iff \p R has the VPWidenPHISC def ID.
static inline bool classof(const VPHeaderPHIRecipe *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenPHISC;
}
/// Type inquiry support (isa, cast, dyn_cast) starting from a VPValue:
/// true iff \p V has the VPVWidenPHISC value ID.
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVWidenPHISC;
}
Expand Down Expand Up @@ -1169,8 +1181,8 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
}
static inline bool classof(const VPWidenPHIRecipe *D) {
return D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
static inline bool classof(const VPHeaderPHIRecipe *R) {
return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC;
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC;
Expand Down Expand Up @@ -1215,12 +1227,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPReductionPHISC;
}
static inline bool classof(const VPHeaderPHIRecipe *R) {
return R->getVPDefID() == VPRecipeBase::VPReductionPHISC;
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVReductionPHISC;
}
static inline bool classof(const VPWidenPHIRecipe *R) {
return R->getVPDefID() == VPRecipeBase::VPReductionPHISC;
}

/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;
Expand Down Expand Up @@ -1618,6 +1630,36 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue {
#endif
};

/// Canonical scalar induction phi of the vector loop. It starts at the
/// specified start value (either 0 or the resume value when vectorizing the
/// epilogue loop). VPWidenCanonicalIVRecipe represents the vector version of
/// the canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
/// Debug location stored at construction time for use when the phi is
/// generated.
DebugLoc DL;

public:
/// Create a canonical IV phi recipe with start value \p StartV and debug
/// location \p DL.
// NOTE(review): the nullptr forwarded to VPHeaderPHIRecipe is presumably the
// (absent) underlying IR phi -- confirm against the base-class constructor.
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
: VPHeaderPHIRecipe(VPValue::VPVCanonicalIVPHISC, VPCanonicalIVPHISC,
nullptr, StartV),
DL(DL) {}

~VPCanonicalIVPHIRecipe() override = default;

/// Method to support type inquiry through isa, cast, and dyn_cast: true iff
/// \p D has the VPCanonicalIVPHISC def ID.
static inline bool classof(const VPDef *D) {
return D->getVPDefID() == VPCanonicalIVPHISC;
}

/// Generate the canonical scalar induction phi of the vector loop.
void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe to \p O, prefixed by \p Indent; \p SlotTracker is used to
/// print the defined value as an operand.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
};

/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
public:
Expand Down Expand Up @@ -2137,6 +2179,9 @@ class VPlan {
/// the tail. It equals TripCount - 1.
VPValue *BackedgeTakenCount = nullptr;

/// Represents the vector trip count.
VPValue VectorTripCount;

/// Holds a mapping between Values and their corresponding VPValue inside
/// VPlan.
Value2VPValueTy Value2VPValue;
Expand Down Expand Up @@ -2177,7 +2222,8 @@ class VPlan {
}

/// Prepare the plan for execution, setting up the required live-in values.
void prepareToExecute(Value *TripCount, VPTransformState &State);
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
Value *CanonicalIVStartValue, VPTransformState &State);

/// Generate the IR code for this VPlan.
void execute(struct VPTransformState *State);
Expand Down Expand Up @@ -2205,6 +2251,9 @@ class VPlan {
return BackedgeTakenCount;
}

/// Returns a reference to the plan-owned VPValue that represents the vector
/// trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }

/// Mark the plan to indicate that using Value2VPValue is not safe any
/// longer, because it may be stale.
void disableValue2VPValue() { Value2VPValueEnabled = false; }
Expand Down Expand Up @@ -2297,6 +2346,21 @@ class VPlan {
return !VPV->getDef() || (RepR && RepR->isUniform());
}

/// Returns the vector loop region, i.e. the plan's entry block cast to
/// VPRegionBlock.
VPRegionBlock *getVectorLoopRegion() {
  auto *LoopRegion = cast<VPRegionBlock>(getEntry());
  return LoopRegion;
}

/// Returns the canonical induction recipe of the vector loop, which is
/// expected to be the first recipe of the loop's header block.
VPCanonicalIVPHIRecipe *getCanonicalIV() {
  VPBasicBlock *HeaderVPBB = getVectorLoopRegion()->getEntryBasicBlock();
  // VPlan native path: the region's entry block is empty, so look at its
  // single successor instead.
  if (HeaderVPBB->empty())
    HeaderVPBB = cast<VPBasicBlock>(HeaderVPBB->getSingleSuccessor());

  auto &FirstRecipe = *HeaderVPBB->begin();
  return cast<VPCanonicalIVPHIRecipe>(&FirstRecipe);
}

private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Expand Up @@ -102,6 +102,7 @@ class VPValue {

// Phi-like VPValues. Need to be kept together.
VPVBlendSC,
VPVCanonicalIVPHISC,
VPVFirstOrderRecurrencePHISC,
VPVWidenPHISC,
VPVWidenIntOrFpInductionSC,
Expand Down Expand Up @@ -333,6 +334,7 @@ class VPDef {

// Phi-like recipes. Need to be kept together.
VPBlendSC,
VPCanonicalIVPHISC,
VPFirstOrderRecurrencePHISC,
VPWidenPHISC,
VPWidenIntOrFpInductionSC,
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Expand Up @@ -163,6 +163,13 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
errs() << "VPlan entry block is not a VPBasicBlock\n";
return false;
}

if (!isa<VPCanonicalIVPHIRecipe>(&*Entry->begin())) {
errs() << "VPlan vector loop header does not start with a "
"VPCanonicalIVPHIRecipe\n";
return false;
}

const VPBasicBlock *Exit = dyn_cast<VPBasicBlock>(TopRegion->getExit());
if (!Exit) {
errs() << "VPlan exit block is not a VPBasicBlock\n";
Expand Down

0 comments on commit 65c4d61

Please sign in to comment.