Skip to content

Commit

Permalink
Revert "[VPlan] Introduce recipe to build scalar steps."
Browse files Browse the repository at this point in the history
This reverts commit 49b23f4.

This appears to break some PPC build bots. Revert while I investigate.
  • Loading branch information
fhahn committed Feb 27, 2022
1 parent 33ce97f commit ff93260
Show file tree
Hide file tree
Showing 21 changed files with 141 additions and 312 deletions.
103 changes: 32 additions & 71 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -2642,7 +2642,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
TruncInst *Trunc = Def->getTruncInst();
IRBuilderBase &Builder = State.Builder;
assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
assert(State.VF.isVector() && "must have vector VF");
assert(!State.VF.isZero() && "VF must be non-zero");

// The value from the original loop to which we are mapping the new induction
// variable.
Expand Down Expand Up @@ -2695,11 +2695,37 @@ void InnerLoopVectorizer::widenIntOrFpInduction(

// Now do the actual transformations, and start with creating the step value.
Value *Step = CreateStepValue(ID.getStep());
if (State.VF.isScalar()) {
Value *ScalarIV = CreateScalarIV(Step);
Type *ScalarTy = IntegerType::get(ScalarIV->getContext(),
Step->getType()->getScalarSizeInBits());

Instruction::BinaryOps IncOp = ID.getInductionOpcode();
if (IncOp == Instruction::BinaryOpsEnd)
IncOp = Instruction::Add;
for (unsigned Part = 0; Part < UF; ++Part) {
Value *StartIdx = ConstantInt::get(ScalarTy, Part);
Instruction::BinaryOps MulOp = Instruction::Mul;
if (Step->getType()->isFloatingPointTy()) {
StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType());
MulOp = Instruction::FMul;
}

Value *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
Value *EntryPart = Builder.CreateBinOp(IncOp, ScalarIV, Mul, "induction");
State.set(Def, EntryPart, Part);
if (Trunc) {
assert(!Step->getType()->isFloatingPointTy() &&
"fp inductions shouldn't be truncated");
addMetadata(EntryPart, Trunc);
}
}
return;
}

// Create a new independent vector induction variable. Later VPlan2VPlan
// optimizations will remove it, if it won't be needed, e.g. because all users
// of it access scalar values.
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
// Create a new independent vector induction variable, if one is needed.
if (Def->needsVectorIV())
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);

if (Def->needsScalarIV()) {
// Create scalar steps that can be used by instructions we will later
Expand Down Expand Up @@ -9302,7 +9328,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// in ways that accessing values using original IR values is incorrect.
Plan->disableValue2VPValue();

VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
VPlanTransforms::sinkScalarOperands(*Plan);
VPlanTransforms::mergeReplicateRegions(*Plan);
VPlanTransforms::removeDeadRecipes(*Plan, *OrigLoop);
Expand Down Expand Up @@ -9729,69 +9754,6 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV);
}

/// Generate the scalar induction values for this recipe. A scalar IV is
/// derived from the canonical IV; for vector VFs the per-lane steps are then
/// produced by buildScalarSteps, while for a scalar VF one value per unrolled
/// part is created here and recorded in \p State.
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "VPScalarIVStepsRecipe being replicated.");

// Fast-math-flags propagate from the original induction instruction.
// The guard restores the builder's previous flags when this function returns.
IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
if (IndDesc.getInductionBinOp() &&
isa<FPMathOperator>(IndDesc.getInductionBinOp()))
State.Builder.setFastMathFlags(
IndDesc.getInductionBinOp()->getFastMathFlags());

// Step is read for part 0, lane 0 only.
Value *Step = State.get(getStepValue(), VPIteration(0, 0));
// Non-null only when the recipe's underlying value is a truncate instruction.
auto *Trunc = dyn_cast<TruncInst>(getUnderlyingValue());
// Builds the scalar IV. Step is taken by reference because it is truncated in
// place when a truncate is present, so later uses see the narrowed step.
auto CreateScalarIV = [&](Value *&Step) -> Value * {
Value *ScalarIV = State.get(getCanonicalIV(), VPIteration(0, 0));
auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0);
// Unless this recipe is canonical and already has the canonical IV's type,
// convert the canonical IV to the induction's type (sext/trunc for integers,
// SIToFP otherwise) and transform it into the induction's own index.
if (!isCanonical() || CanonicalIV->getType() != IV->getType()) {
ScalarIV = IV->getType()->isIntegerTy()
? State.Builder.CreateSExtOrTrunc(ScalarIV, IV->getType())
: State.Builder.CreateCast(Instruction::SIToFP, ScalarIV,
IV->getType());
ScalarIV = emitTransformedIndex(State.Builder, ScalarIV,
getStartValue()->getLiveInIRValue(), Step,
IndDesc);
ScalarIV->setName("offset.idx");
}
// With a truncate, both the IV and the step are narrowed to the truncate's
// integer type.
if (Trunc) {
auto *TruncType = cast<IntegerType>(Trunc->getType());
assert(Step->getType()->isIntegerTy() &&
"Truncation requires an integer step");
ScalarIV = State.Builder.CreateTrunc(ScalarIV, TruncType);
Step = State.Builder.CreateTrunc(Step, TruncType);
}
return ScalarIV;
};

Value *ScalarIV = CreateScalarIV(Step);
// Vector VF: the per-part/per-lane values are created by buildScalarSteps.
if (State.VF.isVector()) {
buildScalarSteps(ScalarIV, Step, IV, IndDesc, this, State);
return;
}

// Scalar VF: create one value per unrolled part by combining ScalarIV with
// (VF * Part) * Step.
// NOTE(review): getRuntimeVF / getRuntimeVFAsFloat presumably materialize
// VF * Part as a value of Step's type -- confirm against their definitions.
for (unsigned Part = 0; Part < State.UF; ++Part) {
assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
Value *EntryPart;
if (Step->getType()->isFloatingPointTy()) {
Value *StartIdx =
getRuntimeVFAsFloat(State.Builder, Step->getType(), State.VF * Part);
// Floating-point operations inherit FMF via the builder's flags.
Value *MulOp = State.Builder.CreateFMul(StartIdx, Step);
// The FP path combines with the descriptor's induction opcode rather than a
// fixed add.
EntryPart = State.Builder.CreateBinOp(IndDesc.getInductionOpcode(),
ScalarIV, MulOp);
} else {
Value *StartIdx =
getRuntimeVF(State.Builder, Step->getType(), State.VF * Part);
EntryPart = State.Builder.CreateAdd(
ScalarIV, State.Builder.CreateMul(StartIdx, Step), "induction");
}
State.set(this, EntryPart, Part);
if (Trunc)
State.ILV->addMetadata(EntryPart, Trunc);
}
}

void VPWidenPHIRecipe::execute(VPTransformState &State) {
State.ILV->widenPHIInstruction(cast<PHINode>(getUnderlyingValue()), this,
State);
Expand Down Expand Up @@ -10199,8 +10161,7 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) {
// Check if there is a scalar value for the selected lane.
if (!hasScalarValue(Def, {Part, LastLane})) {
// At the moment, VPWidenIntOrFpInductionRecipes can also be uniform.
assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) ||
isa<VPScalarIVStepsRecipe>(Def->getDef())) &&
assert(isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) &&
"unexpected recipe found to be invariant");
IsUniform = true;
LastLane = 0;
Expand Down
45 changes: 5 additions & 40 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Expand Up @@ -583,8 +583,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC:
case VPScalarIVStepsSC: {
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
Expand All @@ -609,14 +608,6 @@ void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

/// Link this recipe into \p BB at the position given by iterator \p I. The
/// recipe must not currently belong to any VPBasicBlock.
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
iplist<VPRecipeBase>::iterator I) {
assert(!Parent && "Recipe already in some VPBasicBlock");
// \p I must be BB's end iterator or refer to a recipe that lives in BB.
assert(I == BB.end() || I->getParent() == &BB);
Parent = &BB;
BB.getRecipeList().insert(I, this);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
Expand All @@ -643,8 +634,10 @@ void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
iplist<VPRecipeBase>::iterator I) {
assert(I == BB.end() || I->getParent() == &BB);
removeFromParent();
insertBefore(BB, I);
Parent = &BB;
BB.getRecipeList().insert(I, this);
}

void VPInstruction::generateInstruction(VPTransformState &State,
Expand Down Expand Up @@ -882,16 +875,13 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
auto *IV = getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
if (isa<VPScalarIVStepsRecipe>(U))
return true;
auto *VPI = cast<VPInstruction>(U);
return VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrement ||
VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrementNUW;
}) &&
"the canonical IV should only be used by its increments or "
"ScalarIVSteps when "
"the canonical IV should only be used by its increments when "
"resetting the start value");
IV->setOperand(0, VPV);
}
Expand Down Expand Up @@ -1282,32 +1272,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return StartC && StartC->isZero() && StepC && StepC->isOne();
}

/// Return operand 0 as the canonical IV recipe. Uses cast<>, so operand 0 is
/// required to be a VPCanonicalIVPHIRecipe.
VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}

/// Returns true when this recipe starts at the canonical induction's start
/// value and advances by a live-in constant step of one.
bool VPScalarIVStepsRecipe::isCanonical() const {
  // The recipe has to begin where the canonical induction begins.
  const bool SameStart = getCanonicalIV()->getStartValue() == getStartValue();
  if (!SameStart)
    return false;

  // A step produced by another recipe is not a live-in, so it cannot be
  // inspected as an IR constant.
  auto *StepOperand = getStepValue();
  if (StepOperand->getDef() != nullptr)
    return false;

  // Canonical only if the live-in step is the integer constant one.
  if (auto *ConstStep =
          dyn_cast_or_null<ConstantInt>(StepOperand->getLiveInIRValue()))
    return ConstStep->isOne();
  return false;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this recipe to \p O as "<Indent><result> = SCALAR-STEPS <operands>".
/// \p Indent is the line prefix; \p SlotTracker names the printed values.
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  // Indent is only the line prefix. Streaming it again here made the gap
  // between the result and '=' depend on nesting depth, so use one space.
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}

void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-GEP ";
Expand Down
58 changes: 0 additions & 58 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Expand Up @@ -700,9 +700,6 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
/// Insert an unlinked recipe into \p BB immediately before the insertion
/// point \p IP.
void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

/// Insert an unlinked Recipe into a basic block immediately after
/// the specified Recipe.
Expand Down Expand Up @@ -1106,8 +1103,6 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
}

PHINode *getPHINode() { return IV; }

/// Returns the induction descriptor for the recipe.
const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

Expand Down Expand Up @@ -1774,12 +1769,6 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
static inline bool classof(const VPDef *D) {
return D->getVPDefID() == VPCanonicalIVPHISC;
}
static inline bool classof(const VPHeaderPHIRecipe *D) {
return D->getVPDefID() == VPCanonicalIVPHISC;
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC;
}

/// Generate the canonical scalar induction phi of the vector loop.
void execute(VPTransformState &State) override;
Expand Down Expand Up @@ -1845,53 +1834,6 @@ class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
}
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
/// The recipe has three operands, in this order: the canonical IV, the start
/// value and the step value.
class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
// The original induction phi. It is also the recipe's underlying value when
// no truncate is passed to the constructor.
PHINode *IV;
// Descriptor of the induction. Held by reference, so the referenced
// descriptor must outlive this recipe.
const InductionDescriptor &IndDesc;

public:
/// Create the recipe with operands {\p CanonicalIV, \p Start, \p Step}. If
/// \p Trunc is non-null it becomes the underlying value; otherwise \p IV does.
VPScalarIVStepsRecipe(PHINode *IV, const InductionDescriptor &IndDesc,
VPValue *CanonicalIV, VPValue *Start, VPValue *Step,
Instruction *Trunc)
: VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Start, Step}),
VPValue(Trunc ? Trunc : IV, this), IV(IV), IndDesc(IndDesc) {}

~VPScalarIVStepsRecipe() override = default;

/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPDef *D) {
return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
}
/// Extra classof implementations to allow directly casting from VPUser ->
/// VPScalarIVStepsRecipe.
static inline bool classof(const VPUser *U) {
// A user that is not a recipe can never be a VPScalarIVStepsRecipe.
auto *R = dyn_cast<VPRecipeBase>(U);
return R && R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
}
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
}

/// Generate the scalarized versions of the phi node as needed by their users.
void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns true if the induction is canonical, i.e. starting at 0 and
/// incremented by UF * VF (= the original IV is incremented by 1).
bool isCanonical() const;

/// Returns the canonical IV recipe (operand 0).
VPCanonicalIVPHIRecipe *getCanonicalIV() const;
/// Returns the start value (operand 1).
VPValue *getStartValue() const { return getOperand(1); }
/// Returns the step value (operand 2).
VPValue *getStepValue() const { return getOperand(2); }
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
Expand Down
36 changes: 0 additions & 36 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Expand Up @@ -378,39 +378,3 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) {
R.eraseFromParent();
}
}

/// For every widened int/FP induction in the header of \p Plan's vector loop
/// region that does not need a vector IV, create a VPScalarIVStepsRecipe based
/// on the plan's canonical IV and redirect all users of the induction to it.
/// The original induction recipe is not erased here.
///
/// \param Plan The plan whose header inductions are rewritten.
/// \param SE   Scalar evolution, used when the step must be expanded from a
///             SCEV that is neither a constant nor an unknown.
void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!IV || IV->needsVectorIV())
      continue;

    // Model the step as a VPValue: constant and unknown SCEVs wrap their IR
    // value as an external def of the plan; any other SCEV is expanded by a
    // dedicated recipe.
    const InductionDescriptor &ID = IV->getInductionDescriptor();
    const SCEV *StepSCEV = ID.getStep();
    VPValue *Step = nullptr;
    if (auto *E = dyn_cast<SCEVConstant>(StepSCEV)) {
      Step = new VPValue(E->getValue());
      Plan.addExternalDef(Step);
    } else if (auto *E = dyn_cast<SCEVUnknown>(StepSCEV)) {
      Step = new VPValue(E->getValue());
      Plan.addExternalDef(Step);
    } else {
      Step = new VPExpandSCEVRecipe(StepSCEV, SE);
    }

    VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
        IV->getPHINode(), ID, Plan.getCanonicalIV(), IV->getStartValue(), Step,
        IV->getTruncInst());

    HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi());
    if (Step->getDef()) {
      // TODO: Place the step in the preheader, once it is explicitly modeled in
      // VPlan.
      // Inserted at the first non-phi position after Steps was placed there,
      // so the step's defining recipe ends up ahead of Steps.
      HeaderVPBB->insert(cast<VPRecipeBase>(Step->getDef()),
                         HeaderVPBB->getFirstNonPhi());
    }
    IV->replaceAllUsesWith(Steps);
  }
}
4 changes: 0 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Expand Up @@ -54,10 +54,6 @@ struct VPlanTransforms {
/// Try to remove dead recipes. At the moment, only dead header recipes are
/// removed.
static void removeDeadRecipes(VPlan &Plan, Loop &OrigLoop);

// If all users of a vector IV need scalar values, provide them by building
// scalar steps off of the canonical scalar IV, and remove the vector IV.
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE);
};

} // namespace llvm
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanValue.h
Expand Up @@ -332,7 +332,6 @@ class VPDef {
VPInterleaveSC,
VPReductionSC,
VPReplicateSC,
VPScalarIVStepsSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenGEPSC,
Expand Down
Expand Up @@ -34,7 +34,7 @@ define void @outside_user_blocks_tail_folding(i8* nocapture readonly %ptr, i32 %
; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
Expand All @@ -50,7 +50,7 @@ define void @outside_user_blocks_tail_folding(i8* nocapture readonly %ptr, i32 %
; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
; CHECK-NEXT: store i8 [[TMP8]], i8* [[BUFF]], align 1
; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop !2
; CHECK: end:
; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
Expand Down

0 comments on commit ff93260

Please sign in to comment.