Skip to content

Commit

Permalink
[LV] Move exit cond simplification to separate transform.
Browse files Browse the repository at this point in the history
This sets the stage for D133017 by moving out the code that performs
VPlan based simplifications to a separate transform that takes the
chosen VF & UF as arguments.

The main advantage is that this transform runs before any changes to
the CFG are being made. This allows using SCEV without worrying about
making queries while the IR is in an incomplete state.

Note that this patch switches the reasoning to use SCEV, but still only
simplifies loops with constant trip counts. Using SCEV here is needed to
access the backedge taken count, because the trip count IR value has not
been created yet.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D135017
  • Loading branch information
fhahn committed Dec 23, 2022
1 parent 07d9ab9 commit e1650c8
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 35 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -7628,6 +7628,9 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// mutation. See PR49900.
ILV.getOrCreateTripCount(OrigLoop->getLoopPreheader());

if (!IsEpilogueVectorization)
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);

// Perform the actual loop transformation.

// 1. Set up the skeleton for vectorization, including vector pre-header and
Expand Down
34 changes: 0 additions & 34 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Expand Up @@ -585,45 +585,11 @@ VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
return nullptr;
}

/// Returns true if the first operand of \p Term is a VPInstruction with
/// opcode Not whose own first operand is a VPInstruction with opcode
/// ActiveLaneMask, i.e. \p Term has the shape Term(Not(ActiveLaneMask(...))).
static bool canSimplifyBranchOnCond(VPInstruction *Term) {
  // Walk down the operand chain; any mismatch falls through to 'false'.
  if (auto *Negated = dyn_cast<VPInstruction>(Term->getOperand(0)))
    if (Negated->getOpcode() == VPInstruction::Not)
      if (auto *Mask = dyn_cast<VPInstruction>(Negated->getOperand(0)))
        return Mask->getOpcode() == VPInstruction::ActiveLaneMask;
  return false;
}

void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
Value *CanonicalIVStartValue,
VPTransformState &State,
bool IsEpilogueVectorization) {

VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
// Try to simplify the branch condition if TC <= VF * UF when preparing to
// execute the plan for the main vector loop. We only do this if the
// terminator is:
// 1. BranchOnCount, or
// 2. BranchOnCond where the input is Not(ActiveLaneMask).
if (!IsEpilogueVectorization && Term && isa<ConstantInt>(TripCountV) &&
(Term->getOpcode() == VPInstruction::BranchOnCount ||
(Term->getOpcode() == VPInstruction::BranchOnCond &&
canSimplifyBranchOnCond(Term)))) {
ConstantInt *C = cast<ConstantInt>(TripCountV);
uint64_t TCVal = C->getZExtValue();
if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
auto *BOC =
new VPInstruction(VPInstruction::BranchOnCond,
{getOrAddExternalDef(State.Builder.getTrue())});
Term->eraseFromParent();
ExitingVPBB->appendRecipe(BOC);
// TODO: Further simplifications are possible
// 1. Replace inductions with constants.
// 2. Replace vector loop region with VPBasicBlock.
}
}

// Check if the trip count is needed, and if so build it.
if (TripCount && TripCount->getNumUsers()) {
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Expand Up @@ -2601,6 +2601,12 @@ class VPlan {

/// Add \p VF to the set of vectorization factors recorded in VFs.
void addVF(ElementCount VF) { VFs.insert(VF); }

/// Restrict the recorded vectorization factors to exactly \p VF.
/// \p VF must already be present in VFs (checked by the assertion); every
/// other previously recorded VF is dropped.
void setVF(ElementCount VF) {
assert(hasVF(VF) && "Cannot set VF not already in plan");
// Drop all recorded VFs, then re-add only the requested one.
VFs.clear();
VFs.insert(VF);
}

/// Returns true if \p VF is among the vectorization factors recorded in VFs.
bool hasVF(ElementCount VF) { return VFs.count(VF); }

/// Returns true if VFs holds exactly one vectorization factor and that factor
/// is scalar.
bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
Expand Down
49 changes: 49 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Expand Up @@ -451,3 +451,52 @@ void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) {
ExpR->eraseFromParent();
}
}

/// Checks whether the condition feeding \p Term has the form
/// Not(ActiveLaneMask(...)): operand 0 of \p Term must be a Not
/// VPInstruction, and operand 0 of that Not must be an ActiveLaneMask
/// VPInstruction.
static bool canSimplifyBranchOnCond(VPInstruction *Term) {
  auto *Inverted = dyn_cast<VPInstruction>(Term->getOperand(0));
  const bool IsNot = Inverted && Inverted->getOpcode() == VPInstruction::Not;
  if (!IsNot)
    return false;

  auto *LaneMask = dyn_cast<VPInstruction>(Inverted->getOperand(0));
  if (!LaneMask)
    return false;
  return LaneMask->getOpcode() == VPInstruction::ActiveLaneMask;
}

/// Simplify \p Plan for the chosen \p BestVF and \p BestUF.
///
/// If the exiting block of the vector loop region is terminated by
/// BranchOnCount, or by BranchOnCond whose input is Not(ActiveLaneMask), and
/// the trip count SCEV is a non-zero constant that is at most
/// BestVF.getKnownMinValue() * BestUF, the terminator is replaced by
/// BranchOnCond(true) and \p Plan is restricted to \p BestVF and \p BestUF.
/// In every other case \p Plan is left untouched.
///
/// \param Plan   The plan to simplify; must already contain \p BestVF and
///               \p BestUF (asserted).
/// \param BestVF The vectorization factor selected for execution.
/// \param BestUF The unroll factor selected for execution.
/// \param PSE    Used to build the trip count SCEV and to obtain the
///               LLVMContext for the 'true' constant.
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                         unsigned BestUF,
                                         PredicatedScalarEvolution &PSE) {
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  VPBasicBlock *ExitingVPBB =
      Plan.getVectorLoopRegion()->getExitingBasicBlock();
  auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
  // Try to simplify the branch condition if TC <= VF * UF when preparing to
  // execute the plan for the main vector loop. We only do this if the
  // terminator is:
  //  1. BranchOnCount, or
  //  2. BranchOnCond where the input is Not(ActiveLaneMask).
  if (!Term || (Term->getOpcode() != VPInstruction::BranchOnCount &&
                (Term->getOpcode() != VPInstruction::BranchOnCond ||
                 !canSimplifyBranchOnCond(Term))))
    return;

  Type *IdxTy =
      Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
  const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
  auto *C = dyn_cast<SCEVConstant>(TripCount);
  // Only non-zero constant trip counts are simplified.
  if (!C || TripCount->isZero())
    return;

  // Form VF * UF in 64 bits so the product cannot wrap at a narrower operand
  // width, and compare with APInt::ugt rather than getZExtValue(): the latter
  // asserts when the constant needs more than 64 bits, whereas ugt simply
  // reports such a value as greater than the bound.
  const uint64_t MaxElementsPerIteration =
      static_cast<uint64_t>(BestVF.getKnownMinValue()) * BestUF;
  if (C->getAPInt().ugt(MaxElementsPerIteration))
    return;

  // The exit branch is rewritten to an unconditional-true condition.
  LLVMContext &Ctx = PSE.getSE()->getContext();
  auto *BOC =
      new VPInstruction(VPInstruction::BranchOnCond,
                        {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))});
  Term->eraseFromParent();
  ExitingVPBB->appendRecipe(BOC);
  // The simplification is only valid for the chosen factors, so narrow the
  // plan to exactly BestVF and BestUF.
  Plan.setVF(BestVF);
  Plan.setUF(BestUF);
  // TODO: Further simplifications are possible
  //      1. Replace inductions with constants.
  //      2. Replace vector loop region with VPBasicBlock.
}
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Expand Up @@ -23,6 +23,7 @@ class Instruction;
class PHINode;
class ScalarEvolution;
class Loop;
class PredicatedScalarEvolution;
class TargetLibraryInfo;

struct VPlanTransforms {
Expand Down Expand Up @@ -62,6 +63,12 @@ struct VPlanTransforms {
/// Remove redundant ExpandSCEVRecipes in \p Plan's entry block by replacing
/// them with already existing recipes expanding the same SCEV expression.
static void removeRedundantExpandSCEVRecipes(VPlan &Plan);

/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
/// resulting plan to \p BestVF and \p BestUF.
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
unsigned BestUF,
PredicatedScalarEvolution &PSE);
};

} // namespace llvm
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
Expand Up @@ -13,7 +13,8 @@ define void @test() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_I_I_I:%.*]], label [[SCALAR_PH]]
Expand Down

0 comments on commit e1650c8

Please sign in to comment.