Skip to content

Commit

Permalink
[VPlan] Initial modeling of middle block in VPlan.
Browse files Browse the repository at this point in the history
This patch extends the scope of VPlan to also include the exit (aka
middle) block.

For now, the exit block remains empty, but handling of exit values will
subsequently be moved to VPlan, by adding recipes to model exit values
in the exit block.

As a first step, this will allow fixing #51366.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D123457
  • Loading branch information
fhahn committed Apr 20, 2022
1 parent ecc8479 commit bea69b2
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 28 deletions.
26 changes: 17 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -480,7 +480,7 @@ class InnerLoopVectorizer {
VPTransformState &State);

/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
void fixVectorizedLoop(VPTransformState &State);
void fixVectorizedLoop(VPTransformState &State, VPlan &Plan);

// Return true if any runtime check is added.
bool areSafetyChecksAdded() { return AddedSafetyChecks; }
Expand Down Expand Up @@ -3646,7 +3646,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
}
}

void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
VPlan &Plan) {
// Insert truncates and extends for any truncated instructions as hints to
// InstCombine.
if (VF.isVector())
Expand All @@ -3668,7 +3669,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);

Loop *VectorLoop = LI->getLoopFor(State.CFG.PrevBB);
VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitBasicBlock();
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
// If we inserted an edge from the middle block to the unique exit block,
// update uses outside the loop (phis) to account for the newly inserted
// edge.
Expand Down Expand Up @@ -3864,8 +3866,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
setDebugLocFromInst(LoopExitInst);

Type *PhiTy = OrigPhi->getType();
BasicBlock *VectorLoopLatch =
LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch();

VPBasicBlock *LatchVPBB =
PhiR->getParent()->getEnclosingLoopRegion()->getExitBasicBlock();
BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB];
// If tail is folded by masking, the vector value to leave the loop should be
// a Select choosing between the vectorized LoopExitInst and vectorized Phi,
// instead of the former. For an inloop reduction the reduction will already
Expand Down Expand Up @@ -7628,7 +7632,9 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupVectorized});

Loop *L = LI->getLoopFor(State.CFG.PrevBB);
VPBasicBlock *HeaderVPBB =
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
if (VectorizedLoopID.hasValue())
L->setLoopID(VectorizedLoopID.getValue());
else {
Expand All @@ -7646,7 +7652,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,

// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
ILV.fixVectorizedLoop(State);
ILV.fixVectorizedLoop(State, BestVPlan);

ILV.printDebugTracesAtEnd();
}
Expand Down Expand Up @@ -8750,8 +8756,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// ---------------------------------------------------------------------------

// Create initial VPlan skeleton, starting with a block for the pre-header,
// followed by a region for the vector loop. The skeleton vector loop region
// contains a header and latch block.
// followed by a region for the vector loop, followed by the middle block. The
// skeleton vector loop region contains a header and latch block.
VPBasicBlock *Preheader = new VPBasicBlock("vector.ph");
auto Plan = std::make_unique<VPlan>(Preheader);

Expand All @@ -8760,6 +8766,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
VPBlockUtils::insertBlockAfter(TopRegion, Preheader);
VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);

Instruction *DLInst =
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
Expand Down
43 changes: 25 additions & 18 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Expand Up @@ -325,27 +325,33 @@ void VPBasicBlock::execute(VPTransformState *State) {
return R && !R->isReplicator();
};

// 1. Create an IR basic block, or reuse the last one if possible.
// The last IR basic block is reused, as an optimization, in three cases:
// A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
// B. when the current VPBB has a single (hierarchical) predecessor which
// is PrevVPBB and the latter has a single (hierarchical) successor which
// both are in the same non-replicator region; and
// C. when the current VPBB is an entry of a region replica - where PrevVPBB
// is the exit of this region from a previous instance, or the predecessor
// of this region.
if (PrevVPBB && /* A */
!((SingleHPred = getSingleHierarchicalPredecessor()) &&
SingleHPred->getExitBasicBlock() == PrevVPBB &&
PrevVPBB->getSingleHierarchicalSuccessor() &&
(SingleHPred->getParent() == getEnclosingLoopRegion() &&
!IsNonReplicateR(SingleHPred))) && /* B */
!(Replica && getPredecessors().empty())) { /* C */
// 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
// ExitBB can be re-used for the exit block of the Plan.
NewBB = State->CFG.ExitBB;
State->CFG.PrevBB = NewBB;
} else if (PrevVPBB && /* A */
!((SingleHPred = getSingleHierarchicalPredecessor()) &&
SingleHPred->getExitBasicBlock() == PrevVPBB &&
PrevVPBB->getSingleHierarchicalSuccessor() &&
(SingleHPred->getParent() == getEnclosingLoopRegion() &&
!IsNonReplicateR(SingleHPred))) && /* B */
!(Replica && getPredecessors().empty())) { /* C */
// The last IR basic block is reused, as an optimization, in three cases:
// A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
// B. when the current VPBB has a single (hierarchical) predecessor which
// is PrevVPBB and the latter has a single (hierarchical) successor which
// both are in the same non-replicator region; and
// C. when the current VPBB is an entry of a region replica - where PrevVPBB
// is the exit of this region from a previous instance, or the
// predecessor of this region.

NewBB = createEmptyBasicBlock(State->CFG);
State->Builder.SetInsertPoint(NewBB);
// Temporarily terminate with unreachable until CFG is rewired.
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
// Register NewBB in its loop. In innermost loops its the same for all BB's.
// Register NewBB in its loop. In innermost loops its the same for all
// BB's.
if (State->CurrentVectorLoop)
State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
State->Builder.SetInsertPoint(Terminator);
Expand Down Expand Up @@ -969,7 +975,8 @@ void VPlan::execute(VPTransformState *State) {
}
}

BasicBlock *VectorLatchBB = State->CFG.PrevBB;
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitBasicBlock();
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];

// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
Expand Down
Expand Up @@ -72,6 +72,9 @@ define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -145,6 +148,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -215,6 +221,9 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -312,6 +321,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8*
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -407,6 +419,9 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down
28 changes: 27 additions & 1 deletion llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Expand Up @@ -27,6 +27,9 @@ define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x)
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -73,6 +76,9 @@ define void @print_widen_gep_and_select(i64 %n, float* noalias %y, float* noalia
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -117,6 +123,9 @@ define float @print_reduction(i64 %n, float* noalias %y) {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -184,6 +193,9 @@ define void @print_replicate_predicated_phi(i64 %n, i64* %x) {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -243,6 +255,9 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down Expand Up @@ -300,6 +315,11 @@ define float @print_fmuladd_strict(float* %a, float* %b, i64 %n) {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT:}

entry:
br label %for.body
Expand Down Expand Up @@ -376,7 +396,10 @@ define void @debug_loc_vpinstruction(i32* nocapture %asd, i32* nocapture %bsd) !
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT:}
; CHECK-NEXT:No successors
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT:}
;
entry:
Expand Down Expand Up @@ -435,6 +458,9 @@ define void @print_expand_scev(i64 %y, i8* %ptr) {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]> vp<%0>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down
Expand Up @@ -49,6 +49,9 @@ define void @sink_with_sideeffects(i1 %c, i8* %ptr) {
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
Expand Down

0 comments on commit bea69b2

Please sign in to comment.