[VPlan] Insert VPBlendRecipes in post order. NFC#201782
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Luke Lau (lukel97) ChangesAn upcoming PR wants to optimize blend masks by peeking through the contents of other phi nodes. Currently we eagerly convert phis to blends in reverse post order, so switch it to post order so that phis at the bottom can see the phis in their uses. Full diff: https://github.com/llvm/llvm-project/pull/201782.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 2717b80e2eeaa..b01fef556f8ed 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -69,6 +69,8 @@ class VPPredicator {
return EdgeMaskCache[{Src, Dst}] = Mask;
}
+ DenseMap<const VPBasicBlock *, VPBasicBlock::iterator> InsertPoints;
+
public:
VPPredicator(VPlan &Plan) : VPDT(Plan), VPPDT(Plan) {}
@@ -136,6 +138,10 @@ void VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
// Start inserting after the block's phis, which be replaced by blends later.
Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi());
+ // Keep track of where in VPBB we are inserting the masks into.
+ scope_exit UpdateInsertPoint(
+ [this, &VPBB]() { InsertPoints[VPBB] = Builder.getInsertPoint(); });
+
// Reuse the mask of the immediate dominator if the VPBB post-dominates the
// immediate dominator.
auto *IDom = VPDT.getNode(VPBB)->getIDom();
@@ -225,6 +231,7 @@ void VPPredicator::createSwitchEdgeMasks(const VPInstruction *SI) {
}
void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
+ Builder.setInsertPoint(VPBB, InsertPoints[VPBB]);
SmallVector<VPPhi *> Phis;
for (VPRecipeBase &R : VPBB->phis())
Phis.push_back(cast<VPPhi>(&R));
@@ -276,10 +283,8 @@ void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
// Introduce the mask for VPBB, which may introduce needed edge masks, and
// convert all phi recipes of VPBB to blend recipes unless VPBB is the
// header.
- if (VPBB != Header) {
+ if (VPBB != Header)
Predicator.createBlockInMask(VPBB);
- Predicator.convertPhisToBlends(VPBB);
- }
VPValue *BlockMask = Predicator.getBlockInMask(VPBB);
if (!BlockMask)
@@ -292,6 +297,10 @@ void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
}
}
+ for (VPBlockBase *VPB : reverse(RPOT))
+ if (VPB != Header)
+ Predicator.convertPhisToBlends(cast<VPBasicBlock>(VPB));
+
// Linearize the blocks of the loop into one serial chain.
VPBlockBase *PrevVPBB = nullptr;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
|
|
@llvm/pr-subscribers-vectorizers Author: Luke Lau (lukel97) ChangesAn upcoming PR wants to optimize blend masks by peeking through the contents of other phi nodes. Currently we eagerly convert phis to blends in reverse post order, so switch it to post order so that phis at the bottom can see the phis in their uses. Full diff: https://github.com/llvm/llvm-project/pull/201782.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 2717b80e2eeaa..b01fef556f8ed 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -69,6 +69,8 @@ class VPPredicator {
return EdgeMaskCache[{Src, Dst}] = Mask;
}
+ DenseMap<const VPBasicBlock *, VPBasicBlock::iterator> InsertPoints;
+
public:
VPPredicator(VPlan &Plan) : VPDT(Plan), VPPDT(Plan) {}
@@ -136,6 +138,10 @@ void VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
// Start inserting after the block's phis, which be replaced by blends later.
Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi());
+ // Keep track of where in VPBB we are inserting the masks into.
+ scope_exit UpdateInsertPoint(
+ [this, &VPBB]() { InsertPoints[VPBB] = Builder.getInsertPoint(); });
+
// Reuse the mask of the immediate dominator if the VPBB post-dominates the
// immediate dominator.
auto *IDom = VPDT.getNode(VPBB)->getIDom();
@@ -225,6 +231,7 @@ void VPPredicator::createSwitchEdgeMasks(const VPInstruction *SI) {
}
void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
+ Builder.setInsertPoint(VPBB, InsertPoints[VPBB]);
SmallVector<VPPhi *> Phis;
for (VPRecipeBase &R : VPBB->phis())
Phis.push_back(cast<VPPhi>(&R));
@@ -276,10 +283,8 @@ void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
// Introduce the mask for VPBB, which may introduce needed edge masks, and
// convert all phi recipes of VPBB to blend recipes unless VPBB is the
// header.
- if (VPBB != Header) {
+ if (VPBB != Header)
Predicator.createBlockInMask(VPBB);
- Predicator.convertPhisToBlends(VPBB);
- }
VPValue *BlockMask = Predicator.getBlockInMask(VPBB);
if (!BlockMask)
@@ -292,6 +297,10 @@ void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
}
}
+ for (VPBlockBase *VPB : reverse(RPOT))
+ if (VPB != Header)
+ Predicator.convertPhisToBlends(cast<VPBasicBlock>(VPB));
+
// Linearize the blocks of the loop into one serial chain.
VPBlockBase *PrevVPBB = nullptr;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
|
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
|
|
||
| // Keep track of where in VPBB we are inserting the masks into. | ||
| scope_exit UpdateInsertPoint( | ||
| [this, &VPBB]() { InsertPoints[VPBB] = Builder.getInsertPoint(); }); |
There was a problem hiding this comment.
If there is only one mask per block, is it worth asserting that InsertPoints[VPBB] is null before writing to it?
| return EdgeMaskCache[{Src, Dst}] = Mask; | ||
| } | ||
|
|
||
| DenseMap<const VPBasicBlock *, VPBasicBlock::iterator> InsertPoints; |
There was a problem hiding this comment.
Does this actually need to live here? If the only use case is in introduceMasksAndLinearize perhaps you can just declare it as a local variable in that function? You can change VPPredicator::createBlockInMask to return the insert point and use the return value to update the local map?
There was a problem hiding this comment.
#201783 eventually needs to read from the map from within VPPredicator::createMaskDisjunction here which is why I made it a field, to avoid having to plumb it all the way through 375dd0d#diff-9b8a0ea145b3fc89b1f906256d0cbc4e3a30d09ff271ad4341b2e2ce519de8e2R344-R347
fhahn
left a comment
There was a problem hiding this comment.
I think at least on the VPlan-level, there may be some changes with the patch.
Something like below in the predicator.ll VPlan test should have the generated blends changed. For this one, everything gets folded and there's no IR change, not sure if there's any way to construct a test case with end-to-end changes, but may be god to add the test.
define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) {
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%lb = load i64, ptr %b
%v1 = add i64 %iv, %lb
%v2 = mul i64 %iv, 3
%gep = getelementptr i64, ptr %a, i64 %iv
%c0 = icmp sle i64 %iv, 0
br i1 %c0, label %if.a, label %merge.a
if.a:
%ca = icmp sle i64 %iv, 8
br i1 %ca, label %if.a.inner, label %merge.a.inner
if.a.inner:
br label %merge.a.inner
merge.a.inner:
%blend.a.inner = phi i64 [ %v1, %if.a ], [ %v1, %if.a.inner ]
br label %merge.a
merge.a:
%blend.a = phi i64 [ %v1, %loop.header ], [ %blend.a.inner, %merge.a.inner ]
%d0 = icmp sgt i64 %iv, 0
br i1 %d0, label %if.b, label %merge.b
if.b:
%cb = icmp sle i64 %iv, 16
br i1 %cb, label %if.b.inner, label %merge.b.inner
if.b.inner:
br label %merge.b.inner
merge.b.inner:
%blend.b.inner = phi i64 [ %v2, %if.b ], [ %v2, %if.b.inner ]
br label %merge.b
merge.b:
%blend.b = phi i64 [ %v2, %merge.a ], [ %blend.b.inner, %merge.b.inner ]
%sum = add i64 %blend.a, %blend.b
store i64 %sum, ptr %gep
br label %loop.latch
loop.latch:
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
| scope_exit UpdateInsertPoint([this, &VPBB]() { | ||
| assert(!InsertPoints.contains(VPBB) && "InsertPoint clobbered?"); | ||
| InsertPoints[VPBB] = Builder.getInsertPoint(); | ||
| }); |
There was a problem hiding this comment.
do we need the scope_exit? I think we don't reset the inert point, and only insert before, so it should not change
There was a problem hiding this comment.
We don't reset the insert point but it changes when new edge masks or block in-masks are created. Although I think we can just recalculate the insert point based off of the block-in mask. I've removed the InsertPoints map in 73427ee
e6431bc to
73427ee
Compare
For the cases where all the incoming values are equal, yeah there should be no end-to-end change because #201783 will restore the VPlan-level changes at it will be able to see when all incoming values are equal through nested phis. Added the test in f07305e |
|
I guess this is fine for now, so I definitely wouldn't want to block it, but I have a feeling RPOT would be better if we ever try to preserve some "uniform" branches as that would require placing SSA-phis sometimes. |
#201783 wants to optimize blend masks by peeking through the contents of other phi nodes. Currently we eagerly convert phis to blends in reverse post order, so switch it to post order so that phis at the bottom can see the phis in their uses.