Skip to content

Commit

Permalink
StructurizeCFG: Set Undef for non-predecessors in setPhiValues()
Browse files Browse the repository at this point in the history
During structurization process, we may place non-predecessor blocks
between the predecessors of a block in the structurized CFG. Take
the typical while-break case as an example:
```
 /---A(v=...)
 |  / \
 ^ B   C
 |  \ /|
 \---L |
     \ /
      E (r = phi (v:C)...)
```
After structurization, the CFG would be look like:
```
 /---A
 |   |\
 |   | C
 |   |/
 |   F1
 ^   |\
 |   | B
 |   |/
 |   F2
 |   |\
 |   | L
 \   |/
  \--F3
     |
     E
```
We can see that block B is placed between the predecessors(C/L) of E.
During phi reconstruction, to achieve the same sematics as before, we
are reconstructing the PHIs as:
  F1: v1 = phi (v:C), (undef:A)
  F3: r = phi (v1:F2), ...
But this is also saying that `v1` would be live through B, which is not
quite necessary. The idea in the change is to say the incoming value
from B is Undef for the PHI in E. With this change, the reconstructed
PHI would be:
  F1: v1 = phi (v:C), (undef:A)
  F2: v2 = phi (v1:F1), (undef:B)
  F3: r  = phi (v2:F2), ...

Reviewed by: sameerds

Differential Revision: https://reviews.llvm.org/D132450
  • Loading branch information
ruiling committed Sep 26, 2022
1 parent 40e9284 commit a5676a3
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 186 deletions.
95 changes: 90 additions & 5 deletions llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
Expand Down Expand Up @@ -246,6 +247,7 @@ class StructurizeCFG {

SmallVector<RegionNode *, 8> Order;
BBSet Visited;
BBSet FlowSet;

SmallVector<WeakVH, 8> AffectedPhis;
BBPhiMap DeletedPhis;
Expand Down Expand Up @@ -278,6 +280,9 @@ class StructurizeCFG {

void addPhiValues(BasicBlock *From, BasicBlock *To);

void findUndefBlocks(BasicBlock *PHIBlock,
const SmallSet<BasicBlock *, 8> &Incomings,
SmallVector<BasicBlock *> &UndefBlks) const;
void setPhiValues();

void simplifyAffectedPhis();
Expand Down Expand Up @@ -632,6 +637,67 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
AddedPhis[To].push_back(From);
}

/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
/// from predecessors \p Incomings, we have a chance to mark the available value
/// from some blocks as undefined. The function will find out all such blocks
/// and return in \p UndefBlks.
void StructurizeCFG::findUndefBlocks(
BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
SmallVector<BasicBlock *> &UndefBlks) const {
// We may get a post-structured CFG like below:
//
// | P1
// |/
// F1
// |\
// | N
// |/
// F2
// |\
// | P2
// |/
// F3
// |\
// B
//
// B is the block that has a PHI being reconstructed. P1/P2 are predecessors
// of B before structurization. F1/F2/F3 are flow blocks inserted during
// structurization process. Block N is not a predecessor of B before
// structurization, but are placed between the predecessors(P1/P2) of B after
// structurization. This usually means that threads went to N never take the
// path N->F2->F3->B. For example, the threads take the branch F1->N may
// always take the branch F2->P2. So, when we are reconstructing a PHI
// originally in B, we can safely say the incoming value from N is undefined.
SmallSet<BasicBlock *, 8> VisitedBlock;
SmallVector<BasicBlock *, 8> Stack;
if (PHIBlock == ParentRegion->getExit()) {
for (auto P : predecessors(PHIBlock)) {
if (ParentRegion->contains(P))
Stack.push_back(P);
}
} else {
append_range(Stack, predecessors(PHIBlock));
}

// Do a backward traversal over the CFG, and stop further searching if
// the block is not a Flow. If a block is neither flow block nor the
// incoming predecessor, then the incoming value from the block is
// undefined value for the PHI being reconstructed.
while (!Stack.empty()) {
BasicBlock *Current = Stack.pop_back_val();
if (VisitedBlock.contains(Current))
continue;

VisitedBlock.insert(Current);
if (FlowSet.contains(Current)) {
for (auto P : predecessors(Current))
Stack.push_back(P);
} else if (!Incomings.contains(Current)) {
UndefBlks.push_back(Current);
}
}
}

/// Add the real PHI value as soon as everything is set up
void StructurizeCFG::setPhiValues() {
SmallVector<PHINode *, 8> InsertedPhis;
Expand All @@ -643,6 +709,8 @@ void StructurizeCFG::setPhiValues() {
if (!DeletedPhis.count(To))
continue;

SmallVector<BasicBlock *> UndefBlks;
bool CachedUndefs = false;
PhiMap &Map = DeletedPhis[To];
for (const auto &PI : Map) {
PHINode *Phi = PI.first;
Expand All @@ -651,15 +719,30 @@ void StructurizeCFG::setPhiValues() {
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
Updater.AddAvailableValue(To, Undef);

NearestCommonDominator Dominator(DT);
Dominator.addBlock(To);
SmallSet<BasicBlock *, 8> Incomings;
SmallVector<BasicBlock *> ConstantPreds;
for (const auto &VI : PI.second) {
Incomings.insert(VI.first);
Updater.AddAvailableValue(VI.first, VI.second);
Dominator.addAndRememberBlock(VI.first);
if (isa<Constant>(VI.second))
ConstantPreds.push_back(VI.first);
}

if (!Dominator.resultIsRememberedBlock())
Updater.AddAvailableValue(Dominator.result(), Undef);
if (!CachedUndefs) {
findUndefBlocks(To, Incomings, UndefBlks);
CachedUndefs = true;
}

for (auto UB : UndefBlks) {
// If this undef block is dominated by any predecessor(before
// structurization) of reconstructed PHI with constant incoming value,
// don't mark the available value as undefined. Setting undef to such
// block will stop us from getting optimal phi insertion.
if (any_of(ConstantPreds,
[&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
continue;
Updater.AddAvailableValue(UB, Undef);
}

for (BasicBlock *FI : From)
Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
Expand Down Expand Up @@ -759,6 +842,7 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
Order.back()->getEntry();
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
Func, Insert);
FlowSet.insert(Flow);
DT->addNewBlock(Flow, Dominator);
ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
return Flow;
Expand Down Expand Up @@ -1103,6 +1187,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
Loops.clear();
LoopPreds.clear();
LoopConds.clear();
FlowSet.clear();

return true;
}
Expand Down
44 changes: 21 additions & 23 deletions llvm/test/CodeGen/AMDGPU/multilevel-break.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,32 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
; OPT-NEXT: main_body:
; OPT-NEXT: br label [[LOOP_OUTER:%.*]]
; OPT: LOOP.outer:
; OPT-NEXT: [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP10:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
; OPT-NEXT: [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP4:%.*]], [[FLOW1]] ]
; OPT-NEXT: [[PHI_BROKEN2:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW1:%.*]] ], [ 0, [[MAIN_BODY:%.*]] ]
; OPT-NEXT: [[TMP43:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP3:%.*]], [[FLOW1]] ]
; OPT-NEXT: br label [[LOOP:%.*]]
; OPT: LOOP:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP8:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
; OPT-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP4]], [[FLOW]] ]
; OPT-NEXT: [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP5:%.*]], [[FLOW]] ]
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], [[FLOW:%.*]] ], [ 0, [[LOOP_OUTER]] ]
; OPT-NEXT: [[TMP45:%.*]] = phi i32 [ [[TMP43]], [[LOOP_OUTER]] ], [ [[TMP3]], [[FLOW]] ]
; OPT-NEXT: [[TMP48:%.*]] = icmp slt i32 [[TMP45]], [[UB:%.*]]
; OPT-NEXT: [[TMP1:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP1]], 0
; OPT-NEXT: [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP1]], 1
; OPT-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP48]])
; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
; OPT-NEXT: br i1 [[TMP1]], label [[ENDIF:%.*]], label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP4]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ [[TMP0]], [[LOOP]] ]
; OPT-NEXT: [[TMP5]] = phi i32 [ [[TMP47]], [[ENDIF]] ], [ undef, [[LOOP]] ]
; OPT-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: [[TMP7:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]])
; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP7]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
; OPT-NEXT: [[TMP10]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP6]], i64 [[PHI_BROKEN2]])
; OPT-NEXT: br i1 [[TMP9]], label [[FLOW1]], label [[LOOP]]
; OPT-NEXT: [[TMP3]] = phi i32 [ [[TMP47:%.*]], [[ENDIF]] ], [ undef, [[LOOP]] ]
; OPT-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP7:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
; OPT-NEXT: [[TMP8]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN2]])
; OPT-NEXT: br i1 [[TMP7]], label [[FLOW1]], label [[LOOP]]
; OPT: Flow1:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; OPT-NEXT: [[TMP11:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP10]])
; OPT-NEXT: br i1 [[TMP11]], label [[IF:%.*]], label [[LOOP_OUTER]]
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; OPT-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP8]])
; OPT-NEXT: br i1 [[TMP9]], label [[IF:%.*]], label [[LOOP_OUTER]]
; OPT: IF:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; OPT-NEXT: ret void
; OPT: ENDIF:
; OPT-NEXT: [[TMP47]] = add i32 [[TMP45]], 1
Expand Down Expand Up @@ -156,7 +154,7 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
; OPT-NEXT: [[CMP2]] = icmp sge i32 [[TMP]], [[LOAD2]]
; OPT-NEXT: br label [[FLOW3]]
; OPT: Flow5:
; OPT-NEXT: [[TMP9]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ]
; OPT-NEXT: [[TMP9]] = phi i32 [ [[LSR_IV_NEXT]], [[CASE0]] ], [ undef, [[LEAFBLOCK]] ]
; OPT-NEXT: [[TMP10]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP7]], [[LEAFBLOCK]] ]
; OPT-NEXT: br label [[FLOW4]]
; OPT: bb9:
Expand Down
Loading

0 comments on commit a5676a3

Please sign in to comment.