Skip to content

Commit

Permalink
[LoopUnrollAndJam] Visit phi operand dependencies in post-order
Browse files Browse the repository at this point in the history
Fixes #58565

The previous implementation visits operands in pre-order, but this does
not guarantee an instruction is visited before its uses. This can cause
instructions to be copied in the incorrect order. For example:

```
a = ...
b = add a, 1
c = add a, b
d = add b, a
```

Pre-order visits does not guarantee the order in which `a` and `b` are
visited. LoopUnrollAndJam may incorrectly insert `b` before `a`.

This patch implements post-order visits. By visiting dependencies first,
we guarantee that an instruction's dependencies are visited first.

Differential Revision: https://reviews.llvm.org/D140255
  • Loading branch information
caojoshua committed Jan 5, 2023
1 parent 6a930e8 commit 629d880
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 22 deletions.
39 changes: 17 additions & 22 deletions llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
Expand Up @@ -137,25 +137,28 @@ static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
template <typename T>
static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
BasicBlockSet &AftBlocks, T Visit) {
SmallVector<Instruction *, 8> Worklist;
SmallPtrSet<Instruction *, 8> VisitedInstr;
for (auto &Phi : Header->phis()) {
Value *V = Phi.getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V))
Worklist.push_back(I);
}

while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visit(I))
return false;
std::function<bool(Instruction * I)> ProcessInstr = [&](Instruction *I) {
if (VisitedInstr.count(I))
return true;

VisitedInstr.insert(I);

if (AftBlocks.count(I->getParent()))
for (auto &U : I->operands())
if (Instruction *II = dyn_cast<Instruction>(U))
if (!VisitedInstr.count(II))
Worklist.push_back(II);
if (!ProcessInstr(II))
return false;

return Visit(I);
};

for (auto &Phi : Header->phis()) {
Value *V = Phi.getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V))
if (!ProcessInstr(I))
return false;
}

return true;
Expand All @@ -168,20 +171,12 @@ static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
BasicBlockSet &AftBlocks) {
// We need to ensure we move the instructions in the correct order,
// starting with the earliest required instruction and moving forward.
std::vector<Instruction *> Visited;
processHeaderPhiOperands(Header, Latch, AftBlocks,
[&Visited, &AftBlocks](Instruction *I) {
[&AftBlocks, &InsertLoc](Instruction *I) {
if (AftBlocks.count(I->getParent()))
Visited.push_back(I);
I->moveBefore(InsertLoc);
return true;
});

// Move all instructions in program order to before the InsertLoc
BasicBlock *InsertLocBB = InsertLoc->getParent();
for (Instruction *I : reverse(Visited)) {
if (I->getParent() != InsertLocBB)
I->moveBefore(InsertLoc);
}
}

/*
Expand Down
85 changes: 85 additions & 0 deletions llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

define void @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT: bb:
; CHECK-NEXT: br i1 false, label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA:%.*]], label [[BB_NEW:%.*]]
; CHECK: bb.new:
; CHECK-NEXT: br label [[BB5_PREHEADER:%.*]]
; CHECK: bb5.preheader:
; CHECK-NEXT: [[I10:%.*]] = phi i16 [ 0, [[BB_NEW]] ], [ [[I42_3:%.*]], [[BB38:%.*]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i16 [ 0, [[BB_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[BB38]] ]
; CHECK-NEXT: [[I4017:%.*]] = zext i16 [[I10]] to i64
; CHECK-NEXT: [[I13_I:%.*]] = add nuw nsw i64 [[I4017]], 1
; CHECK-NEXT: [[I42:%.*]] = trunc i64 [[I13_I]] to i16
; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i16 [[NITER]], 1
; CHECK-NEXT: [[I4017_1:%.*]] = zext i16 [[I42]] to i64
; CHECK-NEXT: [[I13_I_1:%.*]] = add nuw nsw i64 [[I4017_1]], 1
; CHECK-NEXT: [[I42_1:%.*]] = trunc i64 [[I13_I_1]] to i16
; CHECK-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i16 [[NITER_NEXT]], 1
; CHECK-NEXT: [[I4017_2:%.*]] = zext i16 [[I42_1]] to i64
; CHECK-NEXT: [[I13_I_2:%.*]] = add nuw nsw i64 [[I4017_2]], 1
; CHECK-NEXT: [[I42_2:%.*]] = trunc i64 [[I13_I_2]] to i16
; CHECK-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i16 [[NITER_NEXT_1]], 1
; CHECK-NEXT: [[I4017_3:%.*]] = zext i16 [[I42_2]] to i64
; CHECK-NEXT: [[I13_I_3:%.*]] = add nuw nsw i64 [[I4017_3]], 1
; CHECK-NEXT: [[I42_3]] = trunc i64 [[I13_I_3]] to i16
; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw i16 [[NITER_NEXT_2]], 1
; CHECK-NEXT: br label [[BB10_PREHEADER:%.*]]
; CHECK: bb10.preheader:
; CHECK-NEXT: br i1 true, label [[BB38]], label [[BB10_PREHEADER]]
; CHECK: bb38:
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i16 [[NITER_NEXT_3]], -28
; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT:%.*]], label [[BB5_PREHEADER]], !llvm.loop [[LOOP1:![0-9]+]]
; CHECK: bb1.bb43_crit_edge.unr-lcssa.loopexit:
; CHECK-NEXT: [[I10_UNR_PH:%.*]] = phi i16 [ [[I42_3]], [[BB38]] ]
; CHECK-NEXT: br label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA]]
; CHECK: bb1.bb43_crit_edge.unr-lcssa:
; CHECK-NEXT: [[I10_UNR:%.*]] = phi i16 [ 0, [[BB:%.*]] ], [ [[I10_UNR_PH]], [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: br i1 true, label [[BB5_PREHEADER_EPIL_PREHEADER:%.*]], label [[BB1_BB43_CRIT_EDGE:%.*]]
; CHECK: bb5.preheader.epil.preheader:
; CHECK-NEXT: br label [[BB5_PREHEADER_EPIL:%.*]]
; CHECK: bb5.preheader.epil:
; CHECK-NEXT: [[I10_EPIL:%.*]] = phi i16 [ [[I10_UNR]], [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[I42_EPIL:%.*]], [[BB38_EPIL:%.*]] ]
; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i16 [ 0, [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[BB38_EPIL]] ]
; CHECK-NEXT: br label [[BB10_PREHEADER_EPIL:%.*]]
; CHECK: bb10.preheader.epil:
; CHECK-NEXT: br i1 true, label [[BB38_EPIL]], label [[BB10_PREHEADER_EPIL]]
; CHECK: bb38.epil:
; CHECK-NEXT: [[I4017_EPIL:%.*]] = zext i16 [[I10_EPIL]] to i64
; CHECK-NEXT: [[I13_I_EPIL:%.*]] = add i64 [[I4017_EPIL]], 1
; CHECK-NEXT: [[I14_I_EPIL:%.*]] = select i1 true, i64 [[I13_I_EPIL]], i64 [[I4017_EPIL]]
; CHECK-NEXT: [[I42_EPIL]] = trunc i64 [[I14_I_EPIL]] to i16
; CHECK-NEXT: [[I3_NOT_EPIL:%.*]] = icmp eq i16 [[I10_EPIL]], -27
; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i16 [[EPIL_ITER]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i16 [[EPIL_ITER_NEXT]], 2
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[BB5_PREHEADER_EPIL]], label [[BB1_BB43_CRIT_EDGE_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: bb1.bb43_crit_edge.epilog-lcssa:
; CHECK-NEXT: br label [[BB1_BB43_CRIT_EDGE]]
; CHECK: bb1.bb43_crit_edge:
; CHECK-NEXT: ret void
;
bb:
br label %bb5.preheader

bb5.preheader: ; preds = %bb38, %bb
%i10 = phi i16 [ 0, %bb ], [ %i42, %bb38 ]
br label %bb10.preheader

bb10.preheader: ; preds = %bb10.preheader, %bb5.preheader
br i1 true, label %bb38, label %bb10.preheader

bb38: ; preds = %bb10.preheader
%i4017 = zext i16 %i10 to i64
%i13.i = add i64 %i4017, 1
%i14.i = select i1 true, i64 %i13.i, i64 %i4017
%i42 = trunc i64 %i14.i to i16
%i3.not = icmp eq i16 %i10, -27
br i1 %i3.not, label %bb1.bb43_crit_edge, label %bb5.preheader

bb1.bb43_crit_edge: ; preds = %bb38
ret void
}

0 comments on commit 629d880

Please sign in to comment.