Skip to content

Commit

Permalink
[LoopInterchange] Fix transformation bugs in loop interchange
Browse files Browse the repository at this point in the history
After loop interchange, the (old) outer loop header should not jump to
the `LoopExit`. Note that the old outer loop becomes the new inner loop
after interchange. If we branched to `LoopExit` then after interchange
we would jump directly from the (new) inner loop header to `LoopExit`
without executing the rest of outer loop.

This patch modifies adjustLoopBranches() such that the old outer
loop header (which becomes the new inner loop header) jumps to the
old inner loop latch which becomes the new outer loop latch after
interchange.

Reviewed By: bmahjour

Differential Revision: https://reviews.llvm.org/D98475
  • Loading branch information
CongzheUalberta committed Apr 8, 2021
1 parent 461b554 commit ce2db90
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 15 deletions.
19 changes: 9 additions & 10 deletions llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Expand Up @@ -397,10 +397,8 @@ class LoopInterchangeTransform {
public:
LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
BasicBlock *LoopNestExit,
const LoopInterchangeLegality &LIL)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
LoopExit(LoopNestExit), LIL(LIL) {}
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}

/// Interchange OuterLoop and InnerLoop.
bool transform();
Expand All @@ -421,7 +419,6 @@ class LoopInterchangeTransform {

LoopInfo *LI;
DominatorTree *DT;
BasicBlock *LoopExit;

const LoopInterchangeLegality &LIL;
};
Expand Down Expand Up @@ -523,7 +520,7 @@ struct LoopInterchange {
Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
for (unsigned i = SelecLoopId; i > 0; i--) {
bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
i - 1, LoopNestExit, DependencyMatrix);
i - 1, DependencyMatrix);
if (!Interchanged)
return Changed;
// Update the DependencyMatrix
Expand All @@ -538,7 +535,7 @@ struct LoopInterchange {
}

bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId, BasicBlock *LoopNestExit,
unsigned OuterLoopId,
std::vector<std::vector<char>> &DependencyMatrix) {
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
Expand All @@ -561,8 +558,7 @@ struct LoopInterchange {
<< "Loop interchanged with enclosing loop.";
});

LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
LIL);
LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
LIT.transform();
LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
LoopsInterchanged++;
Expand Down Expand Up @@ -1551,9 +1547,12 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
// The outer loop header might or might not branch to the outer latch.
// We are guaranteed to branch to the inner loop preheader.
if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
// In this case the outerLoopHeader should branch to the InnerLoopLatch.
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
DTUpdates,
/*MustUpdateOnce=*/false);
}
updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
InnerLoopHeaderSuccessor, DTUpdates,
/*MustUpdateOnce=*/false);
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
Expand Up @@ -25,12 +25,12 @@ define void @lcssa_08(i32 %n, i32 %m) {
; CHECK-NEXT: [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ]
; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]]
; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[INNER_FOR_BODY_SPLIT:%.*]]
; CHECK: inner.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: br label [[INNER_FOR_BODY:%.*]]
; CHECK: inner.for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT]] ]
; CHECK-NEXT: br label [[OUTER_PREHEADER]]
; CHECK: inner.for.body.split1:
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* [[TEMP]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV27]]
Expand All @@ -41,10 +41,10 @@ define void @lcssa_08(i32 %n, i32 %m) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br label [[INNER_CRIT_EDGE:%.*]]
; CHECK: inner.for.body.split:
; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ]
; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ], [ [[WIDE_TRIP_COUNT]], [[OUTER_HEADER]] ]
; CHECK-NEXT: [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]]
; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA]]
; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE:%.*]]
; CHECK: inner.crit_edge:
; CHECK-NEXT: br label [[OUTER_LATCH]]
; CHECK: outer.latch:
Expand Down
@@ -0,0 +1,202 @@
; RUN: opt -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s

@b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2
@a = common global i32 0, align 4
@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4


; Doubly nested loop
;; C test case:
;; int a;
;; short b[3][5][8] = {{}, {{}, 0, 0, 0, 6, 1, 6}};
;; void test1() {
;; int c = 0, d;
;; for (; c <= 2; c++) {
;; if (c)
;; continue;
;; d = 0;
;; for (; d <= 2; d++)
;; a |= b[d][d][c + 5];
;; }
;; }

define void @test1() {
;CHECK-LABEL: @test1(
;CHECK: entry:
;CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
;CHECK: for.body.preheader:
;CHECK-NEXT: br label [[FOR_BODY:%.*]]
;CHECK: for.body:
;CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ [[INDVARS_IV_NEXT23:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
;CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[INDVARS_IV22:%.*]], 0
;CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_BODY3_SPLIT1:%.*]], label [[FOR_BODY3_SPLIT:%.*]]
;CHECK: for.cond1.preheader:
;CHECK-NEXT: br label [[FOR_BODY3:%.*]]
;CHECK: for.body3:
;CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ %3, [[FOR_BODY3_SPLIT]] ]
;CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
;CHECK: for.body3.split1:
;CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 5
;CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV]], i64 [[TMP0]]
;CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX7]]
;CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
;CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a
;CHECK-NEXT: [[TMP_OR:%.*]] = or i32 [[TMP2]], [[CONV]]
;CHECK-NEXT: store i32 [[TMP_OR]], i32* @a
;CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
;CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 3
;CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT:%.*]]
;CHECK: for.body3.split:
;CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
;CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 3
;CHECK-NEXT: br i1 %4, label [[FOR_BODY3]], label [[FOR_END10:%.*]]
;CHECK: for.inc8.loopexit:
;CHECK-NEXT: br label [[FOR_INC8]]
;CHECK: for.inc8:
;CHECK-NEXT: [[INDVARS_IV_NEXT23]] = add nuw nsw i64 [[INDVARS_IV22]], 1
;CHECK-NEXT: [[EXITCOND25:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT23]], 3
;CHECK-NEXT: br i1 [[EXITCOND25]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]]
;CHECK: for.end10:
;CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @a
;CHECK-NEXT: ret void

entry:
br label %for.body

for.body: ; preds = %entry, %for.inc8
%indvars.iv22 = phi i64 [ 0, %entry ], [ %indvars.iv.next23, %for.inc8 ]
%tobool = icmp eq i64 %indvars.iv22, 0
br i1 %tobool, label %for.cond1.preheader, label %for.inc8

for.cond1.preheader: ; preds = %for.body
br label %for.body3

for.body3: ; preds = %for.cond1.preheader, %for.body3
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
%0 = add nuw nsw i64 %indvars.iv22, 5
%arrayidx7 = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 %indvars.iv, i64 %indvars.iv, i64 %0
%1 = load i16, i16* %arrayidx7
%conv = sext i16 %1 to i32
%2 = load i32, i32* @a
%or = or i32 %2, %conv
store i32 %or, i32* @a
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 3
br i1 %exitcond, label %for.body3, label %for.inc8.loopexit

for.inc8.loopexit: ; preds = %for.body3
br label %for.inc8

for.inc8: ; preds = %for.inc8.loopexit, %for.body
%indvars.iv.next23 = add nuw nsw i64 %indvars.iv22, 1
%exitcond25 = icmp ne i64 %indvars.iv.next23, 3
br i1 %exitcond25, label %for.body, label %for.end10

for.end10: ; preds = %for.inc8
%3 = load i32, i32* @a
ret void
}

; Triply nested loop
; The innermost and the middle loop are interchanged.
; C test case:
;; a;
;; d[][6];
;; void test2() {
;; int g = 10;
;; for (; g; g = g - 5) {
;; short c = 4;
;; for (; c; c--) {
;; int i = 4;
;; for (; i; i--) {
;; if (a)
;; break;
;; d[i][c] = 0;
;; }
;; }
;; }
;; }

define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[OUTERMOST_HEADER:%.*]]
; CHECK: outermost.header:
; CHECK-NEXT: [[INDVAR_OUTERMOST:%.*]] = phi i32 [ 10, [[ENTRY:%.*]] ], [ [[INDVAR_OUTERMOST_NEXT:%.*]], [[OUTERMOST_LATCH:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4
; CHECK-NEXT: [[TOBOOL71_I:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br label [[INNERMOST_PREHEADER:%.*]]
; CHECK: middle.header.preheader:
; CHECK-NEXT: br label [[MIDDLE_HEADER:%.*]]
; CHECK: middle.header:
; CHECK-NEXT: [[INDVAR_MIDDLE:%.*]] = phi i64 [ [[INDVAR_MIDDLE_NEXT:%.*]], [[MIDDLE_LATCH:%.*]] ], [ 4, [[MIDDLE_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT: br i1 [[TOBOOL71_I]], label [[INNERMOST_BODY_SPLIT1:%.*]], label [[INNERMOST_BODY_SPLIT:%.*]]
; CHECK: innermost.preheader:
; CHECK-NEXT: br label [[INNERMOST_BODY:%.*]]
; CHECK: innermost.body:
; CHECK-NEXT: [[INDVAR_INNERMOST:%.*]] = phi i64 [ [[TMP1:%.*]], [[INNERMOST_BODY_SPLIT]] ], [ 4, [[INNERMOST_PREHEADER]] ]
; CHECK-NEXT: br label [[MIDDLE_HEADER_PREHEADER]]
; CHECK: innermost.body.split1:
; CHECK-NEXT: [[ARRAYIDX9_I:%.*]] = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 [[INDVAR_INNERMOST]], i64 [[INDVAR_MIDDLE]]
; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX9_I]], align 4
; CHECK-NEXT: [[INDVAR_INNERMOST_NEXT:%.*]] = add nsw i64 [[INDVAR_INNERMOST]], -1
; CHECK-NEXT: [[TOBOOL5_I:%.*]] = icmp eq i64 [[INDVAR_INNERMOST_NEXT]], 0
; CHECK-NEXT: br label [[MIDDLE_LATCH_LOOPEXIT:%.*]]
; CHECK: innermost.body.split:
; CHECK-NEXT: [[TMP1]] = add nsw i64 [[INDVAR_INNERMOST]], -1
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[OUTERMOST_LATCH]], label [[INNERMOST_BODY]]
; CHECK: innermost.loopexit:
; CHECK-NEXT: br label [[MIDDLE_LATCH]]
; CHECK: middle.latch:
; CHECK-NEXT: [[INDVAR_MIDDLE_NEXT]] = add nsw i64 [[INDVAR_MIDDLE]], -1
; CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp eq i64 [[INDVAR_MIDDLE_NEXT]], 0
; CHECK-NEXT: br i1 [[TOBOOL2_I]], label [[INNERMOST_BODY_SPLIT]], label [[MIDDLE_HEADER]]
; CHECK: outermost.latch:
; CHECK-NEXT: [[INDVAR_OUTERMOST_NEXT]] = add nsw i32 [[INDVAR_OUTERMOST]], -5
; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i32 [[INDVAR_OUTERMOST_NEXT]], 0
; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[OUTERMOST_EXIT:%.*]], label [[OUTERMOST_HEADER]]
; CHECK: outermost.exit:
; CHECK-NEXT: ret void
;

entry:
br label %outermost.header

outermost.header: ; preds = %outermost.latch, %entry
%indvar.outermost = phi i32 [ 10, %entry ], [ %indvar.outermost.next, %outermost.latch ]
%0 = load i32, i32* @a, align 4
%tobool71.i = icmp eq i32 %0, 0
br label %middle.header

middle.header: ; preds = %middle.latch, %outermost.header
%indvar.middle = phi i64 [ 4, %outermost.header ], [ %indvar.middle.next, %middle.latch ]
br i1 %tobool71.i, label %innermost.preheader, label %middle.latch

innermost.preheader: ; preds = %middle.header
br label %innermost.body

innermost.body: ; preds = %innermost.preheader, %innermost.body
%indvar.innermost = phi i64 [ %indvar.innermost.next, %innermost.body ], [ 4, %innermost.preheader ]
%arrayidx9.i = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 %indvar.innermost, i64 %indvar.middle
store i32 0, i32* %arrayidx9.i, align 4
%indvar.innermost.next = add nsw i64 %indvar.innermost, -1
%tobool5.i = icmp eq i64 %indvar.innermost.next, 0
br i1 %tobool5.i, label %innermost.loopexit, label %innermost.body

innermost.loopexit: ; preds = %innermost.body
br label %middle.latch

middle.latch: ; preds = %middle.latch.loopexit, %middle.header
%indvar.middle.next = add nsw i64 %indvar.middle, -1
%tobool2.i = icmp eq i64 %indvar.middle.next, 0
br i1 %tobool2.i, label %outermost.latch, label %middle.header

outermost.latch: ; preds = %middle.latch
%indvar.outermost.next = add nsw i32 %indvar.outermost, -5
%tobool.i = icmp eq i32 %indvar.outermost.next, 0
br i1 %tobool.i, label %outermost.exit, label %outermost.header

outermost.exit: ; preds = %outermost.latch
ret void
}

0 comments on commit ce2db90

Please sign in to comment.