Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -1872,6 +1873,51 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
}

/// This deals with a corner case when a LCSSA phi node appears in a non-exit
/// block: the outer loop latch block does not need to be exit block of the
/// inner loop. Consider a loop that was in LCSSA form, but then some
/// transformation like loop-unswitch comes along and creates an empty block,
/// where BB5 in this example is the outer loop latch block:
///
/// BB4:
/// br label %BB5
/// BB5:
/// %old.cond.lcssa = phi i16 [ %cond, %BB4 ]
/// br outer.header
///
/// Interchange then brings it in LCSSA form again resulting in this chain of
/// single-input phi nodes:
///
/// BB4:
/// %new.cond.lcssa = phi i16 [ %cond, %BB3 ]
/// br label %BB5
/// BB5:
/// %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ]
///
/// The problem is that interchange can reoder blocks BB4 and BB5 placing the
/// use before the def if we don't check this. The solution is to simplify
/// lcssa phi nodes (remove) if they appear in non-exit blocks.
///
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();

// Do not modify lcssa phis where they actually belong, i.e. in exit blocks.
if (OuterLoopLatch == InnerLoopExit)
return;

// Collect and remove phis in non-exit blocks if they have 1 input.
SmallVector<PHINode *, 8> Phis(
llvm::make_pointer_range(OuterLoopLatch->phis()));
for (PHINode *Phi : Phis) {
assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");
LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi
<< "\n");
Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
Phi->eraseFromParent();
}
}

bool LoopInterchangeTransform::adjustLoopBranches() {
LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
std::vector<DominatorTree::UpdateType> DTUpdates;
Expand All @@ -1882,6 +1928,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
InnerLoopPreHeader && "Guaranteed by loop-simplify form");

simplifyLCSSAPhis(OuterLoop, InnerLoop);

Comment on lines +1931 to +1933
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: is this the right place to call this function? I think it might be better to call this from moveLCSSAPhis directly.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was also wondering, and I have played with the place where to put this. But between this place here and moveLCSSAPhis, the CFG is massively rewired and sometimes in an funny state (i.e. it is under construction). I thought about doing this relatively early when the CFG is relatively stable before we start completely turning things around and adding and splitting things, and moving things around, which is here. Thus, I think it is fine here, and overall doesn't matter that much.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought moveLCSSAPhis is calling adjustLoopBranches, but it's actually the other way around. Never mind.

// Ensure that both preheaders do not contain PHI nodes and have single
// predecessors. This allows us to move them easily. We use
// InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
Expand Down
79 changes: 79 additions & 0 deletions llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s

; This test is checking that blocks outer.body and outer.latch, where outer.body is the exit
; block of the inner loop and outer.latch the latch of the outer loop, correctly
; deal with the phi-node use-def chain %new.cond.lcssa -> %old.cond.lcssa. What we expect
; here is that block outer.latch does not contain a phi node, because it is a single input
; phi in a non-exit block.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define i16 @main(ptr %a) {
; CHECK-LABEL: define i16 @main(
; CHECK-SAME: ptr [[A:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[INNER_HEADER_PREHEADER:.*]]
; CHECK: [[OUTER_HEADER_PREHEADER:.*]]:
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
; CHECK: [[OUTER_HEADER]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 1, %[[OUTER_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label %[[INNER_HEADER_SPLIT:.*]]
; CHECK: [[INNER_HEADER_PREHEADER]]:
; CHECK-NEXT: br label %[[INNER_HEADER:.*]]
; CHECK: [[INNER_HEADER]]:
; CHECK-NEXT: [[J:%.*]] = phi i16 [ [[TMP1:%.*]], %[[INNER_LATCH_SPLIT:.*]] ], [ 0, %[[INNER_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label %[[OUTER_HEADER_PREHEADER]]
; CHECK: [[INNER_HEADER_SPLIT]]:
; CHECK-NEXT: [[ARRAYIDX_US_US:%.*]] = getelementptr i16, ptr [[A]], i16 [[J]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_US_US]], align 1
; CHECK-NEXT: [[COND:%.*]] = select i1 false, i16 0, i16 0
; CHECK-NEXT: br label %[[INNER_LATCH:.*]]
; CHECK: [[INNER_LATCH]]:
; CHECK-NEXT: [[J_NEXT:%.*]] = add i16 [[J]], 1
; CHECK-NEXT: br label %[[OUTER_BODY:.*]]
; CHECK: [[INNER_LATCH_SPLIT]]:
; CHECK-NEXT: [[NEW_COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[OUTER_LATCH]] ]
; CHECK-NEXT: [[TMP1]] = add i16 [[J]], 1
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[INNER_HEADER]]
; CHECK: [[OUTER_BODY]]:
; CHECK-NEXT: br label %[[OUTER_LATCH]]
; CHECK: [[OUTER_LATCH]]:
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT: [[CMP286_US:%.*]] = icmp ugt i64 [[I]], 0
; CHECK-NEXT: br i1 [[CMP286_US]], label %[[OUTER_HEADER]], label %[[INNER_LATCH_SPLIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[OLD_COND_LCSSA_LCSSA:%.*]] = phi i16 [ [[NEW_COND_LCSSA]], %[[INNER_LATCH_SPLIT]] ]
; CHECK-NEXT: ret i16 [[OLD_COND_LCSSA_LCSSA]]
;
entry:
br label %outer.header

outer.header:
%i = phi i64 [ 1, %entry ], [ %i.next, %outer.latch ]
br label %inner.header

inner.header:
%j = phi i16 [ 0, %outer.header ], [ %j.next, %inner.latch ]
%arrayidx.us.us = getelementptr i16, ptr %a, i16 %j
%0 = load i16, ptr %arrayidx.us.us, align 1
%cond = select i1 false, i16 0, i16 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to define an arbitrary value, it might be better to use freeze i16 poison.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, interchange doesn't care, it's the reproducer from the bug report, and it's short.
But yeah, it's no effort to read from a function argument, so will do that and change this accordingly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, interchange doesn't care,

Future DA might care, when checking AliasAnalysis on whether the base pointers themselves alias.

br label %inner.latch

inner.latch:
%j.next = add i16 %j, 1
br i1 true, label %outer.body, label %inner.header

outer.body:
%new.cond.lcssa = phi i16 [ %cond, %inner.latch ]
br label %outer.latch

outer.latch:
%old.cond.lcssa = phi i16 [ %new.cond.lcssa, %outer.body ]
%i.next = add i64 %i, 1
%cmp286.us = icmp ugt i64 %i, 0
br i1 %cmp286.us, label %outer.header, label %exit

exit:
ret i16 %old.cond.lcssa
}
Loading