diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 28ae4f0a0aad9..b4003f8cd2163 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include #include #include @@ -1872,6 +1873,50 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch); } +// This deals with a corner case when a LCSSA phi node appears in a non-exit +// block: the outer loop latch block does not need to be exit block of the +// inner loop. Consider a loop that was in LCSSA form, but then some +// transformation like loop-unswitch comes along and creates an empty block, +// where BB5 in this example is the outer loop latch block: +// +// BB4: +// br label %BB5 +// BB5: +// %old.cond.lcssa = phi i16 [ %cond, %BB4 ] +// br outer.header +// +// Interchange then brings it in LCSSA form again resulting in this chain of +// single-input phi nodes: +// +// BB4: +// %new.cond.lcssa = phi i16 [ %cond, %BB3 ] +// br label %BB5 +// BB5: +// %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ] +// +// The problem is that interchange can reoder blocks BB4 and BB5 placing the +// use before the def if we don't check this. +// +static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) { + BasicBlock *InnerLoopExit = InnerLoop->getExitBlock(); + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + + // Do not modify lcssa phis where they actually belong, i.e. in exit blocks. + if (OuterLoopLatch == InnerLoopExit) + return; + + // Collect and remove phis in non-exit blocks if they have 1 input. + SmallVector Phis( + llvm::make_pointer_range(OuterLoopLatch->phis())); + for (PHINode *Phi : Phis) { + assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected"); + LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi + << "\n"); + Phi->replaceAllUsesWith(Phi->getIncomingValue(0)); + Phi->eraseFromParent(); + } +} + bool LoopInterchangeTransform::adjustLoopBranches() { LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n"); std::vector DTUpdates; @@ -1882,6 +1927,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() { assert(OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && "Guaranteed by loop-simplify form"); + + simplifyLCSSAPhis(OuterLoop, InnerLoop); + // Ensure that both preheaders do not contain PHI nodes and have single // predecessors. This allows us to move them easily. We use // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll new file mode 100644 index 0000000000000..d9323c088b235 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s + +; This test is checking that blocks outer.body and outer.latch, where outer.body is the exit +; block of the inner loop and outer.latch the latch of the outer loop, correctly +; deal with the phi-node use-def chain %new.cond.lcssa -> %old.cond.lcssa. What we expect +; here is that block outer.latch does not contain a phi node, because it is a single input +; phi in a non-exit block. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i16 @main() { +; CHECK-LABEL: define i16 @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[INNER_HEADER_PREHEADER:.*]] +; CHECK: [[OUTER_HEADER_PREHEADER:.*]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 1, %[[OUTER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[INNER_HEADER_SPLIT:.*]] +; CHECK: [[INNER_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[J:%.*]] = phi i16 [ [[TMP1:%.*]], %[[INNER_LATCH_SPLIT:.*]] ], [ 0, %[[INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[OUTER_HEADER_PREHEADER]] +; CHECK: [[INNER_HEADER_SPLIT]]: +; CHECK-NEXT: [[ARRAYIDX_US_US:%.*]] = getelementptr i16, ptr null, i16 [[J]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_US_US]], align 1 +; CHECK-NEXT: [[COND:%.*]] = select i1 false, i16 0, i16 0 +; CHECK-NEXT: br label %[[INNER_LATCH:.*]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[J_NEXT:%.*]] = add i16 [[J]], 1 +; CHECK-NEXT: br label %[[OUTER_BODY:.*]] +; CHECK: [[INNER_LATCH_SPLIT]]: +; CHECK-NEXT: [[NEW_COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[OUTER_LATCH]] ] +; CHECK-NEXT: [[TMP1]] = add i16 [[J]], 1 +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_BODY]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 +; CHECK-NEXT: [[CMP286_US:%.*]] = icmp ugt i64 [[I]], 0 +; CHECK-NEXT: br i1 [[CMP286_US]], label %[[OUTER_HEADER]], label %[[INNER_LATCH_SPLIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[OLD_COND_LCSSA_LCSSA:%.*]] = phi i16 [ [[NEW_COND_LCSSA]], %[[INNER_LATCH_SPLIT]] ] +; CHECK-NEXT: ret i16 [[OLD_COND_LCSSA_LCSSA]] +; +entry: + br label %outer.header + +outer.header: + %i = phi i64 [ 1, %entry ], [ %i.next, %outer.latch ] + br label %inner.header + +inner.header: + %j = phi i16 [ 0, %outer.header ], [ %j.next, %inner.latch ] + %arrayidx.us.us = getelementptr i16, ptr null, i16 %j + %0 = load i16, ptr %arrayidx.us.us, align 1 + %cond = select i1 false, i16 0, i16 0 + br label %inner.latch + +inner.latch: + %j.next = add i16 %j, 1 + br i1 true, label %outer.body, label %inner.header + +outer.body: + %new.cond.lcssa = phi i16 [ %cond, %inner.latch ] + br label %outer.latch + +outer.latch: + %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %outer.body ] + %i.next = add i64 %i, 1 + %cmp286.us = icmp ugt i64 %i, 0 + br i1 %cmp286.us, label %outer.header, label %exit + +exit: + ret i16 %old.cond.lcssa +}