Skip to content

Commit

Permalink
Merging r278413:
Browse files Browse the repository at this point in the history
------------------------------------------------------------------------
r278413 | gberry | 2016-08-11 14:05:17 -0700 (Thu, 11 Aug 2016) | 17 lines

[SCEV] Update interface to handle SCEVExpander insert point motion.

Summary:
This is an extension of the fix in r271424.  That fix dealt with builder
insert points being moved by SCEV expansion, but only for the lifetime
of the expand call.  This change modifies the interface so that LSR can
safely call expand multiple times at the same insert point and do the
right thing if one of the expansions decides to move the original insert
point.

This is a fix for PR28719.

Reviewers: sanjoy

Subscribers: llvm-commits, mcrosier, mzolotukhin

Differential Revision: https://reviews.llvm.org/D23342
------------------------------------------------------------------------

llvm-svn: 278424
  • Loading branch information
zmodem committed Aug 11, 2016
1 parent 3d88a18 commit 6ea7621
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 16 deletions.
22 changes: 16 additions & 6 deletions llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,13 @@ namespace llvm {
/// block.
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);

/// \brief Insert code to directly compute the specified SCEV expression
/// into the program. The code is emitted at the SCEVExpander's current
/// insertion point (see setInsertPoint()). If a type is specified, the
/// result will be expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);


/// \brief Generates a code sequence that evaluates this predicate.
/// The inserted instructions will be at position \p Loc.
/// The result will be of type i1 and will have a value of 0 when the
Expand Down Expand Up @@ -253,6 +260,15 @@ namespace llvm {

/// \brief Switch the expander into LSR mode by setting the LSRMode flag.
void enableLSRMode() { LSRMode = true; }

/// \brief Set the current insertion point. This is useful if multiple calls
/// to expandCodeFor() are going to be made with the same insert point and
/// the insert point may be moved during one of the expansions (e.g. if the
/// insert point is not a block terminator).
///
/// \param IP Instruction to use as the builder's insertion point; must not
/// be null.
void setInsertPoint(Instruction *IP) {
  // A null insertion point is a caller bug; fail loudly with a message.
  assert(IP && "setInsertPoint requires a non-null insertion point");
  Builder.SetInsertPoint(IP);
}

/// \brief Clear the current insertion point. This is useful if the
/// instruction that had been serving as the insertion point may have been
/// deleted.
Expand Down Expand Up @@ -313,12 +329,6 @@ namespace llvm {

Value *expand(const SCEV *S);

/// \brief Insert code to directly compute the specified SCEV expression
/// into the program. The inserted code is inserted into the SCEVExpander's
/// current insertion point. If a type is specified, the result will be
/// expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);

/// \brief Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);

Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Analysis/ScalarEvolutionExpander.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1610,8 +1610,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {

/// Expand \p SH at insertion point \p IP: position the builder at \p IP, then
/// defer to the two-argument expandCodeFor overload, forwarding \p Ty.
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *IP) {
// NOTE(review): this diff view lists the replaced statements (assert +
// Builder.SetInsertPoint) alongside their replacement, setInsertPoint(IP),
// which performs the same two steps.
assert(IP);
Builder.SetInsertPoint(IP);
setInsertPoint(IP);
return expandCodeFor(SH, Ty);
}

Expand Down
17 changes: 9 additions & 8 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4442,6 +4442,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
Rewriter.setInsertPoint(&*IP);

// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
Expand Down Expand Up @@ -4473,7 +4474,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);

Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
}

// Expand the ScaledReg portion.
Expand All @@ -4491,14 +4492,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand ScaleReg as if it was part of the base regs.
if (F.Scale == 1)
Ops.push_back(
SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
else {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
}
} else {
// Otherwise just expand the scaled register and an explicit scale,
Expand All @@ -4508,11 +4509,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
Expand All @@ -4524,7 +4525,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
Expand All @@ -4534,7 +4535,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
Expand Down Expand Up @@ -4570,7 +4571,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty);

// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; RUN: opt < %s -loop-reduce -S | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = global i32 0, align 4
@b = global i8 0, align 1
@c = global [4 x i8] zeroinitializer, align 1

; Just make sure we don't generate code with uses not dominated by defs.
; Regression test for PR28719: the loop below has two exits to %if.then and
; uses %iv both in a multiply and as an index into @c.
; CHECK-LABEL: @main(
define i32 @main() {
entry:
; Enter the loop only when @a < 4; otherwise return immediately.
%a0 = load i32, i32* @a, align 4
%cmpa = icmp slt i32 %a0, 4
br i1 %cmpa, label %preheader, label %for.end

preheader:
; Loop-invariant multiplier: @b sign-extended to i64.
%b0 = load i8, i8* @b, align 1
%b0sext = sext i8 %b0 to i64
br label %for.body

for.body:
; %iv starts at 0 and is incremented in %lor.false.
%iv = phi i64 [ 0, %preheader ], [ %iv.next, %lor.false ]
; First exit test: continue only if the low 32 bits of b * iv are zero.
%mul = mul nsw i64 %b0sext, %iv
%multrunc = trunc i64 %mul to i32
%cmp = icmp eq i32 %multrunc, 0
br i1 %cmp, label %lor.false, label %if.then

lor.false:
; Second exit test: keep looping only while sext(c[iv]) == trunc(iv).
%cgep = getelementptr inbounds [4 x i8], [4 x i8]* @c, i64 0, i64 %iv
%ci = load i8, i8* %cgep, align 1
%cisext = sext i8 %ci to i32
%ivtrunc = trunc i64 %iv to i32
%cmp2 = icmp eq i32 %cisext, %ivtrunc
%iv.next = add i64 %iv, 1
br i1 %cmp2, label %for.body, label %if.then

if.then:
; Shared failure exit for both loop tests.
tail call void @abort()
unreachable

for.end:
; Reached only when the loop is skipped from %entry.
ret i32 0
}

declare void @abort()

0 comments on commit 6ea7621

Please sign in to comment.