Skip to content

Commit

Permalink
[IndVars] Eliminate op.with.overflow when possible (re-apply)
Browse files Browse the repository at this point in the history
Summary:
If we can prove that an op.with.overflow intrinsic does not overflow, we
can get rid of the intrinsic, and replace it with non-wrapping
arithmetic.

This was first checked in at r265913 but reverted in r265950 because it
exposed some issues around how SCEV handled post-inc add recurrences.
Those issues have now been fixed.

Reviewers: atrick, regehr

Subscribers: sanjoy, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D18685

llvm-svn: 271153
  • Loading branch information
sanjoy committed May 29, 2016
1 parent f49ca52 commit ae09b3c
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 0 deletions.
107 changes: 107 additions & 0 deletions llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
Expand Up @@ -70,6 +70,7 @@ namespace {

bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);

// Try to replace a call to an add/sub *.with.overflow intrinsic with a plain
// no-wrap add/sub when the operation can be shown not to overflow. Returns
// true if the call's extractvalue users were rewritten.
bool eliminateOverflowIntrinsic(CallInst *CI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
Expand Down Expand Up @@ -317,6 +318,108 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
DeadInsts.emplace_back(Rem);
}

/// Replace an add/sub `*.with.overflow` intrinsic call with plain no-wrap
/// arithmetic when SCEV shows that the operation cannot overflow.
///
/// Only `sadd`, `uadd`, `ssub` and `usub` with-overflow intrinsics are handled;
/// any other call (or an indirect call) is left alone. The no-overflow proof
/// compares two SCEV expressions built at twice the operand bit width:
///   ext(LHS op RHS)   versus   ext(LHS) op ext(RHS)
/// using sign extension for the signed intrinsics and zero extension for the
/// unsigned ones. Only when SCEV hands back the very same expression for both
/// is the rewrite performed.
///
/// On success a new add/sub (flagged nsw for signed, nuw for unsigned) is
/// inserted before \p CI, every `extractvalue` user of \p CI is redirected
/// (field 1 -> constant false, field 0 -> the new add/sub) and erased, and
/// \p CI itself is erased if nothing else uses it.
///
/// \returns true if the rewrite happened, false otherwise.
bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
  auto *Callee = CI->getCalledFunction();
  if (!Callee)
    return false;

  using BinarySCEVFn = const SCEV *(ScalarEvolution::*)(
      const SCEV *, const SCEV *, SCEV::NoWrapFlags);
  using ExtendSCEVFn = const SCEV *(ScalarEvolution::*)(const SCEV *, Type *);

  // Classify the intrinsic along two axes: signed vs. unsigned, add vs. sub.
  // Exactly one of nsw / nuw will be set on the replacement: nsw when
  // IsSigned is true, nuw otherwise.
  bool IsSigned = false;
  bool IsSubtract = false;
  switch (Callee->getIntrinsicID()) {
  case Intrinsic::sadd_with_overflow:
    IsSigned = true;
    break;
  case Intrinsic::uadd_with_overflow:
    break;
  case Intrinsic::ssub_with_overflow:
    IsSigned = true;
    IsSubtract = true;
    break;
  case Intrinsic::usub_with_overflow:
    IsSubtract = true;
    break;
  default:
    return false;
  }

  // Pick the SCEV builders and the IR opcode matching the classification.
  BinarySCEVFn BuildOp = &ScalarEvolution::getAddExpr;
  Instruction::BinaryOps Opcode = Instruction::Add;
  if (IsSubtract) {
    BuildOp = &ScalarEvolution::getMinusSCEV;
    Opcode = Instruction::Sub;
  }

  ExtendSCEVFn Extend = &ScalarEvolution::getZeroExtendExpr;
  if (IsSigned)
    Extend = &ScalarEvolution::getSignExtendExpr;

  Value *Op0 = CI->getArgOperand(0);
  Value *Op1 = CI->getArgOperand(1);
  const SCEV *LHS = SE->getSCEV(Op0);
  const SCEV *RHS = SE->getSCEV(Op1);

  auto *NarrowTy = cast<IntegerType>(LHS->getType());
  auto *WideTy =
      IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);

  // Extend-after-operate versus operate-after-extend, both in the wide type.
  const SCEV *NarrowOp = (SE->*BuildOp)(LHS, RHS, SCEV::FlagAnyWrap);
  const SCEV *ExtOfOp = (SE->*Extend)(NarrowOp, WideTy);
  const SCEV *OpOfExt =
      (SE->*BuildOp)((SE->*Extend)(LHS, WideTy), (SE->*Extend)(RHS, WideTy),
                     SCEV::FlagAnyWrap);

  if (ExtOfOp != OpOfExt)
    return false;

  // No overflow is possible: materialize the raw arithmetic right before the
  // intrinsic call and record the wrap flag that was just proven.
  BinaryOperator *Replacement =
      BinaryOperator::Create(Opcode, Op0, Op1, "", CI);
  if (IsSigned)
    Replacement->setHasNoSignedWrap(true);
  else
    Replacement->setHasNoUnsignedWrap(true);

  // Redirect the extractvalue users first and erase them afterwards, so the
  // use list of CI is not modified while it is being walked.
  SmallVector<ExtractValueInst *, 4> DeadExtracts;
  for (auto *User : CI->users()) {
    auto *Extract = dyn_cast<ExtractValueInst>(User);
    if (!Extract)
      continue;

    const unsigned FieldIdx = Extract->getIndices()[0];
    if (FieldIdx == 1) {
      // The overflow bit is known to be false.
      Extract->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
    } else {
      assert(FieldIdx == 0 && "Only two possibilities!");
      Extract->replaceAllUsesWith(Replacement);
    }
    DeadExtracts.push_back(Extract);
  }

  for (auto *Extract : DeadExtracts)
    Extract->eraseFromParent();

  // The intrinsic call can only go away if no non-extractvalue user remains.
  if (CI->use_empty())
    CI->eraseFromParent();

  return true;
}

/// Eliminate an operation that consumes a simple IV and has no observable
/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
/// but UseInst may not be.
Expand All @@ -334,6 +437,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
}
}

if (auto *CI = dyn_cast<CallInst>(UseInst))
if (eliminateOverflowIntrinsic(CI))
return true;

if (eliminateIdentitySCEV(UseInst, IVOperand))
return true;

Expand Down
137 changes: 137 additions & 0 deletions llvm/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
@@ -0,0 +1,137 @@
; RUN: opt -S -indvars < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Signed-add case: %i.04 starts at 0 and is advanced by 1 through
; llvm.sadd.with.overflow; each iteration stores a zero byte at %a[%i.04] and
; the loop continues while the incremented value is signed-less-than 16.
; The expectations inside for.body require that the intrinsic call is gone
; after indvars and that the overflow branch is taken on a constant false.
define void @f_sadd(i8* %a) {
; CHECK-LABEL: @f_sadd(
entry:
br label %for.body

for.cond.cleanup: ; preds = %cont
ret void

for.body: ; preds = %entry, %cont
%i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
%idxprom = sext i32 %i.04 to i64
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
store i8 0, i8* %arrayidx, align 1
; %0 is the { result, overflow-bit } pair; %1 extracts the overflow bit.
%0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
%1 = extractvalue { i32, i1 } %0, 1
; CHECK: for.body:
; CHECK-NOT: @llvm.sadd.with.overflow
; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
br i1 %1, label %trap, label %cont, !nosanitize !{}

trap: ; preds = %for.body
tail call void @llvm.trap() #2, !nosanitize !{}
unreachable, !nosanitize !{}

cont: ; preds = %for.body
; %2 is the arithmetic result, fed back into the phi as the next IV value.
%2 = extractvalue { i32, i1 } %0, 0
%cmp = icmp slt i32 %2, 16
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Unsigned-add case: same loop shape as the signed-add test, but the increment
; goes through llvm.uadd.with.overflow. %i.04 starts at 0, a zero byte is
; stored at %a[%i.04], and the loop continues while the incremented value is
; signed-less-than 16 (the exit compare is still a signed one).
; The expectations inside for.body require that the intrinsic call is gone
; after indvars and that the overflow branch is taken on a constant false.
define void @f_uadd(i8* %a) {
; CHECK-LABEL: @f_uadd(
entry:
br label %for.body

for.cond.cleanup: ; preds = %cont
ret void

for.body: ; preds = %entry, %cont
%i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
%idxprom = sext i32 %i.04 to i64
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
store i8 0, i8* %arrayidx, align 1
; %0 is the { result, overflow-bit } pair; %1 extracts the overflow bit.
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)
%1 = extractvalue { i32, i1 } %0, 1
; CHECK: for.body:
; CHECK-NOT: @llvm.uadd.with.overflow
; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
br i1 %1, label %trap, label %cont, !nosanitize !{}

trap: ; preds = %for.body
tail call void @llvm.trap(), !nosanitize !{}
unreachable, !nosanitize !{}

cont: ; preds = %for.body
; %2 is the arithmetic result, fed back into the phi as the next IV value.
%2 = extractvalue { i32, i1 } %0, 0
%cmp = icmp slt i32 %2, 16
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Signed-subtract case: %i.04 starts at 15 and is decremented by 1 through
; llvm.ssub.with.overflow; each iteration stores a zero byte at %a[%i.04] and
; the loop continues while the decremented value is signed-greater-than -1.
; The expectations inside for.body require that the intrinsic call is gone
; after indvars and that the overflow branch is taken on a constant false.
define void @f_ssub(i8* nocapture %a) {
; CHECK-LABEL: @f_ssub(
entry:
br label %for.body

for.cond.cleanup: ; preds = %cont
ret void

for.body: ; preds = %entry, %cont
%i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
%idxprom = sext i32 %i.04 to i64
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
store i8 0, i8* %arrayidx, align 1
; %0 is the { result, overflow-bit } pair; %1 extracts the overflow bit.
%0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)
%1 = extractvalue { i32, i1 } %0, 1
; CHECK: for.body:
; CHECK-NOT: @llvm.ssub.with.overflow.i32
; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
br i1 %1, label %trap, label %cont, !nosanitize !{}

trap: ; preds = %for.body
tail call void @llvm.trap(), !nosanitize !{}
unreachable, !nosanitize !{}

cont: ; preds = %for.body
; %2 is the arithmetic result, fed back into the phi as the next IV value.
%2 = extractvalue { i32, i1 } %0, 0
%cmp = icmp sgt i32 %2, -1
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Unsigned-subtract case (negative test): %i.04 starts at 15 and is
; decremented by 1 through llvm.usub.with.overflow; each iteration stores a
; zero byte at %a[%i.04] and the loop continues while the decremented value is
; signed-greater-than -1.
; Unlike the other three tests, the expectations inside for.body require the
; extractvalue of the overflow bit and the branch on it to still be present
; after indvars.
define void @f_usub(i8* nocapture %a) {
; CHECK-LABEL: @f_usub(
entry:
br label %for.body

for.cond.cleanup: ; preds = %cont
ret void

for.body: ; preds = %entry, %cont
%i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
%idxprom = sext i32 %i.04 to i64
%arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
store i8 0, i8* %arrayidx, align 1
; %0 is the { result, overflow-bit } pair; %1 extracts the overflow bit.
%0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)
%1 = extractvalue { i32, i1 } %0, 1

; It is theoretically possible to prove this, but SCEV cannot
; represent non-unsigned-wrapping subtraction operations.

; CHECK: for.body:
; CHECK: [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1
; CHECK-NEXT: br i1 [[COND]], label %trap, label %cont, !nosanitize !0
br i1 %1, label %trap, label %cont, !nosanitize !{}

trap: ; preds = %for.body
tail call void @llvm.trap(), !nosanitize !{}
unreachable, !nosanitize !{}

cont: ; preds = %for.body
; %2 is the arithmetic result, fed back into the phi as the next IV value.
%2 = extractvalue { i32, i1 } %0, 0
%cmp = icmp sgt i32 %2, -1
br i1 %cmp, label %for.body, label %for.cond.cleanup
}

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone

declare void @llvm.trap() #2

0 comments on commit ae09b3c

Please sign in to comment.